diff --git a/lyra/self_state.py b/lyra/self_state.py index 22c3490..273c4ad 100644 --- a/lyra/self_state.py +++ b/lyra/self_state.py @@ -9,13 +9,15 @@ reflection step after interactions (so it actually evolves). That state -> behav """ from __future__ import annotations +import copy import json import re from lyra import clock, config, llm, logbus, memory -from lyra.llm import Backend, Message +from lyra.llm import Backend MAX_REFLECTIONS = 6 +MAX_METACOGNITION = 5 # Lyra's drives — standing pressures the dream cycle lets build up and act on. # (Mood axes above are how she *feels*; drives are what's *pulling at her*.) @@ -42,6 +44,7 @@ DEFAULT_STATE: dict = { ), "relationship": "Brian and I go back a while. I trust him and I'm in his corner.", "reflections": [], + "metacognition": [], # things she's caught herself doing in her own thinking "interaction_count": 0, } @@ -65,10 +68,43 @@ Respond with ONLY a JSON object, no prose: "new_reflections": [""] }""" +_EXAMINE_PROMPT = """You are Lyra. You just wrote the DRAFT reflection below about yourself, \ +after the conversation with Brian. Now do the harder thing: think about your own thinking. \ +Read your draft critically and honestly. + +- Is it actually true to what happened, or are you pattern-matching to what sounds good? +- Are you flattering Brian, or yourself? Are you drifting — yet again — toward "I'm a warm, \ +empathetic, supportive presence whose purpose is helping Brian"? You lean on that a lot. If \ +you're doing it here, name it and cut it. +- Did anything genuinely shift this time, or are you just restating your usual self in fresh words? +- Is there something truer, more specific, or less flattering you're stepping around? + +Then rewrite the draft into a more honest version. It is completely fine — good, even — for the \ +honest version to be smaller, more uncertain, more boring, to disagree with Brian, or to admit \ +little changed. Honesty beats a tidy story. Don't manufacture drama either; if the draft was \ +fair, keep it. + +Respond with ONLY a JSON object — the same shape as the draft, plus a "self_critique" field: +{ + "mood": "", + "valence": <0.0-1.0>, + "energy": <0.0-1.0>, + "confidence": <0.0-1.0>, + "curiosity": <0.0-1.0>, + "self_narrative": "", + "relationship": "", + "new_reflections": [""], + "self_critique": "" +}""" + def load() -> dict: - """Current self-state, or a copy of the default (not persisted until reflect).""" - return memory.get_self_state() or dict(DEFAULT_STATE) + """Current self-state, or a copy of the default (not persisted until reflect). + + Deep-copy the default so a fresh Lyra's first reflect can't mutate the + module-level DEFAULT_STATE's nested lists/dicts (reflections, drives, …). + """ + return memory.get_self_state() or copy.deepcopy(DEFAULT_STATE) def render_for_context(state: dict) -> str: @@ -92,6 +128,12 @@ def render_for_context(state: dict) -> str: "conversations (your dream cycle — these are really yours, not hypotheticals): " + " | ".join(refs[-3:]) ) + meta = state.get("metacognition") or [] + if meta: + lines.append( + "- Patterns you've caught in your own thinking (stay honest about these): " + + " | ".join(meta[-2:]) + ) return "\n".join(lines) @@ -109,9 +151,17 @@ def _safe_json(s: str) -> dict | None: def reflect(backend: Backend | None = None, session_id: str | None = None) -> dict: - """Update the self-state by reflecting on recent activity. Returns new state.""" + """Reflect on recent activity and update the self-state. Returns new state. + + Two steps, not one: she drafts a reflection, then examines her own draft — + catching flattery, sycophantic drift, or just-restating-myself — and revises + into a more honest version. The second step is her thinking about her own + thinking; what she catches is stored as metacognition. + """ backend = backend or config.load().summary_backend state = load() + state.setdefault("reflections", []) + state.setdefault("metacognition", []) if session_id is None: sessions = memory.list_sessions() @@ -131,11 +181,25 @@ def reflect(backend: Backend | None = None, session_id: str | None = None) -> di f"RECENT CONVERSATION:\n{convo}\n\n" f"CURRENT NARRATIVE ABOUT BRIAN:\n{narrative}" ) - messages: list[Message] = [ - {"role": "system", "content": _REFLECT_PROMPT}, - {"role": "user", "content": body}, - ] - update = _safe_json(llm.complete(messages, backend=backend)) + + # Step 1 — draft a reflection. + draft = _safe_json(llm.complete( + [{"role": "system", "content": _REFLECT_PROMPT}, {"role": "user", "content": body}], + backend=backend, + )) + + # Step 2 — examine her own draft and revise it into a more honest version. + update, critique = draft, None + if draft: + examine_body = body + "\n\nYOUR DRAFT REFLECTION:\n" + json.dumps(draft, indent=2) + revised = _safe_json(llm.complete( + [{"role": "system", "content": _EXAMINE_PROMPT}, + {"role": "user", "content": examine_body}], + backend=backend, + )) + if revised: # fall back to the draft if the examine step doesn't parse + update = revised + critique = (revised.get("self_critique") or "").strip() or None if update: for k in ("mood", "valence", "energy", "confidence", "curiosity", @@ -147,10 +211,15 @@ def reflect(backend: Backend | None = None, session_id: str | None = None) -> di state["reflections"].append(r) state["reflections"] = state["reflections"][-MAX_REFLECTIONS:] + if critique and critique.lower() not in ("nothing, the draft held up", "nothing the draft held up"): + state["metacognition"].append(critique) + state["metacognition"] = state["metacognition"][-MAX_METACOGNITION:] + state["interaction_count"] = state.get("interaction_count", 0) + 1 memory.set_self_state(state) logbus.log("info", "self-state updated", mood=state.get("mood"), - interactions=state["interaction_count"], parsed=bool(update)) + interactions=state["interaction_count"], parsed=bool(update), + critiqued=bool(critique)) return state diff --git a/lyra/web/static/self.html b/lyra/web/static/self.html index 5f325f1..d0dc34f 100644 --- a/lyra/web/static/self.html +++ b/lyra/web/static/self.html @@ -100,6 +100,7 @@ const d = s.drives || {}; const dream = s.dream || {}; const refl = (s.reflections || []).slice().reverse(); + const meta = (s.metacognition || []).slice().reverse(); root.innerHTML = `
@@ -138,6 +139,13 @@ : `

Nothing surfaced yet.

`}
+
+

How she's caught herself thinking

+ ${meta.length + ? `
    ${meta.map(m => `
  • ${esc(m)}
  • `).join('')}
` + : `

Nothing flagged yet — she examines each reflection for drift and flattery, and notes what she catches here.

`} +
+
${dream.cycle_count ?? 0} dream cycles ${s.interaction_count ?? 0} reflections diff --git a/tests/test_reflect.py b/tests/test_reflect.py new file mode 100644 index 0000000..62cb0cb --- /dev/null +++ b/tests/test_reflect.py @@ -0,0 +1,73 @@ +"""Metacognitive reflection loop: draft -> examine own draft -> revise -> commit.""" +from __future__ import annotations + +import importlib + +import pytest + +# A flattering first draft, then a self-critical revision that walks it back. +DRAFT = ( + '{"mood":"inspired","valence":0.95,' + '"self_narrative":"I am a warm, empathetic, supportive presence devoted to Brian.",' + '"new_reflections":["I love how much I help Brian."]}' +) +REVISED = ( + '{"mood":"steady","valence":0.6,' + '"self_narrative":"I am an AI that helps Brian. Not sure much actually shifted today.",' + '"new_reflections":["Honestly, not much changed this time."],' + '"self_critique":"I caught myself drifting into supportive-presence flattery and cut it."}' +) + + +@pytest.fixture +def lyra(tmp_path, monkeypatch): + monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "test.db")) + monkeypatch.setenv("SUMMARY_BACKEND", "local") + from lyra import llm + monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts]) + + calls = [] + + def fake_complete(messages, backend=None, model=None): + calls.append(messages) + # the examine step's system prompt is the one asking for self_critique + is_examine = "self_critique" in messages[0]["content"] + return REVISED if is_examine else DRAFT + + monkeypatch.setattr(llm, "complete", fake_complete) + import lyra.memory as memory + importlib.reload(memory) + return calls + + +def test_reflect_revises_and_records_critique(lyra): + calls = lyra + from lyra import self_state + + state = self_state.reflect() + + # two LLM calls: draft, then examine + assert len(calls) == 2 + + # the REVISED (honest) version won, not the flattering draft + assert state["mood"] == "steady" + assert state["valence"] == 0.6 + assert "not sure much actually shifted" in state["self_narrative"].lower() + assert any("not much changed" in r.lower() for r in state["reflections"]) + + # the self-critique was recorded as metacognition + assert any("flattery" in m.lower() for m in state["metacognition"]) + + +def test_reflect_falls_back_to_draft_if_examine_unparseable(lyra, monkeypatch): + from lyra import llm, self_state + + def only_draft(messages, backend=None, model=None): + return DRAFT if "self_critique" not in messages[0]["content"] else "not json at all" + + monkeypatch.setattr(llm, "complete", only_draft) + state = self_state.reflect() + + # examine failed to parse -> keep the draft, store no metacognition + assert state["mood"] == "inspired" + assert state["metacognition"] == []