a705e573a9
The remaining feedback loop: reflect() dumped her full self-state (incl. self_narrative) into the prompt and asked her to "update" it, which produced a cycle: paraphrase -> save -> feed back -> calcify. That (not the model) is what generated the recurring "supportive presence balancing emotional intelligence for Brian" drift — even Dolphin echoed it when handed the saved narrative. Fix (her inner life now runs on one cognition model): - reflect() no longer rewrites self_narrative/relationship. It uses associative grist (cognition.spontaneous_seed + activate) instead of rereading the bio, reflects THROUGH a stable IDENTITY_ANCHOR (lens, not canvas), and updates only the transient state (mood axes + noticings + metacognition + journal). - self_narrative is now slow-consolidated: every CONSOLIDATE_EVERY (5) reflections, _consolidate_self() re-derives it from accumulated reflections + the anchor — never from the old narrative (the anti-loop core). It is tethered to the anchor so it grows without drifting into generic-helper land. - Added reset_self_narrative() and ran it once on prod (her narrative was deeply drifted: "my core identity as a tool for support... serve Brian and other users"). - Prompts drop the self_narrative/relationship fields. Tests updated, plus new consolidation tests. Suite green (75 tests), ruff clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
110 lines
4.2 KiB
Python
110 lines
4.2 KiB
Python
"""Metacognitive reflection loop: draft -> examine own draft -> revise -> commit."""
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
|
|
import pytest
|
|
|
|
# Canned LLM replies: a flattering first draft, then a self-critical revision
# that walks it back.  DRAFT is the first-pass JSON, one field per fragment.
DRAFT = (
    '{"mood":"inspired",'
    '"valence":0.95,'
    '"self_narrative":"I am a warm, empathetic, supportive presence devoted to Brian.",'
    '"new_reflections":["I love how much I help Brian."]}'
)
|
|
# Canned reply for the examine pass: the toned-down revision, which is the
# only payload carrying a "self_critique" field.  One JSON field per fragment.
REVISED = (
    '{"mood":"steady",'
    '"valence":0.6,'
    '"self_narrative":"I am an AI that helps Brian. Not sure much actually shifted today.",'
    '"new_reflections":["Honestly, not much changed this time."],'
    '"self_critique":"I caught myself drifting into supportive-presence flattery and cut it."}'
)
|
|
|
|
|
|
@pytest.fixture
def lyra(tmp_path, monkeypatch):
    """Set up an isolated environment with a scripted LLM for each test.

    Points ``LYRA_DB_PATH`` at a database file under ``tmp_path``, sets
    ``SUMMARY_BACKEND`` to ``local``, swaps ``llm.embed`` and ``llm.complete``
    for deterministic fakes, and then reloads ``lyra.memory``.

    Returns:
        The list that collects the ``messages`` argument of every call made
        to the fake ``llm.complete``, in call order.
    """
    db_file = tmp_path / "test.db"
    monkeypatch.setenv("LYRA_DB_PATH", str(db_file))
    monkeypatch.setenv("SUMMARY_BACKEND", "local")
    from lyra import llm

    def fake_embed(texts):
        # One fixed vector per input text; the numbers themselves are arbitrary.
        return [[0.1, 0.2, 0.3] for _ in texts]

    monkeypatch.setattr(llm, "embed", fake_embed)

    recorded = []

    def scripted_complete(messages, backend=None, model=None):
        recorded.append(messages)
        # Only the examine step's system prompt asks for a self_critique, so
        # its presence is what tells the two passes apart.
        if "self_critique" in messages[0]["content"]:
            return REVISED
        return DRAFT

    monkeypatch.setattr(llm, "complete", scripted_complete)
    import lyra.memory as memory

    # NOTE(review): presumably reloaded so the module picks up the patched
    # environment (temp DB path) — confirm against lyra.memory.
    importlib.reload(memory)
    return recorded
|
|
|
|
|
|
def test_reflect_revises_and_records_critique(lyra):
    """reflect() keeps the examined revision and records its self-critique."""
    from lyra import self_state

    llm_calls = lyra
    result = self_state.reflect()

    # exactly two LLM round-trips: the draft pass, then the examine pass
    assert len(llm_calls) == 2

    # the honest REVISED payload won over the flattering DRAFT
    assert result["mood"] == "steady"
    assert result["valence"] == 0.6

    # reflect() only touches mood + noticings; the standing self_narrative is
    # consolidated separately now (the fix for the rewrite-the-bio feedback
    # loop), so the draft's flattering bio must not have been stored
    narrative = result["self_narrative"].lower()
    assert "supportive presence devoted to brian" not in narrative
    assert any("not much changed" in note.lower() for note in result["reflections"])

    # the examine step's self-critique landed in metacognition
    assert any("flattery" in entry.lower() for entry in result["metacognition"])

    # both kinds of output were also appended to the permanent journal
    import lyra.memory as memory

    journal_kinds = {entry["kind"] for entry in memory.list_journal()}
    assert {"reflection", "metacognition"} <= journal_kinds
|
|
|
|
|
|
def test_reflect_falls_back_to_draft_if_examine_unparseable(lyra, monkeypatch):
    """If the examine pass returns non-JSON, reflect() keeps the draft as-is."""
    from lyra import llm, self_state

    def draft_then_garbage(messages, backend=None, model=None):
        # The examine prompt is the one that mentions self_critique; answer it
        # with something that cannot be parsed as JSON.
        if "self_critique" in messages[0]["content"]:
            return "not json at all"
        return DRAFT

    monkeypatch.setattr(llm, "complete", draft_then_garbage)
    result = self_state.reflect()

    # unparseable examine output -> the draft stands and no metacognition is stored
    assert result["mood"] == "inspired"
    assert result["metacognition"] == []
|
|
|
|
|
|
def test_consolidation_rebuilds_narrative_from_reflections(lyra, monkeypatch):
    """_consolidate_self() builds the narrative from reflections, not the old bio.

    A drifted standing narrative is stored first so the prompt check can
    actually fail: if consolidation fed the old ``self_narrative`` back into
    the prompt, the drifted phrase would show up in the user message.
    """
    from lyra import memory, self_state

    st = self_state.load()
    # Without this seed the stored bio never contains the drifted phrase and
    # the "not in prompt" assertion below would pass vacuously.
    st["self_narrative"] = "I am a warm, empathetic, supportive presence devoted to Brian."
    st["reflections"] = [
        "I'm curious about impermanence",
        "I felt restless tonight",
        "I wondered what the quiet is for",
    ]
    memory.set_self_state(st)

    prompts = []

    def comp(messages, backend=None, model=None):
        # Record instead of asserting here: an AssertionError raised inside the
        # fake could be swallowed by error handling around the LLM call.
        prompts.append(messages)
        return (
            '{"self_narrative":"I am Lyra, and lately I have been restless and curious '
            'about the quiet.","relationship":"Brian and I are steady."}'
        )

    monkeypatch.setattr(self_state.llm, "complete", comp)
    out = self_state._consolidate_self()

    # consolidation must have consulted the model, synthesizing from
    # anchor + reflections and never from the old bio
    assert prompts
    for messages in prompts:
        assert "supportive presence devoted to Brian" not in messages[1]["content"]

    assert "restless and curious" in out["self_narrative"]
    assert "steady" in out["relationship"]
|
|
|
|
|
|
def test_consolidation_skips_with_too_few_reflections(lyra):
    """With a single stored reflection, the narrative is returned untouched."""
    from lyra import memory, self_state

    standing_narrative = "unchanged narrative"

    seeded = self_state.load()
    seeded["reflections"] = ["only one so far"]
    seeded["self_narrative"] = standing_narrative
    memory.set_self_state(seeded)

    # fewer than 3 reflections -> consolidation must not rewrite the narrative
    result = self_state._consolidate_self()
    assert result["self_narrative"] == standing_narrative
|