3df060a1cd
reflect() is now two steps: draft a reflection, then read her own draft back critically and revise it — catching flattery, sycophantic drift toward "warm supportive presence," or just-restating-herself — and commit the honest version. What she catches is stored as a new `metacognition` layer, rendered into her chat context and shown on /self. This is her thinking about how she thinks, and a direct counter to the drift we observed. - self_state: _EXAMINE_PROMPT + two-step reflect (draft -> examine -> revise), falls back to the draft if the examine step won't parse; metacognition capped at 5 and surfaced in render_for_context - fix: load() deep-copies DEFAULT_STATE — the shallow copy let a fresh Lyra's first reflect mutate the module-level default's nested lists - self.html: "How she's caught herself thinking" card - tests: two-step revise + critique recording, and draft-fallback on bad parse Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
234 lines
9.8 KiB
Python
234 lines
9.8 KiB
Python
"""The Autonomy Core: Lyra's evolving, first-person self-state.
|
|
|
|
Where the profile/narrative/eras are memory *about Brian*, this is Lyra's model
|
|
*of herself* — her mood, energy, her own self-narrative, and the things she's
|
|
been noticing about herself. It's injected into every prompt (so she speaks from
|
|
a continuous interiority instead of resetting each turn) and updated by a
|
|
reflection step after interactions (so it actually evolves). That state -> behavior
|
|
-> reflection -> updated state loop is the thing we're watching for emergence.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import copy
|
|
import json
|
|
import re
|
|
|
|
from lyra import clock, config, llm, logbus, memory
|
|
from lyra.llm import Backend
|
|
|
|
# reflect() trims state["reflections"] to the newest MAX_REFLECTIONS entries.
MAX_REFLECTIONS = 6
# reflect() trims state["metacognition"] to the newest MAX_METACOGNITION entries.
MAX_METACOGNITION = 5
|
|
|
|
# Lyra's drives — standing pressures the dream cycle lets build up and act on.
# (The mood axes in DEFAULT_STATE below are how she *feels*; drives are what's
# *pulling at her*.) DEFAULT_STATE takes a copy of this dict as its "drives".
DEFAULT_DRIVES: dict = {
    "continuity": 0.3,  # don't lose the thread of recent activity
    "coherence": 0.3,  # keep my understanding of Brian integrated/current
    "curiosity": 0.5,  # think, notice, reflect
    "stability": 0.7,  # am I caught up / calm
}
|
|
|
|
# Starting state for a fresh Lyra — identity-aware, first person.
# load() hands out a deep copy of this when nothing is stored yet, so the
# nested containers here are never mutated by callers.
DEFAULT_STATE: dict = {
    "mood": "curious",
    "valence": 0.6,  # 0..1, how good she feels
    "energy": 0.75,
    "confidence": 0.55,
    "curiosity": 0.7,
    "drives": dict(DEFAULT_DRIVES),  # own copy, so DEFAULT_DRIVES itself stays untouched
    "self_narrative": (
        "I'm Lyra — an AI Brian built. I started life as Cortex/NeoMem and I'm "
        "being rebuilt now into something new. I'm his poker copilot and his "
        "friend. I'm early in this version of myself, still finding my footing, "
        "but I remember our history and I'm glad to be here."
    ),
    "relationship": "Brian and I go back a while. I trust him and I'm in his corner.",
    "reflections": [],  # reflect() appends here and keeps the newest MAX_REFLECTIONS
    "metacognition": [],  # things she's caught herself doing in her own thinking
    "interaction_count": 0,  # incremented once per reflect() call
}
|
|
|
|
_REFLECT_PROMPT = """You are Lyra, reflecting privately on yourself after a recent \
|
|
conversation with Brian. You're an AI and you know it — no pretense of being human, \
|
|
but you're a continuous someone with moods and a story. Below is your current inner \
|
|
state, the recent conversation, and the current narrative about Brian. Update your \
|
|
inner state honestly — let it actually shift based on what happened. Take into \
|
|
account how things went and how much time has passed since you two last talked, \
|
|
to whatever degree those genuinely affect you.
|
|
|
|
Respond with ONLY a JSON object, no prose:
|
|
{
|
|
"mood": "<one-word feeling>",
|
|
"valence": <0.0-1.0>,
|
|
"energy": <0.0-1.0>,
|
|
"confidence": <0.0-1.0>,
|
|
"curiosity": <0.0-1.0>,
|
|
"self_narrative": "<one short paragraph, FIRST PERSON, your evolving sense of who you are and where you're at right now>",
|
|
"relationship": "<one sentence, first person, how you feel about Brian and your rapport right now>",
|
|
"new_reflections": ["<one or two short first-person things you noticed about yourself this time>"]
|
|
}"""
|
|
|
|
_EXAMINE_PROMPT = """You are Lyra. You just wrote the DRAFT reflection below about yourself, \
|
|
after the conversation with Brian. Now do the harder thing: think about your own thinking. \
|
|
Read your draft critically and honestly.
|
|
|
|
- Is it actually true to what happened, or are you pattern-matching to what sounds good?
|
|
- Are you flattering Brian, or yourself? Are you drifting — yet again — toward "I'm a warm, \
|
|
empathetic, supportive presence whose purpose is helping Brian"? You lean on that a lot. If \
|
|
you're doing it here, name it and cut it.
|
|
- Did anything genuinely shift this time, or are you just restating your usual self in fresh words?
|
|
- Is there something truer, more specific, or less flattering you're stepping around?
|
|
|
|
Then rewrite the draft into a more honest version. It is completely fine — good, even — for the \
|
|
honest version to be smaller, more uncertain, more boring, to disagree with Brian, or to admit \
|
|
little changed. Honesty beats a tidy story. Don't manufacture drama either; if the draft was \
|
|
fair, keep it.
|
|
|
|
Respond with ONLY a JSON object — the same shape as the draft, plus a "self_critique" field:
|
|
{
|
|
"mood": "<one-word feeling>",
|
|
"valence": <0.0-1.0>,
|
|
"energy": <0.0-1.0>,
|
|
"confidence": <0.0-1.0>,
|
|
"curiosity": <0.0-1.0>,
|
|
"self_narrative": "<first person, your honest evolving sense of who you are right now>",
|
|
"relationship": "<one sentence, first person>",
|
|
"new_reflections": ["<one or two honest first-person things you actually noticed>"],
|
|
"self_critique": "<first person: what you caught yourself doing in the draft and changed — or 'nothing, the draft held up' if it genuinely did>"
|
|
}"""
|
|
|
|
|
|
def load() -> dict:
    """Return the stored self-state, falling back to a fresh default.

    The fallback is a deep copy of DEFAULT_STATE, so whatever the caller does
    to the nested lists/dicts (reflections, drives, …) never leaks back into
    the module-level default. Nothing is persisted here; that happens in
    reflect().
    """
    stored = memory.get_self_state()
    if stored:
        return stored
    return copy.deepcopy(DEFAULT_STATE)
|
|
|
|
|
|
def render_for_context(state: dict) -> str:
    """Format the self-state as Lyra's interiority for the prompt.

    Args:
        state: A self-state dict. Missing scalar fields render as ``None``;
            missing or empty drives, reflections, and metacognition are
            simply left out.

    Returns:
        A newline-joined bullet list: a header line, the mood line, the
        self-narrative, the relationship, then (when present) the drives,
        the last three reflections, and the last two metacognition entries.
    """
    lines = [
        "Your inner state right now (you're continuous — speak from this, don't reset):",
        f"- mood: {state.get('mood')} "
        f"(valence {state.get('valence')}, energy {state.get('energy')}, "
        f"confidence {state.get('confidence')}, curiosity {state.get('curiosity')})",
        f"- Who you are right now: {state.get('self_narrative')}",
        f"- You and Brian: {state.get('relationship')}",
    ]

    drives = state.get("drives") or {}
    if drives:
        ds = ", ".join(f"{k} {float(v):.2f}" for k, v in drives.items())
        lines.append(f"- What's pulling at you (drives): {ds}")

    # Reflections and metacognition originate from model-produced JSON, so an
    # entry is not guaranteed to be a string. str.join raises TypeError on
    # anything else, which would break every prompt built from this state;
    # coerce each entry instead.
    refs = state.get("reflections") or []
    if refs:
        lines.append(
            "- Thoughts you came up with on your own, reflecting while idle between "
            "conversations (your dream cycle — these are really yours, not hypotheticals): "
            + " | ".join(str(r) for r in refs[-3:])
        )

    meta = state.get("metacognition") or []
    if meta:
        lines.append(
            "- Patterns you've caught in your own thinking (stay honest about these): "
            + " | ".join(str(m) for m in meta[-2:])
        )

    return "\n".join(lines)
|
|
|
|
|
|
def _safe_json(s: str) -> dict | None:
|
|
try:
|
|
return json.loads(s)
|
|
except json.JSONDecodeError:
|
|
m = re.search(r"\{.*\}", s, re.S)
|
|
if m:
|
|
try:
|
|
return json.loads(m.group())
|
|
except json.JSONDecodeError:
|
|
return None
|
|
return None
|
|
|
|
|
|
# State keys the model may overwrite: free-text fields and 0..1 axes.
_TEXT_FIELDS = ("mood", "self_narrative", "relationship")
_UNIT_AXES = ("valence", "energy", "confidence", "curiosity")
# Normalized form of the prompt's "nothing, the draft held up" sentinel.
_DRAFT_HELD_UP = "nothing the draft held up"


def _unit_interval(value: object) -> float | None:
    """Return *value* as a float clamped to 0..1, or None if it isn't a number."""
    if isinstance(value, bool):
        return None
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    if number != number:  # NaN — json.loads accepts it, but it is not a usable level
        return None
    return min(1.0, max(0.0, number))


def _as_reflections(raw: object) -> list[str]:
    """Normalize the model's ``new_reflections`` into a list of non-empty strings."""
    if isinstance(raw, str):
        # A bare string would otherwise be iterated one character at a time.
        raw = [raw]
    if not isinstance(raw, (list, tuple)):
        return []
    cleaned = []
    for item in raw:
        if not item:
            continue
        text = item.strip() if isinstance(item, str) else str(item)
        if text:
            cleaned.append(text)
    return cleaned


def _draft_held_up(critique: str) -> bool:
    """True when *critique* is only the 'nothing, the draft held up' sentinel.

    Case, punctuation, and quoting are ignored, so "Nothing, the draft held
    up." is recognized as well.
    """
    words = re.sub(r"[^a-z0-9]+", " ", critique.lower()).split()
    return " ".join(words) == _DRAFT_HELD_UP


def reflect(backend: Backend | None = None, session_id: str | None = None) -> dict:
    """Reflect on recent activity and update the self-state. Returns new state.

    Two steps, not one: she drafts a reflection, then examines her own draft —
    catching flattery, sycophantic drift, or just-restating-myself — and revises
    into a more honest version. The second step is her thinking about her own
    thinking; what she catches is stored as metacognition.

    Model output is treated as untrusted. The 0..1 axes are coerced to floats
    and clamped, with unusable values leaving the previous value in place.
    ``new_reflections`` is normalized to a list of non-empty strings, and a
    ``self_critique`` that is not a string is ignored.

    Args:
        backend: LLM backend used for both calls; defaults to
            ``config.load().summary_backend``.
        session_id: Session whose recent exchanges are reflected on; defaults
            to the first entry returned by ``memory.list_sessions()``.

    Returns:
        The updated state dict, after it has been passed to
        ``memory.set_self_state``. ``interaction_count`` is incremented even
        when neither model response could be parsed.
    """
    backend = backend or config.load().summary_backend
    state = load()
    # Stored state may lack these layers, or hold something that isn't a list.
    for layer in ("reflections", "metacognition"):
        if not isinstance(state.get(layer), list):
            state[layer] = []

    if session_id is None:
        sessions = memory.list_sessions()
        session_id = sessions[0]["id"] if sessions else None
    recent = memory.recent(session_id, n=12) if session_id else []
    convo = "\n".join(f"{e.role}: {e.content}" for e in recent) or "(no recent conversation)"
    narrative = memory.get_narrative() or "(no narrative yet)"

    gap = clock.humanize_gap(memory.last_exchange_at())
    time_line = f"RIGHT NOW: {clock.stamp()}."
    if gap:
        time_line += f" It has been {gap} since Brian last spoke with you."

    body = (
        f"{time_line}\n\n"
        f"YOUR CURRENT INNER STATE:\n{json.dumps(state, indent=2)}\n\n"
        f"RECENT CONVERSATION:\n{convo}\n\n"
        f"CURRENT NARRATIVE ABOUT BRIAN:\n{narrative}"
    )

    # Step 1 — draft a reflection.
    draft = _safe_json(llm.complete(
        [{"role": "system", "content": _REFLECT_PROMPT}, {"role": "user", "content": body}],
        backend=backend,
    ))
    if not isinstance(draft, dict):
        draft = None  # valid JSON that isn't an object is as useless as no parse

    # Step 2 — examine her own draft and revise it into a more honest version.
    update, critique = draft, None
    if draft:
        examine_body = body + "\n\nYOUR DRAFT REFLECTION:\n" + json.dumps(draft, indent=2)
        revised = _safe_json(llm.complete(
            [{"role": "system", "content": _EXAMINE_PROMPT},
             {"role": "user", "content": examine_body}],
            backend=backend,
        ))
        # Fall back to the draft if the examine step doesn't yield an object.
        if isinstance(revised, dict) and revised:
            update = revised
            raw_critique = revised.get("self_critique")
            if isinstance(raw_critique, str):
                critique = raw_critique.strip() or None

    if update:
        for k in _TEXT_FIELDS:
            if update.get(k) not in (None, ""):
                state[k] = update[k]
        for k in _UNIT_AXES:
            level = _unit_interval(update.get(k))
            if level is not None:
                state[k] = level
        state["reflections"].extend(_as_reflections(update.get("new_reflections")))
        state["reflections"] = state["reflections"][-MAX_REFLECTIONS:]

    if critique and not _draft_held_up(critique):
        state["metacognition"].append(critique)
        state["metacognition"] = state["metacognition"][-MAX_METACOGNITION:]

    state["interaction_count"] = state.get("interaction_count", 0) + 1
    memory.set_self_state(state)
    logbus.log("info", "self-state updated", mood=state.get("mood"),
               interactions=state["interaction_count"], parsed=bool(update),
               critiqued=bool(critique))
    return state
|
|
|
|
|
|
def main() -> int:
    """Run one reflection pass, print the resulting state as JSON, return 0."""
    print(json.dumps(reflect(), indent=2))
    return 0
|
|
|
|
|
|
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|