perf: tighten the dynamic prompt — persona split + lean deliberation
The per-turn prompt was ~5.5K tokens (persona alone ~40%), sent up to 3x/turn. Tightened by RELEVANCE (the control plane decides what each turn needs), not by deletion — fidelity preserved, focus improved (buried instructions were getting ignored), tokens roughly halved.

- persona split: core (identity + voice — always) vs situational sections pulled in only when relevant. mind._persona_block: self-model/origin only on meta turns (generous _META_HINTS), poker guardrails only in poker context (mode/strategic/_POKER_HINTS). persona.core_prompt()/section(); system_prompt() kept as fallback.
- lean deliberation: the private 'what do I think' pass now uses a focused context (her interiority + recent turns + the message), not the full persona/profile/narrative/recall dump. It shapes the take, not the voice.

Measured: casual Talk turn 21,949 -> 15,974 chars (-27%); deliberation 21,949 -> 6,026 (-72%); meta turns still include the self-model. Suite 98 green, ruff clean.

Real retirement of the long prompt is still the fine-tune (mouth); this is the cheap, high-leverage cut that also improves adherence.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+20
-3
@@ -29,15 +29,16 @@ def test_should_deliberate_skips_trivial(lyra):
|
||||
|
||||
|
||||
def test_deliberation_note_runs_and_appends(lyra, monkeypatch):
|
||||
_, mind = lyra
|
||||
memory, mind = lyra
|
||||
calls = []
|
||||
|
||||
def fake_complete(messages, backend=None, model=None):
|
||||
calls.append(messages)
|
||||
return "I actually think the first move is the smallest end-to-end slice."
|
||||
|
||||
memory.ensure_session("s1")
|
||||
monkeypatch.setattr(mind.llm, "complete", fake_complete)
|
||||
note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None, [])
|
||||
note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None)
|
||||
assert note and note["role"] == "system"
|
||||
assert "first move is the smallest" in note["content"] # her thinking carried in
|
||||
assert "numbered list" in note["content"].lower() # voice enforcement attached
|
||||
@@ -49,10 +50,26 @@ def test_deliberation_skipped_when_disabled(lyra, monkeypatch):
|
||||
monkeypatch.setenv("CHAT_DELIBERATE", "false")
|
||||
called = []
|
||||
monkeypatch.setattr(mind.llm, "complete", lambda *a, **k: called.append(1) or "x")
|
||||
assert mind._deliberation_note("s1", "a real substantive question here", "cloud", None, []) is None
|
||||
assert mind._deliberation_note("s1", "a real substantive question here", "cloud", None) is None
|
||||
assert called == [] # no LLM call when off
|
||||
|
||||
|
||||
def test_persona_core_is_tight_situational_is_gated(lyra):
|
||||
memory, mind = lyra
|
||||
from lyra import persona
|
||||
core, full = persona.core_prompt(), persona.system_prompt()
|
||||
assert "How you talk" in core and "How you actually work" not in core # voice core, self-model not
|
||||
assert len(core) < len(full) and persona.section("How you actually work")
|
||||
|
||||
memory.ensure_session("s1")
|
||||
casual = " ".join(m["content"] for m in mind.build_messages("s1", "any dinner ideas tonight?")
|
||||
if m["role"] == "system")
|
||||
meta = " ".join(m["content"] for m in mind.build_messages("s1", "how does your memory actually work?")
|
||||
if m["role"] == "system")
|
||||
assert "How you actually work" not in casual # situational section omitted on a casual turn
|
||||
assert "How you actually work" in meta # pulled in for a meta question
|
||||
|
||||
|
||||
def test_assemble_runs_the_pipeline(lyra, monkeypatch):
|
||||
memory, mind = lyra
|
||||
monkeypatch.setenv("CHAT_DELIBERATE", "false") # keep it offline for the structure test
|
||||
|
||||
Reference in New Issue
Block a user