51c2d6abb9
The per-turn prompt was ~5.5K tokens (persona alone ~40%), sent up to 3x/turn. Tightened by RELEVANCE (the control plane decides what each turn needs), not by deletion — fidelity preserved, focus improved (buried instructions were getting ignored), tokens roughly halved. - persona split: core (identity + voice — always) vs situational sections pulled in only when relevant. mind._persona_block: self-model/origin only on meta turns (generous _META_HINTS), poker guardrails only in poker context (mode/strategic/ _POKER_HINTS). persona.core_prompt()/section(); system_prompt() kept as fallback. - lean deliberation: the private 'what do I think' pass now uses a focused context (her interiority + recent turns + the message), not the full persona/profile/ narrative/recall dump. It shapes the take, not the voice. Measured: casual Talk turn 21,949 -> 15,974 chars (-27%); deliberation 21,949 -> 6,026 (-72%); meta turns still include the self-model. Suite 98 green, ruff clean. Real retirement of the long prompt is still the fine-tune (mouth); this is the cheap, high-leverage cut that also improves adherence. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
124 lines
5.1 KiB
Python
124 lines
5.1 KiB
Python
"""The mind pipeline: the deliberation pass (think privately before answering)."""
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def lyra(tmp_path, monkeypatch):
    """Yield ``(memory, mind)`` modules set up for an isolated test run.

    Points ``LYRA_DB_PATH`` at a database file under ``tmp_path``, replaces
    ``llm.embed`` with a stub that returns one fixed 3-element vector per
    input text, and reloads ``lyra.memory`` and ``lyra.mind`` (presumably so
    they pick up the patched environment — confirm against those modules).
    """
    monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "test.db"))

    from lyra import llm

    def _fixed_embedding(texts):
        # One constant vector per requested text.
        return [[0.1, 0.2, 0.3] for _ in texts]

    monkeypatch.setattr(llm, "embed", _fixed_embedding)

    import lyra.memory as memory

    importlib.reload(memory)

    import lyra.mind as mind

    importlib.reload(mind)

    return memory, mind
|
|
|
|
|
|
def test_should_deliberate_skips_trivial(lyra):
    """Substantive messages get a deliberation pass; trivial ones do not."""
    mind = lyra[1]
    should_deliberate = mind._should_deliberate

    substantive = (
        "How would we actually start building this?",
        "I disagree, that seems risky",
    )
    for message in substantive:
        assert should_deliberate(message)

    for message in ("ok", "lol", "thanks", "yeah", "nice", "👍", "k"):
        assert not should_deliberate(message)

    assert not should_deliberate("ok!")  # punctuation stripped
    assert not should_deliberate("hey")  # too short
|
|
|
|
|
|
def test_deliberation_note_runs_and_appends(lyra, monkeypatch):
    """The deliberation pass calls the LLM once and returns a system note.

    The note must carry the stubbed LLM's text and mention "numbered list"
    (case-insensitively).
    """
    memory, mind = lyra
    prompts_seen = []

    def fake_complete(messages, backend=None, model=None):
        # Record each prompt so the number of LLM calls can be checked.
        prompts_seen.append(messages)
        return "I actually think the first move is the smallest end-to-end slice."

    memory.ensure_session("s1")
    monkeypatch.setattr(mind.llm, "complete", fake_complete)

    note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None)

    assert note
    assert note["role"] == "system"
    content = note["content"]
    assert "first move is the smallest" in content  # her thinking carried in
    assert "numbered list" in content.lower()  # voice enforcement attached
    assert len(prompts_seen) == 1
|
|
|
|
|
|
def test_deliberation_skipped_when_disabled(lyra, monkeypatch):
    """With ``CHAT_DELIBERATE=false`` no note is produced and the LLM is not called."""
    mind = lyra[1]
    monkeypatch.setenv("CHAT_DELIBERATE", "false")

    llm_calls = []

    def spy_complete(*args, **kwargs):
        # Any invocation at all is recorded; the return value is a placeholder.
        llm_calls.append(1)
        return "x"

    monkeypatch.setattr(mind.llm, "complete", spy_complete)

    note = mind._deliberation_note("s1", "a real substantive question here", "cloud", None)

    assert note is None
    assert not llm_calls  # no LLM call when off
|
|
|
|
|
|
def test_persona_core_is_tight_situational_is_gated(lyra):
    """The core persona omits the self-model; it is added only on meta turns.

    Checks both the persona module directly (core vs. full prompt, section
    lookup) and the system messages that ``mind.build_messages`` produces for
    a casual question versus a question about how she works.
    """
    memory, mind = lyra
    from lyra import persona

    self_model = "How you actually work"

    core = persona.core_prompt()
    full = persona.system_prompt()
    # voice core, self-model not
    assert "How you talk" in core
    assert self_model not in core
    assert len(core) < len(full)
    assert persona.section(self_model)

    memory.ensure_session("s1")

    def system_text(user_message):
        # Concatenate only the system-role messages of the built prompt.
        built = mind.build_messages("s1", user_message)
        return " ".join(msg["content"] for msg in built if msg["role"] == "system")

    casual = system_text("any dinner ideas tonight?")
    meta = system_text("how does your memory actually work?")

    assert self_model not in casual  # situational section omitted on a casual turn
    assert self_model in meta  # pulled in for a meta question
|
|
|
|
|
|
def test_assemble_runs_the_pipeline(lyra, monkeypatch):
    """``mind.assemble`` sets a mode and ends the messages with the user's text."""
    memory, mind = lyra
    user_text = "hey what's up"

    # keep it offline for the structure test
    monkeypatch.setenv("CHAT_DELIBERATE", "false")
    memory.ensure_session("s1")

    turn = mind.assemble("s1", user_text, "cloud", None)

    assert turn.mode is not None  # route ran
    assert turn.messages  # compose ran
    final_message = turn.messages[-1]
    assert final_message["role"] == "user"
    assert final_message["content"] == user_text
|
|
|
|
|
|
# --- mind/mouth split (P3) ----------------------------------------------
|
|
|
|
def test_mouth_target_off_by_default(monkeypatch):
    """Without ``MOUTH_BACKEND``/``MOUTH_MODEL`` set, ``_mouth_target`` returns None.

    Uses the module-level ``importlib`` import rather than re-importing it
    locally, matching how the ``lyra`` fixture reloads modules.
    """
    from lyra import config

    # Remove any ambient overrides; raising=False tolerates them being unset.
    monkeypatch.delenv("MOUTH_BACKEND", raising=False)
    monkeypatch.delenv("MOUTH_MODEL", raising=False)

    import lyra.chat as chat

    # Reload after the env change (presumably so chat sees the cleared
    # variables — confirm against lyra.chat).
    importlib.reload(chat)

    assert chat._mouth_target(config.load(), "cloud", "gpt-4o") is None  # mouth == mind
|
|
|
|
|
|
def test_mouth_target_when_configured(monkeypatch):
    """With both ``MOUTH_*`` variables set, ``_mouth_target`` returns that pair.

    Uses the module-level ``importlib`` import rather than re-importing it
    locally, matching how the ``lyra`` fixture reloads modules.
    """
    from lyra import config

    monkeypatch.setenv("MOUTH_BACKEND", "local")
    monkeypatch.setenv("MOUTH_MODEL", "dolphin3:8b")

    import lyra.chat as chat

    # Reload after the env change (presumably so chat sees the new
    # variables — confirm against lyra.chat).
    importlib.reload(chat)

    target = chat._mouth_target(config.load(), "cloud", "gpt-4o")
    assert target == ("local", "dolphin3:8b")
|
|
|
|
|
|
def test_voice_messages_carries_draft_and_instruction(lyra):
    """``voice_messages`` ends with the draft as an assistant turn plus a system instruction."""
    mind = lyra[1]
    draft = "draft with FACT 42"
    history = [{"role": "user", "content": "hi"}]

    out = mind.voice_messages(history, draft)

    draft_message, instruction = out[-2], out[-1]
    assert draft_message == {"role": "assistant", "content": draft}
    assert instruction["role"] == "system"
    assert "your own voice" in instruction["content"].lower()
|
|
|
|
|
|
def test_voice_pass_revoices_then_falls_back(lyra, monkeypatch):
    """``_voice_pass`` returns the mouth's output, or the draft if the mouth raises.

    The ``lyra`` fixture is requested only for its setup; its return value is
    not used here, so it is no longer unpacked into an unused local. The
    module-level ``importlib`` import is used instead of a local re-import.
    """
    import lyra.chat as chat

    importlib.reload(chat)

    draft = "draft FACT 42"

    # Happy path: whatever the mouth model returns is the final text.
    monkeypatch.setattr(
        chat.llm, "complete", lambda msgs, backend=None, model=None: "voiced (FACT 42)"
    )
    assert chat._voice_pass([], draft, "local", "dolphin3:8b") == "voiced (FACT 42)"

    # on failure it keeps the mind's draft (chat must not break)
    def boom(*a, **k):
        raise RuntimeError("mouth down")

    monkeypatch.setattr(chat.llm, "complete", boom)
    assert chat._voice_pass([], draft, "local", "dolphin3:8b") == draft
|