Files
project-lyra/tests/test_chat.py
T
serversdown 51c2d6abb9 perf: tighten the dynamic prompt — persona split + lean deliberation
The per-turn prompt was ~5.5K tokens (persona alone ~40%), sent up to 3x/turn.
Tightened by RELEVANCE (the control plane decides what each turn needs), not by
deletion — fidelity preserved, focus improved (buried instructions were getting
ignored), tokens roughly halved.

- persona split: core (identity + voice — always) vs situational sections pulled
  in only when relevant. mind._persona_block: self-model/origin only on meta turns
  (generous _META_HINTS), poker guardrails only in poker context (mode/strategic/
  _POKER_HINTS). persona.core_prompt()/section(); system_prompt() kept as fallback.
- lean deliberation: the private 'what do I think' pass now uses a focused context
  (her interiority + recent turns + the message), not the full persona/profile/
  narrative/recall dump. It shapes the take, not the voice.

Measured: casual Talk turn 21,949 -> 15,974 chars (-27%); deliberation 21,949 ->
6,026 (-72%); meta turns still include the self-model. Suite 98 green, ruff clean.

Real retirement of the long prompt is still the fine-tune (mouth); this is the
cheap, high-leverage cut that also improves adherence.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-24 20:48:44 +00:00

124 lines
5.1 KiB
Python

"""The mind pipeline: the deliberation pass (think privately before answering)."""
from __future__ import annotations
import importlib
import pytest
@pytest.fixture
def lyra(tmp_path, monkeypatch):
    """Provide freshly reloaded ``lyra.memory`` and ``lyra.mind`` modules.

    Before the reloads, ``LYRA_DB_PATH`` is pointed at ``test.db`` under
    ``tmp_path`` and ``llm.embed`` is replaced by a stub that returns one
    fixed three-element vector per input text.

    Returns:
        The ``(memory, mind)`` module pair.
    """
    db_file = tmp_path / "test.db"
    monkeypatch.setenv("LYRA_DB_PATH", str(db_file))

    from lyra import llm

    def fake_embed(texts):
        # One constant vector per text; a fresh list object for each entry.
        return [[0.1, 0.2, 0.3] for _ in texts]

    monkeypatch.setattr(llm, "embed", fake_embed)

    # Reload order is kept: memory first, then mind.
    import lyra.memory as memory
    memory = importlib.reload(memory)
    import lyra.mind as mind
    mind = importlib.reload(mind)
    return memory, mind
def test_should_deliberate_skips_trivial(lyra):
    """``_should_deliberate`` is truthy for substantive messages, falsy for trivial ones."""
    mind = lyra[1]

    substantive = (
        "How would we actually start building this?",
        "I disagree, that seems risky",
    )
    for message in substantive:
        assert mind._should_deliberate(message)

    trivial_replies = ("ok", "lol", "thanks", "yeah", "nice", "👍", "k")
    for reply in trivial_replies:
        assert not mind._should_deliberate(reply)

    assert not mind._should_deliberate("ok!")  # punctuation stripped
    assert not mind._should_deliberate("hey")  # too short
def test_deliberation_note_runs_and_appends(lyra, monkeypatch):
    """A substantive message triggers exactly one LLM call and yields a system note.

    The note must carry the stubbed LLM output and the voice-enforcement text
    (checked via the "numbered list" phrase).
    """
    memory, mind = lyra

    # Make the test hermetic: the disabled-path test in this module shows that
    # CHAT_DELIBERATE="false" switches the pass off, so an ambient value in the
    # developer/CI environment must not leak into this enabled-path check.
    monkeypatch.delenv("CHAT_DELIBERATE", raising=False)

    calls = []

    def fake_complete(messages, backend=None, model=None):
        # Record each prompt so the number of LLM round-trips can be asserted.
        calls.append(messages)
        return "I actually think the first move is the smallest end-to-end slice."

    memory.ensure_session("s1")
    monkeypatch.setattr(mind.llm, "complete", fake_complete)

    note = mind._deliberation_note("s1", "How would we start on this?", "cloud", None)

    assert note and note["role"] == "system"
    assert "first move is the smallest" in note["content"]  # her thinking carried in
    assert "numbered list" in note["content"].lower()  # voice enforcement attached
    assert len(calls) == 1
def test_deliberation_skipped_when_disabled(lyra, monkeypatch):
    """With ``CHAT_DELIBERATE`` set to ``"false"`` no note is built and the LLM is untouched."""
    mind = lyra[1]
    monkeypatch.setenv("CHAT_DELIBERATE", "false")

    called = []

    def spy_complete(*a, **k):
        # Leaves a marker if the LLM is ever reached.
        called.append(1)
        return "x"

    monkeypatch.setattr(mind.llm, "complete", spy_complete)

    note = mind._deliberation_note("s1", "a real substantive question here", "cloud", None)
    assert note is None
    assert called == []  # no LLM call when off
def test_persona_core_is_tight_situational_is_gated(lyra):
    """The core persona omits the self-model section; it appears only on a meta turn."""
    memory, mind = lyra
    from lyra import persona

    situational = "How you actually work"

    core = persona.core_prompt()
    full = persona.system_prompt()
    # voice core, self-model not
    assert "How you talk" in core
    assert situational not in core
    assert len(core) < len(full)
    assert persona.section(situational)

    memory.ensure_session("s1")

    def system_text(user_msg):
        # Concatenate every system-role message built for this user turn.
        built = mind.build_messages("s1", user_msg)
        return " ".join(m["content"] for m in built if m["role"] == "system")

    casual = system_text("any dinner ideas tonight?")
    meta = system_text("how does your memory actually work?")

    assert situational not in casual  # situational section omitted on a casual turn
    assert situational in meta  # pulled in for a meta question
def test_assemble_runs_the_pipeline(lyra, monkeypatch):
    """``assemble`` returns a turn with a mode set and the user message last."""
    memory, mind = lyra
    monkeypatch.setenv("CHAT_DELIBERATE", "false")  # keep it offline for the structure test
    memory.ensure_session("s1")

    user_text = "hey what's up"
    turn = mind.assemble("s1", user_text, "cloud", None)

    assert turn.mode is not None  # route ran
    assert turn.messages  # compose ran
    final = turn.messages[-1]
    assert final["role"] == "user"
    assert final["content"] == user_text
# --- mind/mouth split (P3) ----------------------------------------------
def test_mouth_target_off_by_default(monkeypatch):
    """With no ``MOUTH_*`` variables set, ``_mouth_target`` returns ``None``."""
    from lyra import config

    for var in ("MOUTH_BACKEND", "MOUTH_MODEL"):
        monkeypatch.delenv(var, raising=False)

    import lyra.chat as chat
    chat = importlib.reload(chat)

    cfg = config.load()
    target = chat._mouth_target(cfg, "cloud", "gpt-4o")
    assert target is None  # mouth == mind
def test_mouth_target_when_configured(monkeypatch):
    """With both ``MOUTH_*`` variables set, ``_mouth_target`` returns that (backend, model) pair."""
    from lyra import config

    overrides = {"MOUTH_BACKEND": "local", "MOUTH_MODEL": "dolphin3:8b"}
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)

    import lyra.chat as chat
    chat = importlib.reload(chat)

    cfg = config.load()
    target = chat._mouth_target(cfg, "cloud", "gpt-4o")
    assert target == ("local", "dolphin3:8b")
def test_voice_messages_carries_draft_and_instruction(lyra):
    """``voice_messages`` ends with the draft as an assistant turn, then a system instruction."""
    mind = lyra[1]
    draft = "draft with FACT 42"
    history = [{"role": "user", "content": "hi"}]

    out = mind.voice_messages(history, draft)

    assert out[-2] == {"role": "assistant", "content": draft}
    closing = out[-1]
    assert closing["role"] == "system"
    assert "your own voice" in closing["content"].lower()
def test_voice_pass_revoices_then_falls_back(lyra, monkeypatch):
    """``_voice_pass`` returns the LLM's re-voiced text, or the draft if the LLM raises."""
    import lyra.chat as chat
    chat = importlib.reload(chat)

    draft = "draft FACT 42"

    def revoice(msgs, backend=None, model=None):
        return "voiced (FACT 42)"

    monkeypatch.setattr(chat.llm, "complete", revoice)
    assert chat._voice_pass([], draft, "local", "dolphin3:8b") == "voiced (FACT 42)"

    # on failure it keeps the mind's draft (chat must not break)
    def boom(*a, **k):
        raise RuntimeError("mouth down")

    monkeypatch.setattr(chat.llm, "complete", boom)
    assert chat._voice_pass([], draft, "local", "dolphin3:8b") == draft