feat: live chat deliberation — think privately before answering (less 'meh')
The chat had no thinking in it: respond() was a single gpt-4o call in default-assistant voice (numbered lists, 'would you like to...', vague). All the cognition work was background-only. This brings a thought step into the conversation. - chat: before answering a substantive turn (trivial 'ok/lol' skipped), a private _deliberate() pass — "what do you ACTUALLY think, your real take, the substance, no pleasantries" — drawing on her in-context threads/journal. The thinking is then injected as the LAST system note with voice enforcement (answer from this; no numbered list / how-to outline unless asked; no 'would you like to' closer), so it beats gpt-4o's boilerplate at the most influential position. Logged to /logs. - Wired into respond() + respond_stream(). Config CHAT_DELIBERATE (default on) to disable if the extra call's latency annoys. - persona: "talk, don't outline" — prose over listicles, the first concrete move over a survey of options. - test_chat.py (gating + note composition + disabled). Suite 84, ruff clean. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,53 @@
|
||||
"""Live chat: the deliberation pass (think privately before answering)."""
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def lyra(tmp_path, monkeypatch):
    """Provide freshly reloaded ``lyra.memory`` and ``lyra.chat`` modules.

    Setup, in order:

    1. ``LYRA_DB_PATH`` is set to ``test.db`` inside the per-test temp dir.
    2. ``lyra.llm.embed`` is replaced by a stub that returns the fixed
       vector ``[0.1, 0.2, 0.3]`` once per input text.
    3. ``lyra.memory`` is imported and reloaded, then ``lyra.chat`` likewise.

    Returns:
        A ``(memory, chat)`` tuple of the reloaded modules.
    """
    db_file = tmp_path / "test.db"
    monkeypatch.setenv("LYRA_DB_PATH", str(db_file))

    from lyra import llm

    def stub_embed(texts):
        # One constant three-element vector per requested text.
        return [[0.1, 0.2, 0.3] for _ in texts]

    monkeypatch.setattr(llm, "embed", stub_embed)

    # NOTE(review): the reloads presumably let module-level state pick up the
    # env var set above -- confirm against lyra.memory / lyra.chat.
    import lyra.memory as memory_mod

    importlib.reload(memory_mod)

    import lyra.chat as chat_mod

    importlib.reload(chat_mod)

    return memory_mod, chat_mod
|
||||
|
||||
|
||||
def test_should_deliberate_skips_trivial(lyra):
    """``_should_deliberate`` accepts substantive turns and rejects trivial ones."""
    _memory, chat = lyra

    substantive_turns = (
        "How would we actually start building this?",
        "I disagree, that seems risky",
    )
    for turn in substantive_turns:
        assert chat._should_deliberate(turn)

    trivial_turns = ("ok", "lol", "thanks", "yeah", "nice", "👍", "k")
    for turn in trivial_turns:
        assert not chat._should_deliberate(turn)

    # punctuation stripped
    assert not chat._should_deliberate("ok!")
    # too short
    assert not chat._should_deliberate("hey")
|
||||
|
||||
|
||||
def test_deliberation_note_runs_and_appends(lyra, monkeypatch):
    """``_deliberation_note`` makes one LLM call and returns a system note.

    ``chat.llm.complete`` is stubbed to return a canned thought. The note
    must carry that thought plus wording that mentions "numbered list".
    """
    _memory, chat = lyra
    seen_messages = []

    def fake_complete(messages, backend=None, model=None):
        # Record each invocation so the call count can be checked below.
        seen_messages.append(messages)
        return "I actually think the first move is the smallest end-to-end slice."

    monkeypatch.setattr(chat.llm, "complete", fake_complete)

    note = chat._deliberation_note(
        "s1", "How would we start on this?", "cloud", None, []
    )

    assert note
    assert note["role"] == "system"
    body = note["content"]
    # her thinking carried in
    assert "first move is the smallest" in body
    # voice enforcement attached
    assert "numbered list" in body.lower()
    assert len(seen_messages) == 1
|
||||
|
||||
|
||||
def test_deliberation_skipped_when_disabled(lyra, monkeypatch):
    """With ``CHAT_DELIBERATE=false`` no note is produced and no LLM call is made."""
    _memory, chat = lyra
    monkeypatch.setenv("CHAT_DELIBERATE", "false")

    llm_calls = []

    def tracking_complete(*args, **kwargs):
        # Any invocation is recorded; the test expects none.
        llm_calls.append(1)
        return "x"

    monkeypatch.setattr(chat.llm, "complete", tracking_complete)

    note = chat._deliberation_note(
        "s1", "a real substantive question here", "cloud", None, []
    )

    assert note is None
    # no LLM call when off
    assert llm_calls == []
|
||||
Reference in New Issue
Block a user