feat: live chat deliberation — think privately before answering (less 'meh')

The chat had no thinking in it: respond() was a single gpt-4o call in default-assistant voice (numbered lists, 'would you like to...', vague). All the cognition work was background-only. This brings a thought step into the conversation.

- chat: before answering a substantive turn (trivial 'ok/lol' skipped), a private _deliberate() pass — "what do you ACTUALLY think, your real take, the substance, no pleasantries" — drawing on her in-context threads/journal. The thinking is then injected as the LAST system note with voice enforcement (answer from this; no numbered list / how-to outline unless asked; no 'would you like to' closer), so it beats gpt-4o's boilerplate at the most influential position. Logged to /logs.
- Wired into respond() + respond_stream(). Config CHAT_DELIBERATE (default on) to disable if the extra call's latency annoys.
- persona: "talk, don't outline" — prose over listicles, the first concrete move over a survey of options.
- test_chat.py (gating + note composition + disabled). Suite 84, ruff clean.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-23 00:35:49 +00:00
parent ea30c3dd67
commit 97afa82594
5 changed files with 126 additions and 0 deletions
@@ -101,6 +101,61 @@ def _render(messages: list[Message]) -> str:
    return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages)


+# Trivial acknowledgements that don't warrant a private thinking pass. Matched against
+# the user message after it has been stripped, lower-cased and had any trailing "!",
+# "." or "?" removed (see _should_deliberate).
+# NOTE(review): every entry here is shorter than the 12-character minimum that
+# _should_deliberate also enforces, so the length gate alone already rejects them —
+# confirm whether both checks are intended.
+_TRIVIAL = {"ok", "okay", "k", "kk", "lol", "haha", "thanks", "thank you", "ty", "yeah",
+            "yep", "yes", "no", "nope", "nice", "cool", "sure", "right", "true", "gotcha", "👍"}
+
+
+def _should_deliberate(user_msg: str) -> bool:
+    m = user_msg.strip().lower().rstrip("!.?")
+    return len(m) >= 12 and m not in _TRIVIAL
+
+
+# System prompt for the private thinking pass. _deliberate appends it, as a system
+# message, after the conversation; it asks for 2-5 sentences of candid, list-free
+# thinking rather than an answer.
+_DELIBERATE_SYS = (
+    "Before you answer Brian, think privately — he will NOT see this. What do you ACTUALLY "
+    "think about what he just said? Your real take, the specific substance worth giving, any "
+    "genuine opinion, disagreement, or doubt. Draw on your own current thoughts/threads and "
+    "what you actually know if they're relevant. Be concrete; skip pleasantries and generic "
+    "enthusiasm. 2-5 sentences of honest thinking — no lists, no answer yet, just the thinking."
+)
+
+
+def _deliberate(messages: list[Message], backend: Backend, model: str | None) -> str:
+    """One private 'what do I actually think' pass before replying. Returns her thinking
+    (empty on any failure — chat must never break because deliberation hiccuped)."""
+    try:
+        out = llm.complete(messages + [{"role": "system", "content": _DELIBERATE_SYS}],
+                           backend=backend, model=model)
+        return (out or "").strip()
+    except Exception as exc:
+        logbus.log("error", "deliberation failed", error=str(exc)[:160])
+        return ""
+
+
+def _answer_from(thinking: str) -> Message:
+    """The system note that turns private thinking into a grounded, in-voice reply — placed
+    last (most influential) to beat gpt-4o's default-assistant boilerplate."""
+    return {"role": "system", "content": (
+        "Your private thinking just now (Brian can't see it):\n" + thinking +
+        "\n\nNow reply to Brian FROM that thinking, in your own voice — warm, direct, "
+        "specific, opinionated. Give the actual substance, not a survey of options. Do NOT "
+        "default to a numbered list or a how-to outline unless he explicitly asked for steps. "
+        "No 'would you like to…' / 'let me know' closer — make your point and stop."
+    )}
+
+
+def _deliberation_note(session_id: str, user_msg: str, backend: Backend,
+                       model: str | None, messages: list[Message]) -> Message | None:
+    """Run the private thinking pass if warranted; return the answer-from-thinking note."""
+    if not config.load().chat_deliberate or not _should_deliberate(user_msg):
+        return None
+    thinking = _deliberate(messages, backend, model)
+    if not thinking:
+        return None
+    logbus.log("info", "deliberated", session=session_id, chars=len(thinking), detail=thinking)
+    return _answer_from(thinking)
+
+
 def build_messages(session_id: str, user_msg: str,
                   mode: modes.Mode | None = None) -> list[Message]:
    """Assemble the full, tiered message list for one turn."""
@@ -211,6 +266,11 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
    mode = modes.get(memory.get_session_mode(session_id))
    messages = build_messages(session_id, user_msg, mode=mode)

+    # Live thought loop: think privately about what to actually say before answering.
+    note = _deliberation_note(session_id, user_msg, backend, model, messages)
+    if note:
+        messages.append(note)
+
    # Tool loop: offer Lyra her tools (scoped to the mode); if she calls one, run it
    # and feed the result back so she can continue, until she returns a text reply.
    tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
@@ -262,6 +322,12 @@ def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",

    mode = modes.get(memory.get_session_mode(session_id))
    messages = build_messages(session_id, user_msg, mode=mode)
+
+    # Live thought loop: think privately about what to actually say before answering.
+    note = _deliberation_note(session_id, user_msg, backend, model, messages)
+    if note:
+        messages.append(note)
+
    tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
    ctx = {"session_id": session_id, "backend": backend}
    parts: list[str] = []