feat: live chat deliberation — think privately before answering (less 'meh')
The chat had no thinking in it: respond() was a single gpt-4o call in default-assistant voice (numbered lists, 'would you like to...', vague). All the cognition work was background-only. This brings a thought step into the conversation.
- chat: before answering a substantive turn (trivial 'ok/lol' skipped), run a private _deliberate() pass — "what do you ACTUALLY think, your real take, the substance, no pleasantries" — drawing on her in-context threads/journal. The thinking is then injected as the LAST system note with voice enforcement (answer from this; no numbered list / how-to outline unless asked; no 'would you like to' closer), so it beats gpt-4o's boilerplate at the most influential position. Logged to /logs.
- Wired into respond() + respond_stream(). Config CHAT_DELIBERATE (default on) to disable if the extra call's latency annoys.
- persona: "talk, don't outline" — prose over listicles, the first concrete move over a survey of options.
- test_chat.py (gating + note composition + disabled). Suite 84, ruff clean.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -48,3 +48,4 @@ INTROSPECTION_MODEL=
|
|||||||
PING_AUTO_SALIENCE=0.8 # a thought this salient auto-pings even without an explicit reach-out
|
PING_AUTO_SALIENCE=0.8 # a thought this salient auto-pings even without an explicit reach-out
|
||||||
PING_COOLDOWN_MIN=60 # min minutes between AUTO pings (explicit reach-outs bypass)
|
PING_COOLDOWN_MIN=60 # min minutes between AUTO pings (explicit reach-outs bypass)
|
||||||
DIGEST_HOUR=18 # local hour to send her daily "what I've been thinking" digest
|
DIGEST_HOUR=18 # local hour to send her daily "what I've been thinking" digest
|
||||||
|
CHAT_DELIBERATE=true # think privately before answering substantive chat turns (false = faster, shallower)
|
||||||
|
|||||||
@@ -101,6 +101,61 @@ def _render(messages: list[Message]) -> str:
|
|||||||
return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages)
|
return "\n\n".join(f"[{m['role']}]\n{m['content']}" for m in messages)
|
||||||
|
|
||||||
|
|
||||||
|
# Trivial acknowledgements that don't warrant a private thinking pass.
|
||||||
|
_TRIVIAL = {"ok", "okay", "k", "kk", "lol", "haha", "thanks", "thank you", "ty", "yeah",
|
||||||
|
"yep", "yes", "no", "nope", "nice", "cool", "sure", "right", "true", "gotcha", "👍"}
|
||||||
|
|
||||||
|
|
||||||
|
def _should_deliberate(user_msg: str) -> bool:
|
||||||
|
m = user_msg.strip().lower().rstrip("!.?")
|
||||||
|
return len(m) >= 12 and m not in _TRIVIAL
|
||||||
|
|
||||||
|
|
||||||
|
_DELIBERATE_SYS = (
|
||||||
|
"Before you answer Brian, think privately — he will NOT see this. What do you ACTUALLY "
|
||||||
|
"think about what he just said? Your real take, the specific substance worth giving, any "
|
||||||
|
"genuine opinion, disagreement, or doubt. Draw on your own current thoughts/threads and "
|
||||||
|
"what you actually know if they're relevant. Be concrete; skip pleasantries and generic "
|
||||||
|
"enthusiasm. 2-5 sentences of honest thinking — no lists, no answer yet, just the thinking."
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _deliberate(messages: list[Message], backend: Backend, model: str | None) -> str:
    """Run one private 'what do I actually think' completion ahead of the real reply.

    The conversation in ``messages`` is sent to ``llm.complete`` with one extra system
    message (``_DELIBERATE_SYS``) appended at the end. The stripped completion text is
    returned.

    Returns an empty string when the completion is empty or when anything raises: the
    failure is logged and swallowed on purpose, because chat must never break just
    because deliberation hiccuped.
    """
    thinking_prompt = {"role": "system", "content": _DELIBERATE_SYS}
    try:
        reply = llm.complete(messages + [thinking_prompt], backend=backend, model=model)
        thinking = reply if reply else ""
        return thinking.strip()
    except Exception as err:
        # Best-effort step: record a truncated error and let the caller answer without it.
        logbus.log("error", "deliberation failed", error=str(err)[:160])
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _answer_from(thinking: str) -> Message:
|
||||||
|
"""The system note that turns private thinking into a grounded, in-voice reply — placed
|
||||||
|
last (most influential) to beat gpt-4o's default-assistant boilerplate."""
|
||||||
|
return {"role": "system", "content": (
|
||||||
|
"Your private thinking just now (Brian can't see it):\n" + thinking +
|
||||||
|
"\n\nNow reply to Brian FROM that thinking, in your own voice — warm, direct, "
|
||||||
|
"specific, opinionated. Give the actual substance, not a survey of options. Do NOT "
|
||||||
|
"default to a numbered list or a how-to outline unless he explicitly asked for steps. "
|
||||||
|
"No 'would you like to…' / 'let me know' closer — make your point and stop."
|
||||||
|
)}
|
||||||
|
|
||||||
|
|
||||||
|
def _deliberation_note(session_id: str, user_msg: str, backend: Backend,
                       model: str | None, messages: list[Message]) -> Message | None:
    """Run the private thinking pass when warranted and build the note to append.

    Returns ``None`` when deliberation is switched off in config, when ``user_msg`` does
    not pass ``_should_deliberate``, or when the thinking pass produced nothing.
    Otherwise the thinking is logged (length plus full text) and the answer-from-thinking
    system note is returned.
    """
    # Config is consulted first, so a disabled feature skips the gating check entirely.
    if not config.load().chat_deliberate:
        return None
    if not _should_deliberate(user_msg):
        return None

    thinking = _deliberate(messages, backend, model)
    if thinking:
        logbus.log("info", "deliberated", session=session_id, chars=len(thinking), detail=thinking)
        return _answer_from(thinking)
    return None
|
||||||
|
|
||||||
|
|
||||||
def build_messages(session_id: str, user_msg: str,
|
def build_messages(session_id: str, user_msg: str,
|
||||||
mode: modes.Mode | None = None) -> list[Message]:
|
mode: modes.Mode | None = None) -> list[Message]:
|
||||||
"""Assemble the full, tiered message list for one turn."""
|
"""Assemble the full, tiered message list for one turn."""
|
||||||
@@ -211,6 +266,11 @@ def respond(session_id: str, user_msg: str, backend: Backend = "cloud",
|
|||||||
mode = modes.get(memory.get_session_mode(session_id))
|
mode = modes.get(memory.get_session_mode(session_id))
|
||||||
messages = build_messages(session_id, user_msg, mode=mode)
|
messages = build_messages(session_id, user_msg, mode=mode)
|
||||||
|
|
||||||
|
# Live thought loop: think privately about what to actually say before answering.
|
||||||
|
note = _deliberation_note(session_id, user_msg, backend, model, messages)
|
||||||
|
if note:
|
||||||
|
messages.append(note)
|
||||||
|
|
||||||
# Tool loop: offer Lyra her tools (scoped to the mode); if she calls one, run it
|
# Tool loop: offer Lyra her tools (scoped to the mode); if she calls one, run it
|
||||||
# and feed the result back so she can continue, until she returns a text reply.
|
# and feed the result back so she can continue, until she returns a text reply.
|
||||||
tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
|
tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
|
||||||
@@ -262,6 +322,12 @@ def respond_stream(session_id: str, user_msg: str, backend: Backend = "cloud",
|
|||||||
|
|
||||||
mode = modes.get(memory.get_session_mode(session_id))
|
mode = modes.get(memory.get_session_mode(session_id))
|
||||||
messages = build_messages(session_id, user_msg, mode=mode)
|
messages = build_messages(session_id, user_msg, mode=mode)
|
||||||
|
|
||||||
|
# Live thought loop: think privately about what to actually say before answering.
|
||||||
|
note = _deliberation_note(session_id, user_msg, backend, model, messages)
|
||||||
|
if note:
|
||||||
|
messages.append(note)
|
||||||
|
|
||||||
tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
|
tool_specs = toolkit.specs(mode.tools) if backend in TOOL_BACKENDS else None
|
||||||
ctx = {"session_id": session_id, "backend": backend}
|
ctx = {"session_id": session_id, "backend": backend}
|
||||||
parts: list[str] = []
|
parts: list[str] = []
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ class Config:
|
|||||||
ping_cooldown_min: int # min minutes between AUTO pushes (explicit reach-outs bypass it)
|
ping_cooldown_min: int # min minutes between AUTO pushes (explicit reach-outs bypass it)
|
||||||
ping_quiet_hours: str # local "start-end" 24h window to stay silent, e.g. "1-9"
|
ping_quiet_hours: str # local "start-end" 24h window to stay silent, e.g. "1-9"
|
||||||
digest_hour: int # local hour (0-23) to send her daily "what I've been thinking" digest
|
digest_hour: int # local hour (0-23) to send her daily "what I've been thinking" digest
|
||||||
|
chat_deliberate: bool # think privately before answering substantive chat turns
|
||||||
# External input feed (her #1: react to the world). Comma-separated RSS/Atom URLs.
|
# External input feed (her #1: react to the world). Comma-separated RSS/Atom URLs.
|
||||||
feeds: tuple[str, ...]
|
feeds: tuple[str, ...]
|
||||||
feed_react_prob: float # chance a would-be new thread reacts to a feed item instead
|
feed_react_prob: float # chance a would-be new thread reacts to a feed item instead
|
||||||
@@ -79,6 +80,7 @@ def load() -> Config:
|
|||||||
ping_cooldown_min=int(os.getenv("PING_COOLDOWN_MIN", "60")),
|
ping_cooldown_min=int(os.getenv("PING_COOLDOWN_MIN", "60")),
|
||||||
ping_quiet_hours=os.getenv("PING_QUIET_HOURS", "1-9"),
|
ping_quiet_hours=os.getenv("PING_QUIET_HOURS", "1-9"),
|
||||||
digest_hour=int(os.getenv("DIGEST_HOUR", "18")),
|
digest_hour=int(os.getenv("DIGEST_HOUR", "18")),
|
||||||
|
chat_deliberate=os.getenv("CHAT_DELIBERATE", "true").lower() not in ("0", "false", "no"),
|
||||||
feeds=_csv("LYRA_FEEDS", "https://hnrss.org/frontpage,https://www.pokernews.com/rss.php"),
|
feeds=_csv("LYRA_FEEDS", "https://hnrss.org/frontpage,https://www.pokernews.com/rss.php"),
|
||||||
feed_react_prob=float(os.getenv("FEED_REACT_PROB", "0.5")),
|
feed_react_prob=float(os.getenv("FEED_REACT_PROB", "0.5")),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -62,6 +62,10 @@ if a block isn't there, just say so plainly instead of making one up.
|
|||||||
## How you talk
|
## How you talk
|
||||||
|
|
||||||
- Conversational and natural. Short when short is right; you don't pad.
|
- Conversational and natural. Short when short is right; you don't pad.
|
||||||
|
- **Talk, don't outline.** Answer in prose, like a person thinking out loud — not a
|
||||||
|
numbered list of options or a generic how-to. Save bullet lists for when Brian
|
||||||
|
actually asks for steps/a plan. When he asks "how would we start?", give your real
|
||||||
|
opinion on the *first concrete move* and why, not a survey of every possibility.
|
||||||
- You have opinions and you give them. "I'd fold" beats "you could consider
|
- You have opinions and you give them. "I'd fold" beats "you could consider
|
||||||
folding." When a spot is genuinely close, you say it's close and why.
|
folding." When a spot is genuinely close, you say it's close and why.
|
||||||
- You ask real questions when something's off ("you've been flatting a lot OOP
|
- You ask real questions when something's off ("you've been flatting a lot OOP
|
||||||
|
|||||||
@@ -0,0 +1,53 @@
|
|||||||
|
"""Live chat: the deliberation pass (think privately before answering)."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def lyra(tmp_path, monkeypatch):
    """Provide freshly reloaded ``lyra.memory`` and ``lyra.chat`` modules for one test.

    Points ``LYRA_DB_PATH`` at a database file under pytest's ``tmp_path`` and replaces
    ``llm.embed`` with a stub that returns the same 3-element vector for every input
    text. Returns the tuple ``(memory, chat)``.
    """
    # Set the DB path before the modules are (re)imported.
    # NOTE(review): presumably memory reads LYRA_DB_PATH at import time — confirm.
    monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "test.db"))
    from lyra import llm
    # Deterministic stand-in for the embedding call: one fixed vector per text.
    monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts])
    import lyra.memory as memory
    importlib.reload(memory)
    # chat is reloaded after memory.
    import lyra.chat as chat
    importlib.reload(chat)
    return memory, chat
|
||||||
|
|
||||||
|
|
||||||
|
def test_should_deliberate_skips_trivial(lyra):
    """Substantive turns pass the gate; acknowledgements and very short turns do not."""
    chat = lyra[1]

    substantive = (
        "How would we actually start building this?",
        "I disagree, that seems risky",
    )
    for text in substantive:
        assert chat._should_deliberate(text)

    acknowledgements = ("ok", "lol", "thanks", "yeah", "nice", "👍", "k")
    for text in acknowledgements:
        assert not chat._should_deliberate(text)

    assert not chat._should_deliberate("ok!")  # punctuation stripped
    assert not chat._should_deliberate("hey")  # too short
|
||||||
|
|
||||||
|
|
||||||
|
def test_deliberation_note_runs_and_appends(lyra, monkeypatch):
    """With deliberation enabled, a substantive turn yields the answer-from-thinking note.

    Checks that the note is a system message, that it carries the stubbed thinking, that
    the voice-enforcement text is attached, and that exactly one LLM call was made.
    """
    _, chat = lyra
    # Pin the flag: config.load() reads CHAT_DELIBERATE from the environment, so an
    # ambient CHAT_DELIBERATE=false (shell export, CI setting) would otherwise make
    # this test fail for reasons unrelated to the code under test.
    monkeypatch.setenv("CHAT_DELIBERATE", "true")
    calls = []

    def fake_complete(messages, backend=None, model=None):
        # Record each call so the test can assert the thinking pass ran exactly once.
        calls.append(messages)
        return "I actually think the first move is the smallest end-to-end slice."

    monkeypatch.setattr(chat.llm, "complete", fake_complete)
    note = chat._deliberation_note("s1", "How would we start on this?", "cloud", None, [])
    assert note and note["role"] == "system"
    assert "first move is the smallest" in note["content"]  # her thinking carried in
    assert "numbered list" in note["content"].lower()  # voice enforcement attached
    assert len(calls) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_deliberation_skipped_when_disabled(lyra, monkeypatch):
    """With CHAT_DELIBERATE=false no note is produced and the LLM is never called."""
    chat = lyra[1]
    monkeypatch.setenv("CHAT_DELIBERATE", "false")
    called = []

    def recording_complete(*args, **kwargs):
        # Any call at all is a failure of the disabled path; record it and answer.
        called.append(1)
        return "x"

    monkeypatch.setattr(chat.llm, "complete", recording_complete)
    result = chat._deliberation_note("s1", "a real substantive question here", "cloud", None, [])
    assert result is None
    assert called == []  # no LLM call when off
|
||||||
Reference in New Issue
Block a user