feat: behind-the-scenes 👍/👎 rating system (fine-tune data collection)

Brian can rate Lyra's outputs as he uses her; each rating is stored as a (context, content, rating) triple — the shape a future fine-tune / preference dataset wants, collected passively during real use.

- memory: ratings table + add_rating (upsert: one row per item, re-rating replaces), list_ratings, rating_counts
- server: POST /rate, GET /ratings/counts, GET /ratings/export (JSONL download)
- chat UI: subtle 👍/👎 on each assistant reply, captures the prompting message as context
- journal/reflection UI: 👍/👎 on each thought
- tests: counts + upsert-replace behavior

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-18 19:32:27 +00:00
parent 9befe4d403
commit 4f770f2e43
6 changed files with 173 additions and 1 deletions
@@ -92,6 +92,21 @@ CREATE TABLE IF NOT EXISTS journal (
    source TEXT
 );
 CREATE INDEX IF NOT EXISTS idx_journal_created ON journal(created_at);
 -- Brian's behind-the-scenes feedback on Lyra's outputs (chat replies, reflections,
 -- journal/metacognition). Stored as (context, content, rating) — the shape a future
 -- fine-tune / preference dataset wants. One row per rated item (re-rating updates it).
 CREATE TABLE IF NOT EXISTS ratings (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    created_at TEXT NOT NULL,
    kind TEXT NOT NULL,        -- chat | reflection | metacognition | journal
    rating INTEGER NOT NULL,   -- +1 (good / want more) or -1 (off / want less)
    content TEXT NOT NULL,     -- the rated output
    context TEXT,              -- what prompted it (e.g. the user message for a chat reply)
    ref TEXT,                  -- optional source id (journal id, session id, ...)
    note TEXT
 );
 CREATE INDEX IF NOT EXISTS idx_ratings_created ON ratings(created_at);
 """
 _conn: sqlite3.Connection | None = None
@@ -542,6 +557,41 @@ def add_journal_entry(kind: str, content: str, source: str | None = None) -> int
    return int(cur.lastrowid)
 def add_rating(kind: str, rating: int, content: str, context: str | None = None,
                ref: str | None = None, note: str | None = None) -> int:
    """Store Brian's feedback on a single Lyra output and return the new row id.

    Any existing row with the same ``kind`` and ``content`` is deleted first, so
    re-rating an item replaces its earlier rating instead of duplicating it.
    The stored rating is normalized to +1 (``rating >= 0``) or -1, and a
    non-None ``ref`` is stored in its string form.
    """
    stamp = datetime.now(timezone.utc).isoformat()
    db = _connection()
    if rating >= 0:
        sign = 1
    else:
        sign = -1
    ref_text = None if ref is None else str(ref)
    row = (stamp, kind, sign, content, context, ref_text, note)
    # Delete and insert run under the same connection context manager.
    with db:
        db.execute("DELETE FROM ratings WHERE kind = ? AND content = ?", (kind, content))
        inserted = db.execute(
            "INSERT INTO ratings (created_at, kind, rating, content, context, ref, note) "
            "VALUES (?, ?, ?, ?, ?, ?, ?)",
            row,
        )
    return int(inserted.lastrowid)
 def list_ratings(limit: int | None = None) -> list[dict]:
    """Return stored ratings as plain dicts, newest first (highest id first).

    Args:
        limit: Maximum number of rows to return; ``None`` returns every row.
            The value is coerced with ``int()`` before use.

    Returns:
        One dict per row, built from the selected columns ``id``, ``created_at``,
        ``kind``, ``rating``, ``content``, ``context``, ``ref`` and ``note``.
    """
    conn = _connection()
    sql = "SELECT id, created_at, kind, rating, content, context, ref, note FROM ratings ORDER BY id DESC"
    params: tuple[int, ...] = ()
    if limit is not None:
        # Bind the limit as a parameter instead of formatting it into the SQL text.
        sql += " LIMIT ?"
        params = (int(limit),)
    return [dict(r) for r in conn.execute(sql, params).fetchall()]
 def rating_counts() -> dict:
    """Summarize the ratings table as a dict with ``total``, ``up`` and ``down``.

    ``up`` counts rows with a positive rating and ``down`` rows with a negative
    one; COALESCE makes both 0 rather than NULL when the table is empty.
    """
    query = (
        "SELECT COUNT(*) AS total, "
        "COALESCE(SUM(CASE WHEN rating > 0 THEN 1 ELSE 0 END), 0) AS up, "
        "COALESCE(SUM(CASE WHEN rating < 0 THEN 1 ELSE 0 END), 0) AS down FROM ratings"
    )
    row = _connection().execute(query).fetchone()
    return {key: row[key] for key in ("total", "up", "down")}
 def list_journal(limit: int | None = None, kinds: tuple[str, ...] | None = None) -> list[dict]:
    """Journal entries, newest first. Optionally filter by kind."""
    conn = _connection()
@@ -142,6 +142,32 @@ def create_app() -> FastAPI:
    async def journal_data(limit: int = 300) -> dict:
        return {"entries": memory.list_journal(limit=limit)}
    @app.post("/rate")
    async def rate(request: Request) -> dict:
        """Record Brian's 👍/👎 on a Lyra output (chat reply, reflection, journal).

        Expects a JSON object with ``rating`` (a non-zero number) and ``content``
        (a non-blank string); ``kind`` (defaults to "chat"), ``context``, ``ref``
        and ``note`` are optional. The chat UI posts its session under
        ``session_id``, so that key is used as ``ref`` when ``ref`` is absent.

        Returns ``{"ok": False}`` for a malformed or incomplete payload, otherwise
        ``{"ok": True, "counts": ...}`` with the updated rating counts.
        """
        try:
            b = await request.json()
        except ValueError:  # body is not valid JSON (JSONDecodeError is a ValueError)
            return {"ok": False}
        if not isinstance(b, dict):
            return {"ok": False}
        try:
            rating = int(b.get("rating", 0))
        except (TypeError, ValueError, OverflowError):
            rating = 0  # unusable rating: rejected below instead of raising a 500
        raw_content = b.get("content")
        content = raw_content.strip() if isinstance(raw_content, str) else ""
        if not content or rating == 0:
            return {"ok": False}
        kind = b.get("kind") or "chat"
        ref = b.get("ref")
        if ref is None:
            ref = b.get("session_id")
        memory.add_rating(
            kind=kind, rating=rating, content=content,
            context=(b.get("context") or None), ref=ref, note=b.get("note"),
        )
        # Log the kind that was actually stored (not None when it was defaulted).
        logbus.log("info", "rating", kind=kind, rating=1 if rating >= 0 else -1)
        return {"ok": True, "counts": memory.rating_counts()}
    @app.get("/ratings/counts")
    async def ratings_counts() -> dict:
        """Return the current rating totals as reported by ``memory.rating_counts()``."""
        counts = memory.rating_counts()
        return counts
    @app.get("/ratings/export")
    async def ratings_export() -> Response:
        """Download every stored rating as JSONL (one JSON object per line).

        This export is the seed for a future fine-tune / preference set.
        """
        # Ending each record with "\n" gives an empty body when there are no rows.
        payload = "".join(json.dumps(row) + "\n" for row in memory.list_ratings())
        disposition = {"Content-Disposition": 'attachment; filename="lyra_ratings.jsonl"'}
        return Response(content=payload, media_type="application/x-ndjson", headers=disposition)
    @app.get("/hand/{hand_id}")
    async def hand_page(hand_id: int) -> FileResponse:
        """Replayable hand-history viewer."""
@@ -354,12 +354,46 @@
    return out.join("\n");
  }
 	// Append a 👍/👎 bar to a message element; each button forwards to rateMessage.
 	function addRateBar(div) {
 	  const makeButton = (glyph, hint) => {
 	    const btn = document.createElement("button");
 	    btn.className = "rate-btn";
 	    btn.textContent = glyph;
 	    btn.title = hint;
 	    return btn;
 	  };
 	  const up = makeButton("👍", "Good — more like this");
 	  const down = makeButton("👎", "Off — less like this");
 	  // Both buttons are passed along so rateMessage can update their highlight together.
 	  up.addEventListener("click", () => rateMessage(div, 1, up, down));
 	  down.addEventListener("click", () => rateMessage(div, -1, up, down));
 	  const bar = document.createElement("div");
 	  bar.className = "rate-bar";
 	  for (const btn of [up, down]) bar.appendChild(btn);
 	  div.appendChild(bar);
 	}
 	// Send a 👍 (value 1) or 👎 (value -1) for a message to the relay's /rate endpoint and
 	// highlight the chosen button. The nearest preceding user message is sent as context.
 	// If the request fails or the server does not confirm it, the previous highlight is restored.
 	function rateMessage(div, value, up, down) {
 	  // context = the nearest preceding user message
 	  let ctx = "", p = div.previousElementSibling;
 	  while (p) {
 	    if (p.classList && p.classList.contains("user")) { ctx = p.textContent; break; }
 	    p = p.previousElementSibling;
 	  }
 	  // Remember the current highlight so a failed save can be rolled back.
 	  const wasUp = up.classList.contains("rated");
 	  const wasDown = down.classList.contains("rated");
 	  const restore = () => {
 	    up.classList.toggle("rated", wasUp);
 	    down.classList.toggle("rated", wasDown);
 	  };
 	  fetch(`${RELAY_BASE}/rate`, {
 	    method: "POST", headers: { "Content-Type": "application/json" },
 	    // `ref` carries the session id as the rating's source id; `session_id` is kept
 	    // alongside it for compatibility with the previous payload shape.
 	    body: JSON.stringify({
 	      kind: "chat", rating: value, content: div.dataset.raw || "", context: ctx,
 	      ref: currentSession, session_id: currentSession
 	    })
 	  })
 	    .then((res) => (res.ok ? res.json() : { ok: false }))
 	    .then((data) => { if (!data || data.ok !== true) restore(); })
 	    .catch(restore);
 	  // Optimistic highlight; rolled back above if the rating was not saved.
 	  up.classList.toggle("rated", value === 1);
 	  down.classList.toggle("rated", value === -1);
 	}
 	function addMessage(role, text, autoScroll = true) {
 	  const messagesEl = document.getElementById("messages");
 	  const msgDiv = document.createElement("div");
 	  msgDiv.className = `msg ${role}`;
-	  if (role === "assistant") { msgDiv.innerHTML = renderMarkdown(text); } else { msgDiv.textContent = text; }
+	  if (role === "assistant") {
 	    msgDiv.innerHTML = renderMarkdown(text);
 	    msgDiv.dataset.raw = text;
 	    addRateBar(msgDiv);
 	  } else {
 	    msgDiv.textContent = text;
 	  }
 	  messagesEl.appendChild(msgDiv);
 	  // Auto-scroll to bottom if enabled
@@ -52,6 +52,12 @@
    .time { color: var(--fade); font-size: .72rem; }
    .src  { color: var(--fade); font-size: .68rem; opacity: .7; }
    .text { font-size: .98rem; line-height: 1.55; }
    /* Per-entry 👍/👎 rating buttons — faded until the entry is hovered;
       a button carrying the .rated class stays highlighted. */
    .jrate { display: flex; gap: 8px; margin-top: 6px; opacity: .35; }
    .entry:hover .jrate { opacity: .85; }
    .jr { background: none; border: none; cursor: pointer; font-size: .85rem; padding: 2px 5px;
          border-radius: 5px; filter: grayscale(.6); -webkit-tap-highlight-color: transparent; }
    .jr:hover { filter: none; background: rgba(255,122,0,.12); }
    .jr.rated { filter: none; background: rgba(255,122,0,.25); opacity: 1; }
    .empty { color: var(--fade); text-align: center; padding: 44px 16px; }
    .hidden { display: none !important; }
  </style>
@@ -115,12 +121,29 @@
              ${e.source ? `<span class="src">via ${esc(e.source)}</span>` : ''}
            </div>
            <div class="text">${esc(e.content)}</div>
            <div class="jrate">
              <button class="jr" data-id="${e.id}" data-val="1">👍</button>
              <button class="jr" data-id="${e.id}" data-val="-1">👎</button>
            </div>
          </div>
        </div>`;
      }
      root.innerHTML = html;
    }
    // 👍/👎 on a thought -> /rate (fine-tune signal)
    // One delegated listener on the list root handles every entry's rating buttons.
    root.addEventListener('click', (ev) => {
      const btn = ev.target.closest('.jr');
      if (!btn) return;
      const entry = entries.find((x) => String(x.id) === btn.dataset.id);
      if (!entry) return;
      const payload = {
        kind: entry.kind,
        rating: Number(btn.dataset.val),
        content: entry.content,
        ref: entry.id
      };
      fetch('/rate', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload)
      }).catch(() => {});
      // Highlight only the clicked button within this entry's bar.
      for (const sibling of btn.parentElement.querySelectorAll('.jr')) {
        sibling.classList.remove('rated');
      }
      btn.classList.add('rated');
    });
    async function load(){
      try {
        const r = await fetch('/journal/data', { cache: 'no-store' });
@@ -994,3 +994,14 @@ select:hover {
  border-radius: 6px; padding: 10px 12px; margin: 8px 0; overflow-x: auto;
 }
 .msg.assistant pre code { background: none; padding: 0; font-size: 0.85em; }
 /* Behind-the-scenes 👍/👎 feedback (fine-tune signal) — subtle until hovered. */
 .rate-bar { display: flex; gap: 6px; margin-top: 7px; opacity: 0.3; transition: opacity .15s; }
 .msg.assistant:hover .rate-bar { opacity: 0.85; }
 .rate-btn {
  background: none; border: none; cursor: pointer; font-size: 0.85rem;
  padding: 2px 5px; border-radius: 5px; line-height: 1; filter: grayscale(0.6);
  -webkit-tap-highlight-color: transparent;
 }
 .rate-btn:hover { filter: none; background: rgba(255,122,0,0.12); }
 .rate-btn.rated { filter: none; background: rgba(255,122,0,0.25); opacity: 1; }
@@ -0,0 +1,28 @@
 """Behind-the-scenes feedback storage (fine-tune signal)."""
 from __future__ import annotations
 import importlib
 import pytest
@pytest.fixture
 def memory(tmp_path, monkeypatch):
    """Return a freshly reloaded ``lyra.memory`` module for one test.

    ``LYRA_DB_PATH`` is pointed at a file under ``tmp_path`` and ``llm.embed`` is
    replaced with a stub that returns a fixed 3-float vector per input text.
    """
    monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "t.db"))
    from lyra import llm
    monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts])
    import lyra.memory as m
    # Reload after the env var is set — presumably so the module picks up the
    # temporary LYRA_DB_PATH instead of earlier state; confirm against lyra.memory.
    importlib.reload(m)
    return m
 def test_rating_counts_and_upsert(memory):
    """Counts reflect each rating, and re-rating the same content replaces its row."""
    memory.add_rating("chat", 1, "good reply", context="hey")
    memory.add_rating("reflection", -1, "repetitive thought")
    assert memory.rating_counts() == {"total": 2, "up": 1, "down": 1}
    contexts = [row["context"] for row in memory.list_ratings()]
    assert "hey" in contexts
    # Re-rate the chat reply: still two rows in total, but now both are thumbs-down.
    memory.add_rating("chat", -1, "good reply")
    assert memory.rating_counts() == {"total": 2, "up": 0, "down": 2}
    flipped = [
        row for row in memory.list_ratings()
        if row["content"] == "good reply" and row["rating"] == -1
    ]
    assert flipped