feat: behind-the-scenes 👍/👎 rating system (fine-tune data collection)

Brian can rate Lyra's outputs as he uses her; each rating is stored as a
(context, content, rating) triple — the shape a future fine-tune / preference
dataset wants, collected passively during real use.

- memory: ratings table + add_rating (upsert: one row per item, re-rating
  replaces), list_ratings, rating_counts
- server: POST /rate, GET /ratings/counts, GET /ratings/export (JSONL download)
- chat UI: subtle 👍/👎 on each assistant reply, captures the prompting message
  as context
- journal/reflection UI: 👍/👎 on each thought
- tests: counts + upsert-replace behavior

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-18 19:32:27 +00:00
parent 9befe4d403
commit 4f770f2e43
6 changed files with 173 additions and 1 deletions
+50
View File
@@ -92,6 +92,21 @@ CREATE TABLE IF NOT EXISTS journal (
source TEXT source TEXT
); );
CREATE INDEX IF NOT EXISTS idx_journal_created ON journal(created_at); CREATE INDEX IF NOT EXISTS idx_journal_created ON journal(created_at);
-- Brian's behind-the-scenes feedback on Lyra's outputs (chat replies, reflections,
-- journal/metacognition). Stored as (context, content, rating) — the shape a future
-- fine-tune / preference dataset wants. One row per rated item (re-rating updates it).
CREATE TABLE IF NOT EXISTS ratings (
id INTEGER PRIMARY KEY AUTOINCREMENT,
created_at TEXT NOT NULL,
kind TEXT NOT NULL, -- chat | reflection | metacognition | journal
rating INTEGER NOT NULL, -- +1 (good / want more) or -1 (off / want less)
content TEXT NOT NULL, -- the rated output
context TEXT, -- what prompted it (e.g. the user message for a chat reply)
ref TEXT, -- optional source id (journal id, session id, ...)
note TEXT
);
CREATE INDEX IF NOT EXISTS idx_ratings_created ON ratings(created_at);
""" """
_conn: sqlite3.Connection | None = None _conn: sqlite3.Connection | None = None
@@ -542,6 +557,41 @@ def add_journal_entry(kind: str, content: str, source: str | None = None) -> int
return int(cur.lastrowid) return int(cur.lastrowid)
def add_rating(kind: str, rating: int, content: str, context: str | None = None,
ref: str | None = None, note: str | None = None) -> int:
"""Record (or replace) Brian's feedback on one Lyra output. One row per item:
re-rating the same content updates it. Returns row id."""
now = datetime.now(timezone.utc).isoformat()
conn = _connection()
with conn:
conn.execute("DELETE FROM ratings WHERE kind = ? AND content = ?", (kind, content))
cur = conn.execute(
"INSERT INTO ratings (created_at, kind, rating, content, context, ref, note) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
(now, kind, 1 if rating >= 0 else -1, content, context,
str(ref) if ref is not None else None, note),
)
return int(cur.lastrowid)
def list_ratings(limit: int | None = None) -> list[dict]:
    """Return stored ratings as dicts, newest (highest id) first.

    Args:
        limit: Maximum number of rows to return; ``None`` returns every row.

    Returns:
        One dict per row with the keys id, created_at, kind, rating, content,
        context, ref and note.
    """
    sql = "SELECT id, created_at, kind, rating, content, context, ref, note FROM ratings ORDER BY id DESC"
    params: tuple[int, ...] = ()
    if limit is not None:
        # Bind the limit as a parameter instead of formatting it into the SQL.
        sql += " LIMIT ?"
        params = (int(limit),)
    rows = _connection().execute(sql, params).fetchall()
    return [dict(row) for row in rows]
def rating_counts() -> dict:
    """Summarise the ratings table.

    Returns:
        A dict with ``total`` (all rows), ``up`` (rows with rating > 0) and
        ``down`` (rows with rating < 0). COALESCE keeps ``up``/``down`` at 0
        rather than NULL when the table is empty.
    """
    query = (
        "SELECT COUNT(*) AS total, "
        "COALESCE(SUM(CASE WHEN rating > 0 THEN 1 ELSE 0 END), 0) AS up, "
        "COALESCE(SUM(CASE WHEN rating < 0 THEN 1 ELSE 0 END), 0) AS down FROM ratings"
    )
    row = _connection().execute(query).fetchone()
    return {column: row[column] for column in ("total", "up", "down")}
def list_journal(limit: int | None = None, kinds: tuple[str, ...] | None = None) -> list[dict]: def list_journal(limit: int | None = None, kinds: tuple[str, ...] | None = None) -> list[dict]:
"""Journal entries, newest first. Optionally filter by kind.""" """Journal entries, newest first. Optionally filter by kind."""
conn = _connection() conn = _connection()
+26
View File
@@ -142,6 +142,32 @@ def create_app() -> FastAPI:
async def journal_data(limit: int = 300) -> dict: async def journal_data(limit: int = 300) -> dict:
return {"entries": memory.list_journal(limit=limit)} return {"entries": memory.list_journal(limit=limit)}
@app.post("/rate")
async def rate(request: Request) -> dict:
    """Record Brian's 👍/👎 on a Lyra output (chat reply, reflection, journal).

    Expects a JSON object with ``rating`` (non-zero number) and ``content``
    (non-empty string); ``kind`` (defaults to "chat"), ``context``, ``ref``
    and ``note`` are optional. ``session_id`` is accepted as a fallback for
    ``ref`` because the chat UI sends the session under that key.

    Returns:
        ``{"ok": False}`` for any unusable payload, otherwise
        ``{"ok": True, "counts": ...}`` with the updated tallies.
    """
    try:
        body = await request.json()
    except ValueError:
        # Malformed JSON is a bad payload, not a server error.
        return {"ok": False}
    if not isinstance(body, dict):
        return {"ok": False}
    try:
        rating = int(body.get("rating", 0))
    except (TypeError, ValueError):
        return {"ok": False}
    raw_content = body.get("content")
    content = raw_content.strip() if isinstance(raw_content, str) else ""
    if not content or rating == 0:
        return {"ok": False}
    # Resolve these once so the stored row and the log line agree.
    kind = body.get("kind") or "chat"
    value = 1 if rating > 0 else -1
    ref = body.get("ref")
    if ref is None:
        ref = body.get("session_id")
    memory.add_rating(
        kind=kind, rating=value, content=content,
        context=(body.get("context") or None), ref=ref, note=body.get("note"),
    )
    logbus.log("info", "rating", kind=kind, rating=value)
    return {"ok": True, "counts": memory.rating_counts()}
@app.get("/ratings/counts")
async def ratings_counts() -> dict:
    """Return the current rating tallies exactly as memory.rating_counts() reports them."""
    tallies = memory.rating_counts()
    return tallies
@app.get("/ratings/export")
async def ratings_export() -> Response:
    """Download every stored rating as JSONL (one JSON object per line).

    This is the seed for a future fine-tune / preference set. With no ratings
    the response body is empty.
    """
    # Each row becomes one newline-terminated JSON line.
    payload = "".join(f"{json.dumps(row)}\n" for row in memory.list_ratings())
    download_headers = {"Content-Disposition": 'attachment; filename="lyra_ratings.jsonl"'}
    return Response(
        content=payload,
        media_type="application/x-ndjson",
        headers=download_headers,
    )
@app.get("/hand/{hand_id}") @app.get("/hand/{hand_id}")
async def hand_page(hand_id: int) -> FileResponse: async def hand_page(hand_id: int) -> FileResponse:
"""Replayable hand-history viewer.""" """Replayable hand-history viewer."""
+35 -1
View File
@@ -354,12 +354,46 @@
return out.join("\n"); return out.join("\n");
} }
function addRateBar(div) {
  // Build one feedback button showing `glyph`, with `tip` as its hover title.
  const makeButton = (glyph, tip) => {
    const btn = document.createElement("button");
    btn.className = "rate-btn";
    btn.textContent = glyph;
    btn.title = tip;
    return btn;
  };

  const up = makeButton("👍", "Good — more like this");
  const down = makeButton("👎", "Off — less like this");

  // Each click reports its value and passes both buttons so the pair's
  // highlight can be updated together.
  up.addEventListener("click", () => rateMessage(div, 1, up, down));
  down.addEventListener("click", () => rateMessage(div, -1, up, down));

  const bar = document.createElement("div");
  bar.className = "rate-bar";
  for (const btn of [up, down]) {
    bar.appendChild(btn);
  }
  div.appendChild(bar);
}
function rateMessage(div, value, up, down) {
  // Context = text of the nearest preceding user message (empty if there is none).
  let ctx = "";
  for (let p = div.previousElementSibling; p; p = p.previousElementSibling) {
    if (p.classList && p.classList.contains("user")) { ctx = p.textContent; break; }
  }
  fetch(`${RELAY_BASE}/rate`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      kind: "chat",
      rating: value,
      content: div.dataset.raw || "",
      context: ctx,
      // The /rate handler stores the source id from `ref`, so send the session
      // there; `session_id` is kept so the payload stays backward-compatible.
      ref: currentSession,
      session_id: currentSession
    })
  }).catch(() => {}); // best-effort: a failed rating must never disturb the chat
  // Highlight whichever button matches the submitted value.
  up.classList.toggle("rated", value === 1);
  down.classList.toggle("rated", value === -1);
}
function addMessage(role, text, autoScroll = true) { function addMessage(role, text, autoScroll = true) {
const messagesEl = document.getElementById("messages"); const messagesEl = document.getElementById("messages");
const msgDiv = document.createElement("div"); const msgDiv = document.createElement("div");
msgDiv.className = `msg ${role}`; msgDiv.className = `msg ${role}`;
if (role === "assistant") { msgDiv.innerHTML = renderMarkdown(text); } else { msgDiv.textContent = text; } if (role === "assistant") {
msgDiv.innerHTML = renderMarkdown(text);
msgDiv.dataset.raw = text;
addRateBar(msgDiv);
} else {
msgDiv.textContent = text;
}
messagesEl.appendChild(msgDiv); messagesEl.appendChild(msgDiv);
// Auto-scroll to bottom if enabled // Auto-scroll to bottom if enabled
+23
View File
@@ -52,6 +52,12 @@
.time { color: var(--fade); font-size: .72rem; } .time { color: var(--fade); font-size: .72rem; }
.src { color: var(--fade); font-size: .68rem; opacity: .7; } .src { color: var(--fade); font-size: .68rem; opacity: .7; }
.text { font-size: .98rem; line-height: 1.55; } .text { font-size: .98rem; line-height: 1.55; }
/* 👍/👎 row under each journal entry: faint until the entry is hovered. */
.jrate {
  display: flex;
  gap: 8px;
  margin-top: 6px;
  opacity: .35;
}
.entry:hover .jrate {
  opacity: .85;
}
/* A single rating button: desaturated by default. */
.jr {
  background: none;
  border: none;
  cursor: pointer;
  font-size: .85rem;
  padding: 2px 5px;
  border-radius: 5px;
  filter: grayscale(.6);
  -webkit-tap-highlight-color: transparent;
}
.jr:hover {
  filter: none;
  background: rgba(255,122,0,.12);
}
/* The button matching the submitted rating stays highlighted. */
.jr.rated {
  filter: none;
  background: rgba(255,122,0,.25);
  opacity: 1;
}
.empty { color: var(--fade); text-align: center; padding: 44px 16px; } .empty { color: var(--fade); text-align: center; padding: 44px 16px; }
.hidden { display: none !important; } .hidden { display: none !important; }
</style> </style>
@@ -115,12 +121,29 @@
${e.source ? `<span class="src">via ${esc(e.source)}</span>` : ''} ${e.source ? `<span class="src">via ${esc(e.source)}</span>` : ''}
</div> </div>
<div class="text">${esc(e.content)}</div> <div class="text">${esc(e.content)}</div>
<div class="jrate">
<button class="jr" data-id="${e.id}" data-val="1">👍</button>
<button class="jr" data-id="${e.id}" data-val="-1">👎</button>
</div>
</div> </div>
</div>`; </div>`;
} }
root.innerHTML = html; root.innerHTML = html;
} }
// 👍/👎 on a thought -> /rate (fine-tune signal).
// A single listener on the list root handles the buttons of every entry.
root.addEventListener('click', (ev) => {
  const btn = ev.target.closest('.jr');
  if (!btn) return;

  // Look up the rated entry by the id stamped on the button.
  const entry = entries.find((item) => String(item.id) === btn.dataset.id);
  if (!entry) return;

  const payload = {
    kind: entry.kind,
    rating: Number(btn.dataset.val),
    content: entry.content,
    ref: entry.id
  };
  fetch('/rate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(payload)
  }).catch(() => {});

  // Highlight only the clicked button within this entry's pair.
  btn.parentElement.querySelectorAll('.jr').forEach((sibling) => {
    sibling.classList.toggle('rated', sibling === btn);
  });
});
async function load(){ async function load(){
try { try {
const r = await fetch('/journal/data', { cache: 'no-store' }); const r = await fetch('/journal/data', { cache: 'no-store' });
+11
View File
@@ -994,3 +994,14 @@ select:hover {
border-radius: 6px; padding: 10px 12px; margin: 8px 0; overflow-x: auto; border-radius: 6px; padding: 10px 12px; margin: 8px 0; overflow-x: auto;
} }
.msg.assistant pre code { background: none; padding: 0; font-size: 0.85em; } .msg.assistant pre code { background: none; padding: 0; font-size: 0.85em; }
/* Behind-the-scenes 👍/👎 feedback (fine-tune signal) — subtle until hovered. */
.rate-bar {
  display: flex;
  gap: 6px;
  margin-top: 7px;
  opacity: 0.3;
  transition: opacity .15s;
}
.msg.assistant:hover .rate-bar {
  opacity: 0.85;
}
/* A single rating button: desaturated by default. */
.rate-btn {
  background: none;
  border: none;
  cursor: pointer;
  font-size: 0.85rem;
  padding: 2px 5px;
  border-radius: 5px;
  line-height: 1;
  filter: grayscale(0.6);
  -webkit-tap-highlight-color: transparent;
}
.rate-btn:hover {
  filter: none;
  background: rgba(255,122,0,0.12);
}
/* The button matching the submitted rating stays highlighted. */
.rate-btn.rated {
  filter: none;
  background: rgba(255,122,0,0.25);
  opacity: 1;
}
+28
View File
@@ -0,0 +1,28 @@
"""Behind-the-scenes feedback storage (fine-tune signal)."""
from __future__ import annotations
import importlib
import pytest
@pytest.fixture
def memory(tmp_path, monkeypatch):
    """Provide a freshly reloaded ``lyra.memory`` with a per-test database path.

    ``LYRA_DB_PATH`` is set to a file under ``tmp_path`` and ``llm.embed`` is
    replaced by a stub returning a fixed vector, both before the module is
    reloaded.
    """
    db_file = tmp_path / "t.db"
    monkeypatch.setenv("LYRA_DB_PATH", str(db_file))

    from lyra import llm

    def _fake_embed(texts):
        return [[0.1, 0.2, 0.3] for _ in texts]

    monkeypatch.setattr(llm, "embed", _fake_embed)

    import lyra.memory as m

    # Reload only after the environment and stub are in place.
    return importlib.reload(m)
def test_rating_counts_and_upsert(memory):
    """Counts reflect the stored ratings, and re-rating replaces the existing row."""
    memory.add_rating("chat", 1, "good reply", context="hey")
    memory.add_rating("reflection", -1, "repetitive thought")
    assert memory.rating_counts() == {"total": 2, "up": 1, "down": 1}

    stored_contexts = [row["context"] for row in memory.list_ratings()]
    assert "hey" in stored_contexts

    # re-rating the same content replaces the row (no duplicate; flips the rating)
    memory.add_rating("chat", -1, "good reply")
    assert memory.rating_counts() == {"total": 2, "up": 0, "down": 2}

    rated_pairs = {(row["content"], row["rating"]) for row in memory.list_ratings()}
    assert ("good reply", -1) in rated_pairs