feat: behind-the-scenes 👍/👎 rating system (fine-tune data collection)

Brian can rate Lyra's outputs as he uses her; each rating is stored as a (context, content, rating) triple — the shape a future fine-tune / preference dataset wants, collected passively during real use.

- memory: ratings table + add_rating (upsert: one row per item, re-rating replaces), list_ratings, rating_counts
- server: POST /rate, GET /ratings/counts, GET /ratings/export (JSONL download)
- chat UI: subtle 👍/👎 on each assistant reply, captures the prompting message as context
- journal/reflection UI: 👍/👎 on each thought
- tests: counts + upsert-replace behavior

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-18 19:32:27 +00:00
parent 9befe4d403
commit 4f770f2e43
6 changed files with 173 additions and 1 deletions
@@ -0,0 +1,28 @@
+"""Behind-the-scenes feedback storage (fine-tune signal)."""
+from __future__ import annotations
+
+import importlib
+
+import pytest
+
+
+@pytest.fixture
+def memory(tmp_path, monkeypatch):
+    """Return a freshly reloaded ``lyra.memory`` module for one test.
+
+    Sets ``LYRA_DB_PATH`` to a file under pytest's ``tmp_path`` and replaces
+    ``lyra.llm.embed`` with a stub that returns the fixed vector
+    ``[0.1, 0.2, 0.3]`` for every input text. Both changes go through
+    ``monkeypatch``, so they are undone when the test finishes.
+    """
+    monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "t.db"))
+    from lyra import llm
+    monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts])
+    import lyra.memory as m
+    # Reload after the env var is set; presumably lyra.memory reads
+    # LYRA_DB_PATH at import time -- TODO confirm against the module.
+    importlib.reload(m)
+    return m
+
+
+def test_rating_counts_and_upsert(memory):
+    """Check rating totals and that re-rating the same content replaces it.
+
+    Adds one thumbs-up and one thumbs-down, verifies the counts and that the
+    supplied context is listed, then re-rates the first item and verifies the
+    total is unchanged while the up/down split flips.
+    """
+    # One positive rating (with context) and one negative (without).
+    memory.add_rating("chat", 1, "good reply", context="hey")
+    memory.add_rating("reflection", -1, "repetitive thought")
+    assert memory.rating_counts() == {"total": 2, "up": 1, "down": 1}
+    assert any(r["context"] == "hey" for r in memory.list_ratings())
+
+    # re-rating the same content replaces the row (no duplicate; flips the rating)
+    # NOTE(review): this call omits context; the test does not assert what
+    # context the replaced row ends up with.
+    memory.add_rating("chat", -1, "good reply")
+    assert memory.rating_counts() == {"total": 2, "up": 0, "down": 2}
+    assert any(r["content"] == "good reply" and r["rating"] == -1 for r in memory.list_ratings())