4f770f2e43
Brian can rate Lyra's outputs as he uses her; each rating is stored as a (context, content, rating) triple — the shape a future fine-tune / preference dataset wants, collected passively during real use.
- memory: ratings table + add_rating (upsert: one row per item, re-rating replaces), list_ratings, rating_counts
- server: POST /rate, GET /ratings/counts, GET /ratings/export (JSONL download)
- chat UI: subtle 👍/👎 on each assistant reply, captures the prompting message as context
- journal/reflection UI: 👍/👎 on each thought
- tests: counts + upsert-replace behavior
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
29 lines
1.0 KiB
Python
29 lines
1.0 KiB
Python
"""Behind-the-scenes feedback storage (fine-tune signal)."""
|
|
from __future__ import annotations
|
|
|
|
import importlib
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture
def memory(tmp_path, monkeypatch):
    """Provide a freshly reloaded ``lyra.memory`` module for one test.

    The fixture points ``LYRA_DB_PATH`` at ``t.db`` inside pytest's
    per-test temporary directory, swaps ``lyra.llm.embed`` for a stub that
    returns the fixed vector ``[0.1, 0.2, 0.3]`` once per input text, then
    reloads ``lyra.memory`` and returns the module object.
    """
    db_file = tmp_path / "t.db"
    monkeypatch.setenv("LYRA_DB_PATH", str(db_file))

    # Imported only after the environment variable is in place.
    from lyra import llm

    def _constant_embedding(texts):
        # One fresh three-element vector per text.
        return [[0.1, 0.2, 0.3] for _ in texts]

    monkeypatch.setattr(llm, "embed", _constant_embedding)

    import lyra.memory as memory_module

    # NOTE(review): presumably the reload makes the module re-read
    # LYRA_DB_PATH at import time -- confirm against lyra.memory.
    importlib.reload(memory_module)
    return memory_module
|
|
|
|
|
|
def test_rating_counts_and_upsert(memory):
    """Check rating tallies, stored context, and replace-on-re-rate.

    Two ratings are recorded (one up, one down), the counts and the saved
    context are verified, then the first item is rated again with the
    opposite sign and the counts must show a flipped row, not a new one.
    """
    memory.add_rating("chat", 1, "good reply", context="hey")
    memory.add_rating("reflection", -1, "repetitive thought")

    initial_counts = memory.rating_counts()
    assert initial_counts == {"total": 2, "up": 1, "down": 1}

    row_with_context = next(
        (row for row in memory.list_ratings() if row["context"] == "hey"),
        None,
    )
    assert row_with_context is not None

    # Rating the same content again must replace the existing row:
    # the total stays at 2 and the former thumbs-up becomes a thumbs-down.
    memory.add_rating("chat", -1, "good reply")

    updated_counts = memory.rating_counts()
    assert updated_counts == {"total": 2, "up": 0, "down": 2}

    flipped_row = next(
        (
            row
            for row in memory.list_ratings()
            if row["content"] == "good reply" and row["rating"] == -1
        ),
        None,
    )
    assert flipped_row is not None
|