feat: persona chat loop, web UI, and local (Ollama) embeddings

Phase 1 — persona + persistent memory chat loop:
- lyra/persona.py + personas/lyra.md: editable identity/voice (friend-first,
  honest, never invents poker math)
- lyra/chat.py: turn loop assembling persona + cross-session recall + recent
  context, persisting both sides to SQLite
- lyra/session.py, lyra/__main__.py: session lifecycle + `lyra` REPL

Phase 1.25 — reuse the old web UI:
- vendored the prior single-page UI into lyra/web/static, repointed to
  same-origin
- lyra/web/server.py (FastAPI): serves the UI and backs its endpoint contract
  (/v1/chat/completions, session CRUD, health, inert thinking-stream) with the
  new chat loop + memory; SQLite stays the single source of truth
- `lyra-web` console script

Local backends — test for free, no OpenAI key:
- llm.embed routes via EMBED_BACKEND (cloud=OpenAI, local=Ollama /api/embed)
- simplified UI backend selector to Local (Ollama) / Cloud (OpenAI), default local
- memory connection is opened with check_same_thread=False for the threaded server

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-15 18:36:31 +00:00
parent 6d88505697
commit 3b9e0bb1e0
17 changed files with 2973 additions and 4 deletions
+74 -1
View File
@@ -27,6 +27,12 @@ CREATE TABLE IF NOT EXISTS exchanges (
created_at TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_session_created ON exchanges(session_id, created_at);
CREATE TABLE IF NOT EXISTS sessions (
id TEXT PRIMARY KEY,
name TEXT,
created_at TEXT NOT NULL
);
"""
_conn: sqlite3.Connection | None = None
@@ -41,7 +47,10 @@ def _connection() -> sqlite3.Connection:
if _conn is not None:
_conn.close()
cfg.db_path.parent.mkdir(parents=True, exist_ok=True)
_conn = sqlite3.connect(cfg.db_path)
# check_same_thread=False: the web server runs blocking work in a thread
# pool, so the singleton connection is touched from threads other than
# the one that created it. Safe here under single-user, low-concurrency use.
_conn = sqlite3.connect(cfg.db_path, check_same_thread=False)
_conn.row_factory = sqlite3.Row
_conn.executescript(SCHEMA)
_conn_path = cfg.db_path
@@ -100,6 +109,70 @@ def recent(session_id: str, n: int = 10) -> list[Exchange]:
]
def ensure_session(session_id: str, name: str | None = None) -> None:
    """Make sure a ``sessions`` row exists for *session_id*.

    A missing row is inserted with the current UTC timestamp as its
    ``created_at``; an already-present row is left untouched by the
    ``ON CONFLICT ... DO NOTHING`` clause. When *name* is not ``None`` the
    row's name is then overwritten with it, so this call also renames an
    existing session. Passing ``None`` never clears a stored name.

    The statements are issued inside the connection's ``with`` block, so
    they are committed together.
    """
    insert_sql = (
        "INSERT INTO sessions (id, name, created_at) VALUES (?, ?, ?) "
        "ON CONFLICT(id) DO NOTHING"
    )
    rename_sql = "UPDATE sessions SET name = ? WHERE id = ?"

    created_at = datetime.now(timezone.utc).isoformat()
    db = _connection()
    with db:
        db.execute(insert_sql, (session_id, name, created_at))
        if name is None:
            # Nothing to rename; the insert (or no-op) above is all we need.
            return
        db.execute(rename_sql, (name, session_id))
def list_sessions() -> list[dict]:
    """Return every known session as ``{"id": ..., "name": ...}``, newest first.

    Two sources are merged with ``UNION``:

    * rows of the ``sessions`` table (which carry a name and creation time);
    * session ids that appear only in ``exchanges`` and have no ``sessions``
      row; these get a ``NULL`` name and are dated by their earliest
      exchange.

    The combined result is ordered by ``created_at`` descending. The
    timestamp is used for ordering only and is not part of the returned
    dicts.
    """
    query = """
SELECT s.id AS id,
s.name AS name,
COALESCE(s.created_at, MIN(e.created_at)) AS created_at
FROM sessions s
LEFT JOIN exchanges e ON e.session_id = s.id
GROUP BY s.id
UNION
SELECT e.session_id AS id, NULL AS name, MIN(e.created_at) AS created_at
FROM exchanges e
WHERE e.session_id NOT IN (SELECT id FROM sessions)
GROUP BY e.session_id
ORDER BY created_at DESC
"""
    found = _connection().execute(query).fetchall()
    return [dict(id=row["id"], name=row["name"]) for row in found]
def history(session_id: str) -> list[Exchange]:
    """Return the complete conversation of *session_id*, oldest first.

    Rows are read from ``exchanges`` and ordered by ascending ``id``. Each
    row is converted into an ``Exchange`` built from its ``id``,
    ``session_id``, ``role``, ``content`` and ``created_at`` columns. A
    session with no exchanges yields an empty list.
    """
    select_sql = (
        "SELECT id, session_id, role, content, created_at FROM exchanges "
        "WHERE session_id = ? ORDER BY id ASC"
    )
    # Column names double as the Exchange keyword arguments.
    columns = ("id", "session_id", "role", "content", "created_at")

    fetched = _connection().execute(select_sql, (session_id,)).fetchall()
    return [
        Exchange(**{column: record[column] for column in columns})
        for record in fetched
    ]
def delete_session(session_id: str) -> None:
    """Delete *session_id*: first its exchanges, then its ``sessions`` row.

    Both deletes are issued inside the connection's ``with`` block, so they
    are committed together. Deleting an id that does not exist is a no-op.
    """
    # Order matters for readability only: child rows first, then the session.
    statements = (
        "DELETE FROM exchanges WHERE session_id = ?",
        "DELETE FROM sessions WHERE id = ?",
    )
    db = _connection()
    with db:
        for sql in statements:
            db.execute(sql, (session_id,))
def recall(query: str, k: int = 5, session_id: str | None = None) -> list[Exchange]:
"""Top-k exchanges semantically similar to `query`, optionally scoped to a session."""
[q_vec] = llm.embed([query])