feat: ChatGPT chat-log importer
Import the parser's {title, messages} JSON into Lyra's memory so past
conversations seed recall (and, later, the era-rollup tier).
- lyra/ingest.py: one conversation -> one session, text messages -> exchanges;
skips non-text (image asset) messages and non user/assistant roles; embeddings
batched; idempotent by filename-derived session id; `lyra-import <dir>` CLI
- memory.add_exchanges_bulk: batched insert of pre-embedded rows
Format has no timestamps yet, so imports are stamped at import time; a future
dated export will let era memory group by real calendar time.
Verified on the 68-file lyra dev set: 7519 exchanges, idempotent re-run, recall
returns relevant history.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -108,6 +108,22 @@ def remember(session_id: str, role: str, content: str) -> int:
|
||||
return int(cur.lastrowid)
|
||||
|
||||
|
||||
def add_exchanges_bulk(session_id: str, rows: list[tuple[str, str, list[float], str]]) -> int:
    """Bulk-insert already-embedded exchanges for a single session.

    The importer uses this so that it does not pay for one INSERT (and one
    embedding round-trip) per message.

    Args:
        session_id: Session id written into every inserted row.
        rows: ``(role, content, embedding, created_at)`` tuples. Each
            embedding is passed through ``_to_blob`` before being bound
            to the statement.

    Returns:
        The number of rows supplied, i.e. ``len(rows)``.
    """
    insert_sql = (
        "INSERT INTO exchanges (session_id, role, content, embedding, created_at) "
        "VALUES (?, ?, ?, ?, ?)"
    )
    db = _connection()
    # NOTE(review): presumably a sqlite3 connection, whose context manager
    # groups the whole batch into one transaction -- confirm against _connection().
    with db:
        params = []
        for role, content, embedding, created_at in rows:
            params.append((session_id, role, content, _to_blob(embedding), created_at))
        db.executemany(insert_sql, params)
    return len(rows)
|
||||
|
||||
|
||||
def recent(session_id: str, n: int = 10) -> list[Exchange]:
|
||||
"""Last `n` exchanges from a session, oldest first."""
|
||||
conn = _connection()
|
||||
|
||||
Reference in New Issue
Block a user