"""Seed the poker tracker from Brian's curated .md session logs. Each `# YYYY-MM-DD — ...` block in the log is LLM-extracted into structured meta + hands + villains, then written as a historical session (real date, money, net), with the original markdown stored as that session's recap. Run dry first to eyeball the extraction, then commit. uv run python -m lyra.backfill # dry-run ALL sessions (no writes) uv run python -m lyra.backfill --dry 2 # dry-run first 2 uv run python -m lyra.backfill --commit # seed all (writes to DB) uv run python -m lyra.backfill --commit --reset # wipe poker data first, then seed """ from __future__ import annotations import json import re import sys from lyra import llm, poker LOG_PATH = "import/pokerlog_asof6-16-26.md" _EXTRACT_PROMPT = """Extract a structured record from this single poker session log. \ Output ONLY JSON, no prose, no code fences: { "date": "YYYY-MM-DD", "venue": "", "game": "NLH|PLO|Stud8|Mixed", "stakes": "", "format": "cash" | "tournament", "buy_in_total": , "cash_out": , "net": , "hours": , "mood": "", "hands": [ // each KEY hand, in the canonical hand-history schema: {"hero_pos": "..", "hero_cards": [".."], "players": [{"pos":"..","name":,"cards":[..]|null}], "actions": [{"street":"..","pos":"..","action":"..","amount":}, {"street":"flop","board":[".."]}], "board": [".."], "result": {"hero_net": , "summary": ".."}, "tag": "well_played|leak|cooler|confidence|notable|null", "lesson": ""} ], "villains": [ {"name": "", "description": "", "tendencies": "", "adjustment": "", "category": "feeder|risky|reg|unknown"} ] } Card rule: cards are rank+suit using SUIT LETTERS ONLY (s h d c) — never unicode symbols \ (no ♥♦♣♠). Use a card's real suit ONLY if the log explicitly states it for THAT card; \ otherwise the suit is 'x' (e.g. "Jx","Tx","4x") — never a bare rank, never an invented suit. \ A suit shown on the board does NOT apply to a hole card. Unknown whole card = "x". Tournaments: buy_in_total = entry + rebuys; cash_out = winnings (0 if busted, so a bust nets -buy_in). Only include villains with a real handle/nickname (skip anonymous descriptors like "the drunk guy", \ "final-hand caller"). Only include hands actually described. net = cash_out - buy_in_total. Be faithful to the log.""" def split_sessions(md: str) -> list[str]: """Split the log into individual session blocks on '# YYYY-MM-DD' headers.""" parts = re.split(r"(?=^# \d{4}-\d{2}-\d{2})", md, flags=re.M) return [p.strip() for p in parts if re.match(r"^# \d{4}-\d{2}-\d{2}", p.strip())] def _safe_json(s: str) -> dict | None: try: return json.loads(s) except (json.JSONDecodeError, TypeError): m = re.search(r"\{.*\}", s or "", re.S) if m: try: return json.loads(m.group()) except json.JSONDecodeError: return None return None def extract(block: str, backend: str = "cloud") -> dict | None: return _safe_json(llm.complete( [{"role": "system", "content": _EXTRACT_PROMPT}, {"role": "user", "content": block}], backend=backend, )) _real_handle = poker._real_handle # one canonical filter (lives in poker.py) def seed(ex: dict, block: str, with_hands: bool = False) -> dict: """Write one extracted session + villains (+ hands only if asked) to the DB. Hands are OFF by default: reconstructing a clean replayable hand from old narrative prose is too lossy (mangled cards/positions). Sessions, their original writeups (recap), and villain dossiers seed cleanly; hands are best captured fresh from Brian's own shorthand going forward. """ sid = poker.import_session( date=ex.get("date") or "2026-01-01", venue=ex.get("venue"), game=ex.get("game") or "NLH", stakes=ex.get("stakes"), fmt=ex.get("format") or "cash", buy_in_total=ex.get("buy_in_total") or 0, cash_out=ex.get("cash_out"), hours=ex.get("hours"), mood=ex.get("mood"), recap_md=block, ) n_hands = 0 if with_hands: for h in ex.get("hands") or []: hid = poker.store_hand_history(h, session_id=sid) poker.link_hand_players(hid, h, session_id=sid) n_hands += 1 n_villains = 0 for v in ex.get("villains") or []: if _real_handle(v.get("name")): poker.upsert_player(name=v["name"], venue=ex.get("venue"), description=v.get("description"), tendencies=v.get("tendencies"), adjustment=v.get("adjustment"), category=v.get("category")) n_villains += 1 return {"session_id": sid, "date": ex.get("date"), "venue": ex.get("venue"), "net": ex.get("net"), "hands": n_hands, "villains": n_villains} def main() -> int: args = sys.argv[1:] commit = "--commit" in args reset = "--reset" in args with_hands = "--with-hands" in args # off by default — prose->hand replay is too lossy limit = None for i, a in enumerate(args): if a == "--dry" and i + 1 < len(args) and args[i + 1].isdigit(): limit = int(args[i + 1]) blocks = split_sessions(open(LOG_PATH, encoding="utf-8").read()) if limit: blocks = blocks[:limit] print(f"{len(blocks)} session block(s). mode={'COMMIT' if commit else 'DRY-RUN'}") if commit and reset: wiped = poker.clear_all() print(f"reset: wiped {wiped}") for b in blocks: ex = extract(b) if not ex: print(f" ! could not parse a block: {b[:60]!r}") continue if commit: print(" seeded:", seed(ex, b, with_hands=with_hands)) else: print(f"\n=== {ex.get('date')} — {ex.get('venue')} {ex.get('stakes')} " f"({ex.get('format')}) net {ex.get('net')} ===") kept = [v.get("name") for v in (ex.get("villains") or []) if _real_handle(v.get("name"))] print(f" hands: {len(ex.get('hands') or [])} | villains kept: {kept}") for h in (ex.get("hands") or [])[:3]: print(f" - {h.get('hero_pos')} {h.get('hero_cards')} " f"net {(h.get('result') or {}).get('hero_net')} [{h.get('tag')}]") return 0 if __name__ == "__main__": raise SystemExit(main())