From 6761c3f9780e65ae2ecb06f1a037c32e860e7b8f Mon Sep 17 00:00:00 2001
From: serversdown
Date: Thu, 18 Jun 2026 05:55:22 +0000
Subject: [PATCH] feat: backfill poker tracker from curated .md session logs

Seeds the tracker from Brian's real history (import/pokerlog_*.md): each
session block is LLM-extracted into structured meta + hands + villains and
written as a historical session (real date, money, net), with the original
markdown stored as that session's recap.

- lyra/backfill.py: split log -> per-session LLM extract -> seed; dry-run by
  default, --commit / --reset; only-real-handle villain filter
- poker.import_session() (historical closed session), clear_all() (reseed),
  prune_anonymous_players(), shared _real_handle() filter (also applied in
  link_hand_players so auto-linked hand players skip anonymous descriptors +
  hero), _normalize_parsed() to map unicode card suits -> letters
- result: 10 sessions, 36 hands, 17 real villain dossiers; running_stats now
  reflects real net (+1057 at 1/3 over 8 sessions)

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 lyra/backfill.py | 150 +++++++++++++++++++++++++++++++++++++++++++++++
 lyra/poker.py    | 102 +++++++++++++++++++++++++++++++-
 2 files changed, 251 insertions(+), 1 deletion(-)
 create mode 100644 lyra/backfill.py

diff --git a/lyra/backfill.py b/lyra/backfill.py
new file mode 100644
index 0000000..8e269ea
--- /dev/null
+++ b/lyra/backfill.py
@@ -0,0 +1,150 @@
+"""Seed the poker tracker from Brian's curated .md session logs.
+
+Each `# YYYY-MM-DD — ...` block in the log is LLM-extracted into structured meta
++ hands + villains, then written as a historical session (real date, money, net),
+with the original markdown stored as that session's recap. Run dry first to eyeball
+the extraction, then commit.
+
+    uv run python -m lyra.backfill                    # dry-run ALL sessions (no writes)
+    uv run python -m lyra.backfill --dry 2            # dry-run first 2
+    uv run python -m lyra.backfill --commit           # seed all (writes to DB)
+    uv run python -m lyra.backfill --commit --reset   # wipe poker data first, then seed
+"""
+from __future__ import annotations
+
+import json
+import re
+import sys
+
+from lyra import llm, poker
+
+LOG_PATH = "import/pokerlog_asof6-16-26.md"
+
+_EXTRACT_PROMPT = """Extract a structured record from this single poker session log. \
+Output ONLY JSON, no prose, no code fences:
+{
+  "date": "YYYY-MM-DD",
+  "venue": "", "game": "NLH|PLO|Stud8|Mixed", "stakes": "",
+  "format": "cash" | "tournament",
+  "buy_in_total": , "cash_out": , "net": ,
+  "hours": , "mood": "",
+  "hands": [
+    // each KEY hand, in the canonical hand-history schema:
+    {"hero_pos": "..", "hero_cards": [".."], "players": [{"pos":"..","name":,"cards":[..]|null}],
+     "actions": [{"street":"..","pos":"..","action":"..","amount":}, {"street":"flop","board":[".."]}],
+     "board": [".."], "result": {"hero_net": , "summary": ".."},
+     "tag": "well_played|leak|cooler|confidence|notable|null", "lesson": ""}
+  ],
+  "villains": [
+    {"name": "", "description": "",
+     "tendencies": "", "adjustment": "", "category": "feeder|risky|reg|unknown"}
+  ]
+}
+
+Card rule: cards are rank+suit using SUIT LETTERS ONLY (s h d c) — never unicode symbols \
+(no ♥♦♣♠). Use a card's real suit ONLY if the log explicitly states it for THAT card; \
+otherwise the suit is 'x' (e.g. "Jx","Tx","4x") — never a bare rank, never an invented suit. \
+A suit shown on the board does NOT apply to a hole card. Unknown whole card = "x".
+Tournaments: buy_in_total = entry + rebuys; cash_out = winnings (0 if busted, so a bust nets -buy_in).
+Only include villains with a real handle/nickname (skip anonymous descriptors like "the drunk guy", \
+"final-hand caller"). Only include hands actually described. net = cash_out - buy_in_total. Be faithful to the log."""
+
+
+def split_sessions(md: str) -> list[str]:
+    """Split the log into individual session blocks on '# YYYY-MM-DD' headers."""
+    parts = re.split(r"(?=^# \d{4}-\d{2}-\d{2})", md, flags=re.M)
+    return [p.strip() for p in parts if re.match(r"^# \d{4}-\d{2}-\d{2}", p.strip())]
+
+
+def _safe_json(s: str) -> dict | None:
+    """Parse a model reply as a JSON object; None if no object can be recovered."""
+    try:
+        obj = json.loads(s)
+    except (json.JSONDecodeError, TypeError):
+        # Reply wrapped in prose or code fences: retry on the outermost {...} span.
+        m = re.search(r"\{.*\}", s or "", re.S)
+        if not m:
+            return None
+        try:
+            obj = json.loads(m.group())
+        except json.JSONDecodeError:
+            return None
+    # Callers use .get(); a bare list/number/string reply is not a usable record.
+    return obj if isinstance(obj, dict) else None
+
+
+def extract(block: str, backend: str = "cloud") -> dict | None:
+    """LLM-extract one session block into a dict; None if the reply has no JSON object."""
+    return _safe_json(llm.complete(
+        [{"role": "system", "content": _EXTRACT_PROMPT}, {"role": "user", "content": block}],
+        backend=backend,
+    ))
+
+
+_real_handle = poker._real_handle  # one canonical filter (lives in poker.py)
+
+
+def seed(ex: dict, block: str) -> dict:
+    """Write one extracted session + its hands + villains to the DB. Returns a summary."""
+    sid = poker.import_session(
+        date=ex.get("date") or "2026-01-01", venue=ex.get("venue"), game=ex.get("game") or "NLH",
+        stakes=ex.get("stakes"), fmt=ex.get("format") or "cash",
+        buy_in_total=ex.get("buy_in_total") or 0, cash_out=ex.get("cash_out"),
+        hours=ex.get("hours"), mood=ex.get("mood"), recap_md=block,
+    )
+    n_hands = 0
+    for h in ex.get("hands") or []:
+        hid = poker.store_hand_history(h, session_id=sid)
+        poker.link_hand_players(hid, h, session_id=sid)
+        n_hands += 1
+    n_villains = 0
+    for v in ex.get("villains") or []:
+        if _real_handle(v.get("name")):
+            poker.upsert_player(name=v["name"].strip(), venue=ex.get("venue"),
+                                description=v.get("description"), tendencies=v.get("tendencies"),
+                                adjustment=v.get("adjustment"), category=v.get("category"))
+            n_villains += 1
+    return {"session_id": sid, "date": ex.get("date"), "venue": ex.get("venue"),
+            "net": ex.get("net"), "hands": n_hands, "villains": n_villains}
+
+
+def main() -> int:
+    """CLI entry point: dry-run by default; --commit writes, --reset wipes first."""
+    args = sys.argv[1:]
+    commit = "--commit" in args
+    reset = "--reset" in args
+    limit = None
+    for i, a in enumerate(args):
+        if a == "--dry" and i + 1 < len(args) and args[i + 1].isdigit():
+            limit = int(args[i + 1])
+
+    with open(LOG_PATH, encoding="utf-8") as fh:
+        blocks = split_sessions(fh.read())
+    if limit:
+        blocks = blocks[:limit]
+    print(f"{len(blocks)} session block(s). mode={'COMMIT' if commit else 'DRY-RUN'}")
+
+    if commit and reset:
+        wiped = poker.clear_all()
+        print(f"reset: wiped {wiped}")
+
+    for b in blocks:
+        ex = extract(b)
+        if not ex:
+            print(f" ! could not parse a block: {b[:60]!r}")
+            continue
+        if commit:
+            print(" seeded:", seed(ex, b))
+        else:
+            print(f"\n=== {ex.get('date')} — {ex.get('venue')} {ex.get('stakes')} "
+                  f"({ex.get('format')}) net {ex.get('net')} ===")
+            kept = [v.get("name") for v in (ex.get("villains") or []) if _real_handle(v.get("name"))]
+            print(f" hands: {len(ex.get('hands') or [])} | villains kept: {kept}")
+            for h in (ex.get("hands") or [])[:3]:
+                print(f" - {h.get('hero_pos')} {h.get('hero_cards')} "
+                      f"net {(h.get('result') or {}).get('hero_net')} [{h.get('tag')}]")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/lyra/poker.py b/lyra/poker.py
index 7557d57..9eb0d81 100644
--- a/lyra/poker.py
+++ b/lyra/poker.py
@@ -149,6 +149,38 @@ def get_session(session_id: int) -> dict | None:
     return dict(r) if r else None
 
 
+def import_session(date: str, venue: str | None = None, game: str = "NLH",
+                   stakes: str | None = None, fmt: str = "cash",
+                   buy_in_total: float = 0.0, cash_out: float | None = None,
+                   hours: float | None = None, mood: str | None = None,
+                   recap_md: str | None = None) -> int:
+    """Insert a historical (already-closed) session with a real date. For backfill."""
+    started = f"{date}T20:00:00+00:00"  # logs are evening sessions; time is approximate
+    net = (cash_out or 0) - (buy_in_total or 0) if cash_out is not None else None
+    conn = _c()
+    with conn:
+        cur = conn.execute(
+            "INSERT INTO poker_sessions (started_at, ended_at, venue, game, stakes, format, "
+            "buy_in_total, cash_out, net, hours, mood, status, recap_md) "
+            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'closed', ?)",
+            (started, started, venue, game, stakes, fmt, buy_in_total or 0, cash_out,
+             net, hours, mood, recap_md),
+        )
+    return int(cur.lastrowid)
+
+
+def clear_all() -> dict:
+    """Wipe all poker data (sessions/hands/players/reads/observations). For a clean reseed."""
+    conn = _c()
+    counts = {}
+    with conn:
+        for t in ("poker_hands", "player_observations", "player_reads",
+                  "poker_players", "poker_sessions"):
+            counts[t] = conn.execute(f"SELECT COUNT(*) n FROM {t}").fetchone()["n"]
+            conn.execute(f"DELETE FROM {t}")
+    return counts
+
+
 def live_session() -> dict | None:
     """The current open session, if any."""
     r = _c().execute(
@@ -321,9 +353,39 @@ def _review_session_id() -> int:
     return int(cur.lastrowid)
 
 
+_SUIT_SYM = {"♥": "h", "♦": "d", "♣": "c", "♠": "s"}
+
+
+def _norm_card(c):
+    """Map unicode suit symbols in a card string to letters; non-strings pass through."""
+    if not isinstance(c, str):
+        return c
+    s = c.strip()
+    for sym, ltr in _SUIT_SYM.items():
+        s = s.replace(sym, ltr)
+    return s
+
+
+def _normalize_parsed(p: dict) -> dict:
+    """Normalize card strings (unicode suits -> letters) across a parsed hand."""
+    if not isinstance(p, dict):
+        return p
+    for key in ("hero_cards", "board"):
+        if isinstance(p.get(key), list):
+            p[key] = [_norm_card(c) for c in p[key]]
+    for pl in p.get("players") or []:
+        if isinstance(pl, dict) and isinstance(pl.get("cards"), list):
+            pl["cards"] = [_norm_card(c) for c in pl["cards"]]
+    for a in p.get("actions") or []:
+        if isinstance(a, dict) and isinstance(a.get("board"), list):
+            a["board"] = [_norm_card(c) for c in a["board"]]
+    return p
+
+
 def store_hand_history(parsed: dict, session_id: int | None = None,
                        tag: str | None = None, lesson: str | None = None) -> int:
     """Store a parsed hand: full JSON + extracted flat fields for stats/listing."""
+    parsed = _normalize_parsed(parsed)
     sid = _resolve(session_id) or _review_session_id()
     hero_cards = parsed.get("hero_cards") or []
     board = parsed.get("board") or []
@@ -469,6 +531,44 @@ def generate_recap(session_id: int | None = None, backend: str | None = None) ->
 
 # --- villain file ---
 
+_GENERIC_NAME = ("player", "guy", "villain", "caller", "drunk", "unknown", "hero", "seat",
+                 "the ", "aggro", "young", "older", "straddler", "opener", "brian")
+# Whole-word forms of the descriptors above ("the " carries a trailing space).
+_GENERIC_WORDS = frozenset(g.strip() for g in _GENERIC_NAME)
+_POSITION_NAMES = frozenset({"utg", "utg1", "mp", "lj", "hj", "co", "btn", "sb", "bb"})
+
+
+def _real_handle(name: str | None) -> bool:
+    """A real, persistable player handle — not an anonymous descriptor or the hero.
+
+    Descriptors are matched as whole words (optionally plural), not substrings, so
+    real handles that merely embed one ("Cherokee", "Youngblood") are kept instead
+    of being rejected here and deleted by prune_anonymous_players().
+    """
+    if not isinstance(name, str):
+        return False
+    n = name.strip().lower()
+    if len(n) < 2 or n in _POSITION_NAMES:
+        return False
+    # Split on non-letters so "seat5" and "drunk-guy" still expose their descriptor.
+    words = "".join(ch if ch.isalpha() else " " for ch in n).split()
+    return not any(w in _GENERIC_WORDS or (w.endswith("s") and w[:-1] in _GENERIC_WORDS)
+                   for w in words)
+
+
+def prune_anonymous_players() -> int:
+    """Delete players (and their observations/reads) whose names aren't real handles."""
+    conn = _c()
+    bad = [r["id"] for r in conn.execute("SELECT id, name FROM poker_players").fetchall()
+           if not _real_handle(r["name"])]
+    with conn:
+        for pid in bad:
+            conn.execute("DELETE FROM player_observations WHERE player_id = ?", (pid,))
+            conn.execute("DELETE FROM player_reads WHERE player_id = ?", (pid,))
+            conn.execute("DELETE FROM poker_players WHERE id = ?", (pid,))
+    return len(bad)
+
+
 def upsert_player(name: str, venue: str | None = None, description: str | None = None,
                   tendencies: str | None = None, adjustment: str | None = None,
                   category: str | None = None) -> int:
@@ -533,7 +633,7 @@ def link_hand_players(hand_id: int, parsed: dict, session_id: int | None = None)
     linked = 0
     for pl in (parsed.get("players") or []):
         name = (pl.get("name") or "").strip()
-        if not name:
+        if not _real_handle(name):  # skip anonymous descriptors + the hero
             continue
         pid = upsert_player(name)
         vpip, pfr, saw = _player_flags(parsed, pl.get("pos"))