feat: backfill poker tracker from curated .md session logs

Seeds the tracker from Brian's real history (import/pokerlog_*.md): each session
block is LLM-extracted into structured meta + hands + villains and written as a
historical session (real date, money, net), with the original markdown stored as
that session's recap.

- lyra/backfill.py: split log -> per-session LLM extract -> seed; dry-run by
  default, --commit / --reset; only-real-handle villain filter
- poker.import_session() (historical closed session), clear_all() (reseed),
  prune_anonymous_players(), shared _real_handle() filter (also applied in
  link_hand_players so auto-linked hand players skip anonymous descriptors + hero),
  _normalize_parsed() to map unicode card suits -> letters
- result: 10 sessions, 36 hands, 17 real villain dossiers; running_stats now
  reflects real net (+1057 at 1/3 over 8 sessions)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-18 05:55:22 +00:00
parent c7d2279f8d
commit 6761c3f978
2 changed files with 230 additions and 1 deletions
+87 -1
View File
@@ -149,6 +149,38 @@ def get_session(session_id: int) -> dict | None:
return dict(r) if r else None
def import_session(date: str, venue: str | None = None, game: str = "NLH",
                   stakes: str | None = None, fmt: str = "cash",
                   buy_in_total: float = 0.0, cash_out: float | None = None,
                   hours: float | None = None, mood: str | None = None,
                   recap_md: str | None = None) -> int:
    """Record a past session that is already finished and return its row id.

    Used by the backfill path. The row is written with status ``'closed'`` and
    the same timestamp for both ``started_at`` and ``ended_at``.

    Args:
        date: Calendar date of the session; it is interpolated directly into
            the timestamp string, so it is expected to look like ``YYYY-MM-DD``.
        venue, game, stakes, fmt, hours, mood, recap_md: Stored as given
            (``fmt`` goes into the ``format`` column).
        buy_in_total: Total bought in; a falsy value is stored as 0.
        cash_out: Amount cashed out, or ``None`` when unknown.

    Returns:
        The id of the inserted ``poker_sessions`` row.
    """
    # Logs are evening sessions; the clock time is only an approximation.
    timestamp = f"{date}T20:00:00+00:00"

    # Net is only meaningful when a cash-out figure was recorded.
    if cash_out is not None:
        net = (cash_out or 0) - (buy_in_total or 0)
    else:
        net = None

    row = (
        timestamp,          # started_at
        timestamp,          # ended_at
        venue,
        game,
        stakes,
        fmt,
        buy_in_total or 0,
        cash_out,
        net,
        hours,
        mood,
        recap_md,
    )
    sql = (
        "INSERT INTO poker_sessions (started_at, ended_at, venue, game, stakes, format, "
        "buy_in_total, cash_out, net, hours, mood, status, recap_md) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'closed', ?)"
    )
    db = _c()
    with db:
        cursor = db.execute(sql, row)
    return int(cursor.lastrowid)
def clear_all() -> dict:
    """Delete every row from the poker tables so the data can be reseeded.

    Returns:
        A dict mapping each table name to the number of rows it held
        immediately before it was emptied.
    """
    # Processed in this order: hands, observations, reads, players, sessions.
    tables = (
        "poker_hands",
        "player_observations",
        "player_reads",
        "poker_players",
        "poker_sessions",
    )
    removed = {}
    db = _c()
    with db:
        for t in tables:
            before = db.execute(f"SELECT COUNT(*) n FROM {t}").fetchone()
            removed[t] = before["n"]
            db.execute(f"DELETE FROM {t}")
    return removed
def live_session() -> dict | None:
"""The current open session, if any."""
r = _c().execute(
@@ -321,9 +353,38 @@ def _review_session_id() -> int:
return int(cur.lastrowid)
_SUIT_SYM = {"": "h", "": "d", "": "c", "": "s"}
def _norm_card(c):
if not isinstance(c, str):
return c
s = c.strip()
for sym, ltr in _SUIT_SYM.items():
s = s.replace(sym, ltr)
return s
def _normalize_parsed(p: dict) -> dict:
    """Rewrite the card lists of a parsed hand through ``_norm_card``.

    Touches ``hero_cards`` and ``board`` on the hand itself, ``cards`` on each
    player dict, and ``board`` on each action dict. Only values that are lists
    are rewritten. The dict is modified in place and also returned; a non-dict
    argument is returned untouched.
    """
    if not isinstance(p, dict):
        return p

    def _fix(container, field):
        # Replace container[field] with normalized cards when it is a list.
        cards = container.get(field)
        if isinstance(cards, list):
            container[field] = [_norm_card(card) for card in cards]

    _fix(p, "hero_cards")
    _fix(p, "board")

    for player in p.get("players") or []:
        if isinstance(player, dict):
            _fix(player, "cards")

    for action in p.get("actions") or []:
        if isinstance(action, dict):
            _fix(action, "board")

    return p
def store_hand_history(parsed: dict, session_id: int | None = None,
tag: str | None = None, lesson: str | None = None) -> int:
"""Store a parsed hand: full JSON + extracted flat fields for stats/listing."""
parsed = _normalize_parsed(parsed)
sid = _resolve(session_id) or _review_session_id()
hero_cards = parsed.get("hero_cards") or []
board = parsed.get("board") or []
@@ -469,6 +530,31 @@ def generate_recap(session_id: int | None = None, backend: str | None = None) ->
# --- villain file ---
_GENERIC_NAME = ("player", "guy", "villain", "caller", "drunk", "unknown", "hero", "seat",
"the ", "aggro", "young", "older", "straddler", "opener", "brian")
def _real_handle(name: str | None) -> bool:
"""A real, persistable player handle — not an anonymous descriptor or the hero."""
n = (name or "").strip().lower()
if len(n) < 2 or n in {"utg", "utg1", "mp", "lj", "hj", "co", "btn", "sb", "bb"}:
return False
return not any(g in n for g in _GENERIC_NAME)
def prune_anonymous_players() -> int:
    """Remove every player whose name fails ``_real_handle``.

    For each such player the observation rows, the read rows and finally the
    player row itself are deleted.

    Returns:
        The number of players removed.
    """
    db = _c()
    rows = db.execute("SELECT id, name FROM poker_players").fetchall()
    doomed = [(row["id"],) for row in rows if not _real_handle(row["name"])]

    # Dependent rows go first, then the player row.
    statements = (
        "DELETE FROM player_observations WHERE player_id = ?",
        "DELETE FROM player_reads WHERE player_id = ?",
        "DELETE FROM poker_players WHERE id = ?",
    )
    with db:
        for params in doomed:
            for stmt in statements:
                db.execute(stmt, params)
    return len(doomed)
def upsert_player(name: str, venue: str | None = None, description: str | None = None,
tendencies: str | None = None, adjustment: str | None = None,
category: str | None = None) -> int:
@@ -533,7 +619,7 @@ def link_hand_players(hand_id: int, parsed: dict, session_id: int | None = None)
linked = 0
for pl in (parsed.get("players") or []):
name = (pl.get("name") or "").strip()
if not name:
if not _real_handle(name): # skip anonymous descriptors + the hero
continue
pid = upsert_player(name)
vpip, pfr, saw = _player_flags(parsed, pl.get("pos"))