feat: backfill poker tracker from curated .md session logs
Seeds the tracker from Brian's real history (import/pokerlog_*.md): each session block is LLM-extracted into structured meta + hands + villains and written as a historical session (real date, money, net), with the original markdown stored as that session's recap.

- lyra/backfill.py: split log -> per-session LLM extract -> seed; dry-run by default, --commit / --reset; only-real-handle villain filter
- poker.import_session() (historical closed session), clear_all() (reseed), prune_anonymous_players(), shared _real_handle() filter (also applied in link_hand_players so auto-linked hand players skip anonymous descriptors + hero), _normalize_parsed() to map unicode card suits -> letters
- result: 10 sessions, 36 hands, 17 real villain dossiers; running_stats now reflects real net (+1057 at 1/3 over 8 sessions)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,143 @@
|
||||
"""Seed the poker tracker from Brian's curated .md session logs.
|
||||
|
||||
Each `# YYYY-MM-DD — ...` block in the log is LLM-extracted into structured meta
|
||||
+ hands + villains, then written as a historical session (real date, money, net),
|
||||
with the original markdown stored as that session's recap. Run dry first to eyeball
|
||||
the extraction, then commit.
|
||||
|
||||
uv run python -m lyra.backfill # dry-run ALL sessions (no writes)
|
||||
uv run python -m lyra.backfill --dry 2 # dry-run first 2
|
||||
uv run python -m lyra.backfill --commit # seed all (writes to DB)
|
||||
uv run python -m lyra.backfill --commit --reset # wipe poker data first, then seed
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
|
||||
from lyra import llm, poker
|
||||
|
||||
# Markdown session log that main() reads and splits into per-session blocks.
# The path is relative, so it is resolved against the process's working directory.
LOG_PATH = "import/pokerlog_asof6-16-26.md"
|
||||
|
||||
_EXTRACT_PROMPT = """Extract a structured record from this single poker session log. \
|
||||
Output ONLY JSON, no prose, no code fences:
|
||||
{
|
||||
"date": "YYYY-MM-DD",
|
||||
"venue": "<casino>", "game": "NLH|PLO|Stud8|Mixed", "stakes": "<e.g. 1/3 or null>",
|
||||
"format": "cash" | "tournament",
|
||||
"buy_in_total": <number>, "cash_out": <number|null>, "net": <number|null>,
|
||||
"hours": <number|null>, "mood": "<short mental-game note|null>",
|
||||
"hands": [
|
||||
// each KEY hand, in the canonical hand-history schema:
|
||||
{"hero_pos": "..", "hero_cards": [".."], "players": [{"pos":"..","name":<str|null>,"cards":[..]|null}],
|
||||
"actions": [{"street":"..","pos":"..","action":"..","amount":<num|null>}, {"street":"flop","board":[".."]}],
|
||||
"board": [".."], "result": {"hero_net": <num|null>, "summary": ".."},
|
||||
"tag": "well_played|leak|cooler|confidence|notable|null", "lesson": "<takeaway|null>"}
|
||||
],
|
||||
"villains": [
|
||||
{"name": "<handle/nickname>", "description": "<physical/identifying|null>",
|
||||
"tendencies": "<how they play>", "adjustment": "<how to exploit>", "category": "feeder|risky|reg|unknown"}
|
||||
]
|
||||
}
|
||||
|
||||
Card rule: cards are rank+suit using SUIT LETTERS ONLY (s h d c) — never unicode symbols \
|
||||
(no ♥♦♣♠). Use a card's real suit ONLY if the log explicitly states it for THAT card; \
|
||||
otherwise the suit is 'x' (e.g. "Jx","Tx","4x") — never a bare rank, never an invented suit. \
|
||||
A suit shown on the board does NOT apply to a hole card. Unknown whole card = "x".
|
||||
Tournaments: buy_in_total = entry + rebuys; cash_out = winnings (0 if busted, so a bust nets -buy_in).
|
||||
Only include villains with a real handle/nickname (skip anonymous descriptors like "the drunk guy", \
|
||||
"final-hand caller"). Only include hands actually described. net = cash_out - buy_in_total. Be faithful to the log."""
|
||||
|
||||
|
||||
def split_sessions(md: str) -> list[str]:
    """Return the log's session blocks, one per '# YYYY-MM-DD' header line.

    The text is cut immediately before every line that opens with such a
    header. Each piece is stripped of surrounding whitespace, and pieces that
    do not begin with a header (for example a preamble ahead of the first
    session) are dropped.
    """
    cut_before_header = re.compile(r"(?=^# \d{4}-\d{2}-\d{2})", re.M)
    opens_with_header = re.compile(r"^# \d{4}-\d{2}-\d{2}")

    sessions = []
    for piece in cut_before_header.split(md):
        piece = piece.strip()
        if opens_with_header.match(piece):
            sessions.append(piece)
    return sessions
|
||||
|
||||
|
||||
def _safe_json(s: str) -> dict | None:
|
||||
try:
|
||||
return json.loads(s)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
m = re.search(r"\{.*\}", s or "", re.S)
|
||||
if m:
|
||||
try:
|
||||
return json.loads(m.group())
|
||||
except json.JSONDecodeError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def extract(block: str, backend: str = "cloud") -> dict | None:
    """Send one session block through the extraction prompt and parse the reply.

    Args:
        block: Markdown text of a single session.
        backend: Passed through unchanged as ``llm.complete``'s ``backend`` argument.

    Returns:
        Whatever ``_safe_json`` makes of the model's reply.
    """
    messages = [
        {"role": "system", "content": _EXTRACT_PROMPT},
        {"role": "user", "content": block},
    ]
    reply = llm.complete(messages, backend=backend)
    return _safe_json(reply)
|
||||
|
||||
|
||||
# Alias of poker's name filter so this module and poker.py share one canonical
# definition (it lives in poker.py). seed() and main() use it to decide which
# extracted villains are kept.
_real_handle = poker._real_handle  # one canonical filter (lives in poker.py)
|
||||
|
||||
|
||||
def seed(ex: dict, block: str) -> dict:
    """Persist one extracted session, its hands and its villains; return a summary.

    Args:
        ex: Extraction result for the session. Missing or falsy fields fall
            back as follows: date -> "2026-01-01", game -> "NLH",
            format -> "cash", buy_in_total -> 0; the rest are passed as-is.
        block: The session's original markdown, stored as ``recap_md``.

    Returns:
        A dict with the new session id, the extracted date / venue / net, and
        how many hands and villains were written.
    """
    venue = ex.get("venue")
    session_id = poker.import_session(
        date=ex.get("date") or "2026-01-01",
        venue=venue,
        game=ex.get("game") or "NLH",
        stakes=ex.get("stakes"),
        fmt=ex.get("format") or "cash",
        buy_in_total=ex.get("buy_in_total") or 0,
        cash_out=ex.get("cash_out"),
        hours=ex.get("hours"),
        mood=ex.get("mood"),
        recap_md=block,
    )

    # Every hand is stored and then linked to its players under the new session.
    hands = ex.get("hands") or []
    for hand in hands:
        hand_id = poker.store_hand_history(hand, session_id=session_id)
        poker.link_hand_players(hand_id, hand, session_id=session_id)

    # Villains are upserted only when their name passes the shared handle filter.
    villains_written = 0
    for villain in ex.get("villains") or []:
        if not _real_handle(villain.get("name")):
            continue
        poker.upsert_player(
            name=villain["name"],
            venue=venue,
            description=villain.get("description"),
            tendencies=villain.get("tendencies"),
            adjustment=villain.get("adjustment"),
            category=villain.get("category"),
        )
        villains_written += 1

    return {
        "session_id": session_id,
        "date": ex.get("date"),
        "venue": venue,
        "net": ex.get("net"),
        "hands": len(hands),
        "villains": villains_written,
    }
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: split the log, extract each session, then preview or seed.

    Flags, read from ``sys.argv``:

    * ``--commit`` - pass every successfully extracted session to ``seed``.
      Without it nothing is seeded and a short preview is printed per session.
    * ``--reset`` - only honoured together with ``--commit``: calls
      ``poker.clear_all()`` before seeding.
    * ``--dry N`` - keep only the first N session blocks. The cap is applied
      before the commit check, so it also limits a ``--commit`` run.

    Returns:
        0 whenever it returns normally; a block whose extraction yields
        nothing is reported on stdout and skipped.
    """
    args = sys.argv[1:]
    commit = "--commit" in args
    reset = "--reset" in args

    # A --dry without a following all-digit argument is ignored; if the flag is
    # repeated, the last well-formed occurrence wins.
    limit = None
    for i, a in enumerate(args):
        if a == "--dry" and i + 1 < len(args) and args[i + 1].isdigit():
            limit = int(args[i + 1])

    # Context manager so the log's file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(LOG_PATH, encoding="utf-8") as log_file:
        blocks = split_sessions(log_file.read())
    # Compare against None rather than truthiness so an explicit "--dry 0"
    # means "zero sessions" instead of silently falling back to all of them.
    if limit is not None:
        blocks = blocks[:limit]
    print(f"{len(blocks)} session block(s). mode={'COMMIT' if commit else 'DRY-RUN'}")

    if commit and reset:
        # NOTE(review): the wipe runs before any extraction is attempted, so if
        # every extract() below fails, whatever clear_all() removed is not
        # re-seeded - confirm this ordering is intended.
        wiped = poker.clear_all()
        print(f"reset: wiped {wiped}")

    for b in blocks:
        ex = extract(b)
        if not ex:
            print(f" ! could not parse a block: {b[:60]!r}")
            continue
        if commit:
            print(" seeded:", seed(ex, b))
        else:
            # Dry-run preview: header line, counts, then at most three hands.
            print(f"\n=== {ex.get('date')} — {ex.get('venue')} {ex.get('stakes')} "
                  f"({ex.get('format')}) net {ex.get('net')} ===")
            kept = [v.get("name") for v in (ex.get("villains") or []) if _real_handle(v.get("name"))]
            print(f" hands: {len(ex.get('hands') or [])} | villains kept: {kept}")
            for h in (ex.get("hands") or [])[:3]:
                print(f" - {h.get('hero_pos')} {h.get('hero_cards')} "
                      f"net {(h.get('result') or {}).get('hero_net')} [{h.get('tag')}]")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run the CLI and hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())
|
||||
Reference in New Issue
Block a user