From 66dd880f9396b54236d7ae926dc21a730d16fdd2 Mon Sep 17 00:00:00 2001
From: serversdown <brian@serversdown.net>
Date: Fri, 26 Jun 2026 22:36:11 +0000
Subject: [PATCH] feat: canonical structured-hand contract (Lyra->RTO
 transport)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Solidify hand histories into one versioned shape that gets stored, replayed, and
exported — the foundation the tap recorder will emit into and RTO consumes.

- normalize_structured(): single guarantee of the contract shape — canonical cards
  (unicode/10/case -> RankSuit tokens, unknown 'Ax'/'x' preserved), hero synced into
  players[] (RTO finds hero via pos==hero_pos), schema_version stamp, and a
  completeness summary so consumers skip suit-dependent math on partial hands.
  Idempotent; runs on store AND read (legacy rows conform on the way out).
- list_recent_hands: has_structured flag so the export/RTO knows which hands have a
  replayable body worth fetching.
- docs/HAND_HISTORY.md: the shared contract both repos cite (schema, conventions,
  ownership rule, one-way HTTP coupling, transport endpoints).
- replaces the narrow _normalize_parsed (unicode-only) everywhere.

Card format chosen: lists of 2-char tokens (unambiguous, matches what Lyra already
stores + the viewer reads). Unknowns kept + flagged rather than dropped.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/HAND_HISTORY.md      |  72 ++++++++++++++++++++++++++
 lyra/poker.py             | 104 ++++++++++++++++++++++++++++++++------
 tests/test_hand_schema.py | 103 +++++++++++++++++++++++++++++++++++++
 3 files changed, 264 insertions(+), 15 deletions(-)
 create mode 100644 docs/HAND_HISTORY.md
 create mode 100644 tests/test_hand_schema.py

diff --git a/docs/HAND_HISTORY.md b/docs/HAND_HISTORY.md
new file mode 100644
index 0000000..07e093e
--- /dev/null
+++ b/docs/HAND_HISTORY.md
@@ -0,0 +1,72 @@
+# Hand-history contract (Lyra → RTO)
+
+The canonical structured shape for a poker hand. **Lyra owns hands** — it produces this
+shape (LLM parser today; the tap recorder natively, going forward), stores it, replays it
+in the viewer, and exports it. **RTO consumes it** over HTTP and never reaches into Lyra.
+
+Ownership rule: whoever owns the data owns the tools that produce it. Lyra owns the hand
+DB, the viewer, and the copilot loop, so hand capture lives here. RTO is a pure engine.
+
+Coupling: **one arrow, Lyra → RTO, HTTP only.** RTO is a standalone service (solve /
+exploit / estimate); Lyra POSTs to it when it wants analysis. No shared package, no shared
+DB, no shared UI components. If RTO is down, Lyra skips analysis and nothing breaks.
+
+## Schema (`schema_version: 1`)
+
+```jsonc
+{
+  "schema_version": 1,
+  "game": "NLH",                     // NLH | PLO | ...
+  "stakes": "1/3",                   // or null
+  "hero_pos": "BTN",                 // one of POSITIONS
+  "hero_cards": ["Ah", "Kh"],        // convenience mirror of the hero's players[].cards
+  "players": [                       // every player in the hand, incl. hero
+    {"pos": "BTN", "stack": 300, "name": "Hero", "cards": ["Ah","Kh"], "hero": true},
+    {"pos": "BB",  "stack": 250, "name": "Sal",  "cards": null}        // cards: null unless shown
+  ],
+  "actions": [                       // one flat chronological list across all streets
+    {"street": "preflop", "pos": "BTN", "action": "raise", "amount": 15},
+    {"street": "flop", "board": ["7d","2c","5h"]},   // a street begins with its board reveal
+    {"street": "flop", "pos": "BB", "action": "check"}
+  ],
+  "board": ["7d","2c","5h"],         // full final board, 0–5 cards
+  "result": {"pot": 40, "hero_net": 25, "summary": "one line"},
+  "completeness": {"cards": true, "board": true, "actions": true}
+}
+```
+
+### Conventions (load-bearing)
+
+- **Cards are lists of 2-char tokens**, `RankSuit`: rank in `23456789TJQKA` (ten = `T`),
+  suit in `c d h s` (lowercase). E.g. `["As","5d","2c"]`. RTO maps each token via
+  `pokercore.parse_card`. *(Chosen over space-joined strings: unambiguous, no re-splitting,
+  and it's what Lyra already stores + what the viewer reads.)*
+- **Unknown cards are kept, not dropped:** `"Ax"` = known rank / unknown suit, `"x"` =
+  fully unknown card. The LLM parser emits these when Brian didn't state suits. The tap
+  recorder won't — it captures complete cards by construction — so `"x"` is an
+  import/parser-only concern.
+- **`completeness`**: what's safe to use. `cards` is `true` only when hole cards exist and none
+  contains `"x"`; `board` when no board card does (empty counts as `true`); `actions` when any exist.
+  RTO uses `false`-card hands for positions/frequencies/pairs and skips suit-dependent math (flushes).
+- **Hero appears in `players[]`** with `"hero": true` and is findable via `pos == hero_pos`.
+  `hero_cards` is a mirror for the viewer; `players[].cards` is the source of truth.
+- **Positions:** `UTG UTG1 UTG2 MP LJ HJ CO BTN SB BB`.
+- **Actions:** `post fold check call bet raise allin`. `amount` is a plain number (no `$`),
+  null for non-sized actions (fold/check). Street boards appear as `{street, board}` entries.
+- **Streets:** `preflop flop turn river`.
+
+`lyra/poker.py:normalize_structured()` is the single function that guarantees this shape.
+It runs on store and on read, and is idempotent.
+
+## Transport (HTTP, Lyra serves on :7078)
+
+- `GET /hands/data?limit=N` → `{ "hands": [ {id, position, hole_cards, board, result, tag,
+  at, lesson, venue, stakes, has_structured}, ... ] }` — flat list for browsing. Use
+  `has_structured` to pick which hands have a replayable body worth fetching.
+- `GET /hand/{id}/data` → the full hand row; `structured` is the object above (or `null`
+  for a flat quick-log that hasn't been reconstructed).
+
+RTO's "Lyra bridge" (its `docs/estimator-design.md`, Phase B) walks `structured.actions`
+to classify each villain decision into `checked_to` / `facing_bet` / `facing_raise`, and
+uses shown `cards` + that street's `board` for board-relative categories. Everything that
+walk needs is in the schema above.
diff --git a/lyra/poker.py b/lyra/poker.py
index cc195bc..e215295 100644
--- a/lyra/poker.py
+++ b/lyra/poker.py
@@ -651,38 +651,102 @@ def _review_session_id() -> int:
     return int(cur.lastrowid)
 
 
+# --- the canonical structured-hand contract (see docs/HAND_HISTORY.md) ---------
+# This is the single shape that gets stored, replayed by the viewer, and exported to
+# RTO. The LLM parser produces it today; the tap recorder will produce it natively.
+HAND_SCHEMA_VERSION = 1  # stamped onto every hand as "schema_version" by normalize_structured()
+POSITIONS = ("UTG", "UTG1", "UTG2", "MP", "LJ", "HJ", "CO", "BTN", "SB", "BB")  # contract values for "pos"
+ACTION_VERBS = ("post", "fold", "check", "call", "bet", "raise", "allin")  # contract values for "action"
+STREETS = ("preflop", "flop", "turn", "river")  # NOTE(review): normalize_structured() does not validate against these three tuples
+
 _SUIT_SYM = {"♥": "h", "♦": "d", "♣": "c", "♠": "s"}
 
 
 def _norm_card(c):
+    """Canonicalize one card string: unicode suit -> letter, '10' -> 'T', then for 2-char
+    results rank upper / suit lower (e.g. '10♥' -> 'Th', 'as' -> 'As'). Non-strings are returned
+    unchanged. Placeholders are preserved: 'Ax' = known rank/unknown suit, 'x' = unknown card."""
     if not isinstance(c, str):
         return c
     s = c.strip()
     for sym, ltr in _SUIT_SYM.items():
         s = s.replace(sym, ltr)
+    s = s.replace("10", "T")
+    if len(s) == 2:
+        s = s[0].upper() + s[1].lower()  # 'Ax' stays 'Ax'; 'x' (len 1) and any other length are left untouched
     return s
 
 
-def _normalize_parsed(p: dict) -> dict:
-    """Normalize card strings (unicode suits -> letters) across a parsed hand."""
-    if not isinstance(p, dict):
-        return p
-    for key in ("hero_cards", "board"):
-        if isinstance(p.get(key), list):
-            p[key] = [_norm_card(c) for c in p[key]]
+def _card_known(c) -> bool:
+    """True only for a 2-char string with no 'x'/'X' placeholder; rank/suit legality is not checked."""
+    return isinstance(c, str) and len(c) == 2 and "x" not in c.lower()
+
+
+def _completeness(p: dict) -> dict:
+    """Summarize which parts of hand `p` are fully specified, as {"cards", "board", "actions"} bools,
+    so a consumer (RTO) can skip suit-dependent math (flushes) when suits weren't recorded."""
+    shown = [c for pl in (p.get("players") or []) if isinstance(pl.get("cards"), list)  # assumes every players[] entry is a dict
+             for c in pl["cards"]]
+    hole = list(p.get("hero_cards") or []) + shown  # hero's cards may be in both lists; duplicates don't change all()
+    return {
+        "cards": bool(hole) and all(_card_known(c) for c in hole),  # False when no hole cards are recorded at all
+        "board": all(_card_known(c) for c in (p.get("board") or [])),  # vacuously True for an empty board
+        "actions": bool(p.get("actions")),  # True when the actions list is non-empty
+    }
+
+
+def normalize_structured(parsed: dict) -> dict:
+    """Return a canonicalized copy of a structured hand (LLM parser today, tap recorder later):
+    cards normalized, non-dict players/actions dropped, the hero flagged in players[] (RTO finds
+    it via pos == hero_pos), schema_version stamped, and a completeness summary added. A
+    non-dict input is returned unchanged. Idempotent — the shape stored, replayed, and exported."""
+    if not isinstance(parsed, dict):
+        return parsed
+    p = dict(parsed)  # shallow copy; players/actions entries are copied below before being edited
+    p["schema_version"] = HAND_SCHEMA_VERSION
+    p["hero_cards"] = [_norm_card(c) for c in (p.get("hero_cards") or [])]
+    p["board"] = [_norm_card(c) for c in (p.get("board") or [])]
+
+    players = []
     for pl in p.get("players") or []:
-        if isinstance(pl, dict) and isinstance(pl.get("cards"), list):
+        if not isinstance(pl, dict):
+            continue  # non-dict entries are dropped from the output
+        pl = dict(pl)
+        if isinstance(pl.get("cards"), list):
             pl["cards"] = [_norm_card(c) for c in pl["cards"]]
+        pl.pop("hero", None)  # recomputed below so it can't go stale
+        players.append(pl)
+
+    # Hero must appear in players[] (with cards) — RTO reads the hero off pos==hero_pos.
+    hero_pos = p.get("hero_pos")
+    if hero_pos:
+        hero = next((pl for pl in players if pl.get("pos") == hero_pos), None)  # first match wins
+        if hero is None:
+            hero = {"pos": hero_pos}
+            players.insert(0, hero)  # synthesized hero goes first in players[]
+        hero["hero"] = True
+        if p["hero_cards"] and not hero.get("cards"):  # NOTE(review): one-way fill; hero_cards is never back-filled from players[].cards
+            hero["cards"] = list(p["hero_cards"])
+    p["players"] = players
+
+    actions = []
     for a in p.get("actions") or []:
-        if isinstance(a, dict) and isinstance(a.get("board"), list):
+        if not isinstance(a, dict):
+            continue  # non-dict entries are dropped from the output
+        a = dict(a)
+        if isinstance(a.get("board"), list):
             a["board"] = [_norm_card(c) for c in a["board"]]
+        actions.append(a)
+    p["actions"] = actions
+
+    p["completeness"] = _completeness(p)  # computed last, from the already-normalized cards
     return p
 
 
 def store_hand_history(parsed: dict, session_id: int | None = None,
                        tag: str | None = None, lesson: str | None = None) -> int:
     """Store a parsed hand: full JSON + extracted flat fields for stats/listing."""
-    parsed = _normalize_parsed(parsed)
+    parsed = normalize_structured(parsed)
     sid = _resolve(session_id) or _review_session_id()
     hero_cards = parsed.get("hero_cards") or []
     board = parsed.get("board") or []
@@ -736,7 +800,7 @@ def reconstruct_hand(hand_id: int, backend: str | None = None) -> dict | None:
     parsed = parse_hand(shorthand, backend=backend)
     if not parsed:
         return None
-    parsed = _normalize_parsed(parsed)
+    parsed = normalize_structured(parsed)
     conn = _c()
     with conn:
         conn.execute("UPDATE poker_hands SET structured = ? WHERE id = ?",
@@ -751,19 +815,29 @@ def get_hand(hand_id: int) -> dict | None:
     if not r:
         return None
     d = dict(r)
-    d["structured"] = json.loads(d["structured"]) if d.get("structured") else None
+    # Normalize on read too: legacy rows predate the contract, and it's idempotent for
+    # new ones — so /hand/{id}/data always serves the current versioned shape.
+    d["structured"] = normalize_structured(json.loads(d["structured"])) if d.get("structured") else None
     return d
 
 
 def list_recent_hands(limit: int = 60) -> list[dict]:
-    """Recent recorded hands with their session's venue/stakes, for browsing."""
+    """Return up to `limit` most recent hands (highest id first) as dicts, joined with their
+    session's venue/stakes, for browsing. Each carries a bool has_structured so a consumer (the
+    export, RTO) can tell which hands have a structured body to fetch via /hand/{id}/data."""
     rows = _c().execute(
         "SELECT h.id, h.position, h.hole_cards, h.board, h.result, h.tag, h.at, "
-        "h.lesson, s.venue AS venue, s.stakes AS stakes "
+        "h.lesson, (h.structured IS NOT NULL) AS has_structured, "  # NOTE(review): '' is non-NULL here, but get_hand() maps a falsy structured to None
+        "s.venue AS venue, s.stakes AS stakes "
         "FROM poker_hands h LEFT JOIN poker_sessions s ON s.id = h.session_id "
         "ORDER BY h.id DESC LIMIT ?", (limit,),
     ).fetchall()
-    return [dict(r) for r in rows]
+    out = []
+    for r in rows:
+        d = dict(r)
+        d["has_structured"] = bool(d["has_structured"])  # presumably 0/1 from the DB; expose a real bool
+        out.append(d)
+    return out
 
 
 # --- session recap (.md generation on top of structured data + conversation) ---
diff --git a/tests/test_hand_schema.py b/tests/test_hand_schema.py
new file mode 100644
index 0000000..99a5039
--- /dev/null
+++ b/tests/test_hand_schema.py
@@ -0,0 +1,103 @@
+"""The canonical structured-hand contract (docs/HAND_HISTORY.md): normalize + export.
+
+normalize_structured() is the single guarantee that every stored / replayed / exported
+hand has the versioned shape RTO consumes.
+"""
+from __future__ import annotations
+
+import importlib
+
+import pytest
+
+
+@pytest.fixture
+def poker(tmp_path, monkeypatch):  # returns lyra.poker reloaded after LYRA_DB_PATH points at a per-test temp file
+    monkeypatch.setenv("LYRA_DB_PATH", str(tmp_path / "test.db"))
+    from lyra import llm
+    monkeypatch.setattr(llm, "embed", lambda texts: [[0.1, 0.2, 0.3] for _ in texts])  # stub: one fixed vector per text
+    import lyra.memory as memory
+    importlib.reload(memory)  # presumably memory reads LYRA_DB_PATH at import time — TODO confirm
+    import lyra.poker as poker
+    importlib.reload(poker)  # reloaded after memory so it sees the reloaded module state
+    return poker
+
+
+def _full_hand():  # raw, un-normalized input: lowercase cards, unicode suits, hero listed without cards
+    return {
+        "game": "NLH", "stakes": "1/3", "hero_pos": "BTN",
+        "hero_cards": ["ah", "kh"],
+        "players": [
+            {"pos": "BTN", "stack": 300, "name": "Hero"},  # no "cards": exercises the hero_cards mirror
+            {"pos": "BB", "stack": 250, "name": "Sal", "cards": ["qs", "qd"]},
+        ],
+        "actions": [
+            {"street": "preflop", "pos": "BTN", "action": "raise", "amount": 15},
+            {"street": "flop", "board": ["7♦", "2♣", "5♥"]},
+            {"street": "flop", "pos": "BB", "action": "check"},
+        ],
+        "board": ["7♦", "2♣", "5♥"],
+        "result": {"pot": 40, "hero_net": 25, "summary": "won at showdown"},
+    }
+
+
+def test_stamps_version(poker):  # the version is stamped even when only hero_pos is supplied
+    out = poker.normalize_structured({"hero_pos": "CO"})
+    assert out["schema_version"] == poker.HAND_SCHEMA_VERSION
+
+
+def test_card_normalization(poker):
+    out = poker.normalize_structured(_full_hand())
+    assert out["hero_cards"] == ["Ah", "Kh"]            # lowercased input -> canonical
+    assert out["board"] == ["7d", "2c", "5h"]           # unicode suits -> letters
+    assert out["actions"][1]["board"] == ["7d", "2c", "5h"]  # per-street board entries are normalized too
+    # ten + suit symbol together
+    assert poker.normalize_structured({"board": ["10♠"]})["board"] == ["Ts"]
+
+
+def test_unknown_cards_preserved(poker):
+    out = poker.normalize_structured({"hero_cards": ["Ax", "x"], "board": ["Ax", "4x", "x"]})
+    assert out["hero_cards"] == ["Ax", "x"]             # placeholders kept, not dropped
+    assert out["completeness"]["cards"] is False        # any placeholder hole card -> incomplete
+    assert out["completeness"]["board"] is False        # any placeholder board card -> incomplete
+
+
+def test_hero_synced_into_players(poker):
+    out = poker.normalize_structured(_full_hand())
+    hero = next(p for p in out["players"] if p["pos"] == "BTN")
+    assert hero["hero"] is True
+    assert hero["cards"] == ["Ah", "Kh"]               # mirrored from hero_cards
+    assert sum(1 for p in out["players"] if p.get("hero")) == 1  # exactly one player carries the hero flag
+
+
+def test_hero_inserted_when_missing_from_players(poker):  # hero entry is synthesized from hero_pos + hero_cards
+    out = poker.normalize_structured({"hero_pos": "SB", "hero_cards": ["As", "Ad"], "players": []})
+    assert out["players"] == [{"pos": "SB", "hero": True, "cards": ["As", "Ad"]}]
+
+
+def test_completeness_full_hand(poker):  # every card known and actions present -> all three flags True
+    c = poker.normalize_structured(_full_hand())["completeness"]
+    assert c == {"cards": True, "board": True, "actions": True}
+
+
+def test_idempotent(poker):  # a second pass over already-normalized output changes nothing
+    once = poker.normalize_structured(_full_hand())
+    twice = poker.normalize_structured(once)
+    assert once == twice
+
+
+def test_store_and_get_roundtrip_is_normalized(poker):
+    sid = poker.start_session(venue="Meadows", stakes="1/3", buy_in=400)
+    hid = poker.store_hand_history(_full_hand(), session_id=sid, tag="well_played")
+    got = poker.get_hand(hid)["structured"]  # get_hand() runs normalize_structured() again on read
+    assert got["schema_version"] == poker.HAND_SCHEMA_VERSION
+    assert got["board"] == ["7d", "2c", "5h"]
+    assert got["completeness"]["cards"] is True
+
+
+def test_list_recent_hands_flags_structured(poker):
+    sid = poker.start_session(venue="Meadows", stakes="1/3", buy_in=400)
+    structured_id = poker.store_hand_history(_full_hand(), session_id=sid)
+    flat_id = poker.log_hand(session_id=sid, position="CO", hole_cards="Jc Jd")  # flat quick-log; expected to have no structured body
+    rows = {r["id"]: r for r in poker.list_recent_hands()}
+    assert rows[structured_id]["has_structured"] is True   # `is` pins a real bool, not 0/1
+    assert rows[flat_id]["has_structured"] is False