codec-re: 30 NN partial finding — sum matches but per-sample distribution doesn't

Tested the 12-bit signed packed delta hypothesis (motivated by the
observation that ±2047 in 16-count units ≈ ±32K raw ADC counts, almost
exactly the int16 ADC range — a strong design hint).

Result: mixed.  For SP0 block @1689 (V seg 4, samples 650..653):
  truth deltas:                47, 297, 384, 61   (sum = 789)
  12-bit BE contiguous pred:   17,  47, 664, 61   (sum = 789)

Positions 1 and 3 of the pred match truth values at positions 0 and 3
exactly, AND the total sum across all 4 positions matches.  But
positions 0 and 2 of pred don't match any truth value.

Hypothesis space narrows to:
- 12-bit deltas WITH a specific re-ordering or interleaving
- 12-bit deltas with one of the positions being a "step size" or
  "checksum-like" repacked value
- A nonlinear / coded format where the underlying total displacement
  is preserved but per-sample distribution is encoded differently

Two analysis scripts committed (test_30nn_12bit.py, test_30nn_v2.py).
The v2 script uses a real-decoder simulation to get the exact channel
+ sample-index for each 30 NN block, eliminating off-by-one errors in
the truth lookup.
This commit is contained in:
Claude
2026-05-12 04:41:59 +00:00
committed by serversdown
parent ce5dc640ba
commit d4cdce77fa
2 changed files with 336 additions and 0 deletions
+195
View File
@@ -0,0 +1,195 @@
"""Test 12-bit signed packed deltas hypothesis for 30 NN blocks across all loud events.
For each 30 NN block in each event, identify what samples it should cover
(based on the cumulative delta count up to that point) and compare the
truth deltas against various 12-bit packing schemes.
"""
import sys
sys.path.insert(0, ".")
from analysis.load_bundle import _parse_txt
from minimateplus.waveform_codec import walk_body, find_data_start
# Channel assigned to each 0x40-delimited segment, in rotation: main() looks
# up segment k as CHANNEL_ORDER[k % 4].  Blocks that precede the first
# segment header are handled separately as the initial "Tran" segment.
CHANNEL_ORDER = ["Vert", "Long", "MicL", "Tran"] # rotation after initial T
def s12(v):
    """Reinterpret an unsigned 12-bit value as two's-complement signed.

    Values below 0x800 are returned unchanged; values of 0x800 and above
    have 0x1000 subtracted, so 0xFFF -> -1 and 0x800 -> -2048.
    """
    if v >= 0x800:
        # Sign bit (bit 11) is set: wrap into the negative range.
        return v - 0x1000
    return v
def unpack_12bit_be(data):
    """Decode 4 signed 12-bit deltas from the first 6 bytes of *data*.

    The 6 bytes are read as one 48-bit big-endian integer and split,
    most-significant field first, into four 12-bit fields; each field is
    sign-extended with s12().  For a 6-byte input this produces the same
    values as unpack_12bit_be_per_triplet(), because each 3-byte group
    holds exactly two of the 12-bit fields.

    Only the first 6 bytes are used, matching the sibling unpackers which
    index data[0]..data[5]; any trailing bytes are ignored.

    Raises:
        ValueError: if *data* holds fewer than 6 bytes (previously this
            silently decoded a right-aligned, zero-padded value).
    """
    if len(data) < 6:
        raise ValueError(
            f"need at least 6 bytes for 4 x 12-bit deltas, got {len(data)}"
        )
    # Slice before converting: int.from_bytes() consumes the whole buffer,
    # so extra trailing bytes would otherwise shift every field.
    val = int.from_bytes(data[:6], "big")
    # Bit offsets of the four fields within the 48-bit word, MSB-first.
    return [s12((val >> shift) & 0xFFF) for shift in (36, 24, 12, 0)]
def unpack_12bit_le(data):
    """Decode 4 signed 12-bit deltas from 6 bytes, little-endian per triplet.

    Bytes 0..2 and bytes 3..5 are each treated as a 24-bit little-endian
    word.  The low 12 bits of a word give the first delta of the pair and
    the high 12 bits give the second; both are sign-extended with s12().
    Indexing past the end of *data* raises IndexError.
    """
    deltas = []
    for base in (0, 3):
        lo, mid, hi = data[base], data[base + 1], data[base + 2]
        word = lo | (mid << 8) | (hi << 16)
        deltas.append(s12(word & 0xFFF))
        deltas.append(s12(word >> 12))
    return deltas
def unpack_12bit_be_per_triplet(data):
    """Decode 4 signed 12-bit deltas from 6 bytes, big-endian per triplet.

    Bytes 0..2 and bytes 3..5 are each treated as a 24-bit big-endian
    word.  The high 12 bits of a word give the first delta of the pair and
    the low 12 bits give the second; both are sign-extended with s12().
    Indexing past the end of *data* raises IndexError.
    """
    deltas = []
    for base in (0, 3):
        hi, mid, lo = data[base], data[base + 1], data[base + 2]
        word = (hi << 16) | (mid << 8) | lo
        deltas.append(s12(word >> 12))
        deltas.append(s12(word & 0xFFF))
    return deltas
def truth_deltas_for_block(blocks, block_idx, event_truth, channel):
    """Locate the samples covered by the 30 NN block at *block_idx*.

    Despite its name, this function does not compute truth deltas.  It
    returns the block's position relative to the start of its segment so
    a caller can look the deltas up itself.

    The segment starts just after the nearest preceding block whose
    tag_hi is 0x40 (a segment header), or at blocks[0] when there is no
    such header (the initial segment).  Every block between that start
    and *block_idx* with tag_hi 0x00, 0x10, 0x20 or 0x30 contributes its
    tag_lo to the running delta count.  Earlier 0x30 blocks are included
    because a 30 NN block is taken to carry NN deltas, exactly like the
    block being located.

    Args:
        blocks: indexable sequence of objects with ``tag_hi`` and
            ``tag_lo`` attributes.
        block_idx: index in *blocks* of the 30 NN block of interest.
        event_truth: unused; kept for interface compatibility.
        channel: unused; kept for interface compatibility.

    Returns:
        ``(first_sample, nn)`` where *first_sample* is the segment-relative
        index of the first sample the block affects (the segment's anchor
        pair occupies relative samples 0 and 1, so deltas start at 2) and
        *nn* is the block's own ``tag_lo``.
    """
    # Anchor pair = relative samples 0 and 1, for the initial segment and
    # for every 0x40-headed segment alike.
    first_sample_in_segment = 2
    delta_tags = (0x00, 0x10, 0x20, 0x30)

    # Walk backwards from the block to its segment header (or to the start
    # of the list), summing the deltas emitted by the blocks in between.
    delta_count = 0
    for j in range(block_idx, -1, -1):
        tag = blocks[j].tag_hi
        if tag == 0x40:
            break
        if j != block_idx and tag in delta_tags:
            delta_count += blocks[j].tag_lo

    nn = blocks[block_idx].tag_lo
    return first_sample_in_segment + delta_count, nn
def _locate_30nn_blocks(blocks):
    """Return ``(block, seg_num, delta_count)`` for every 30 NN block.

    *seg_num* is the zero-based count of 0x40 segment headers seen before
    the block, minus one, so blocks preceding the first header (or every
    block when there is no header at all) get -1.  *delta_count* is the
    sum of tag_lo over the 0x00 / 0x10 / 0x20 / 0x30 blocks that sit
    between the segment start and the block.
    """
    located = []
    seg_num = -1  # -1 = initial segment, before any 0x40 header
    delta_count = 0
    for blk in blocks:
        if blk.tag_hi == 0x40:
            # New segment: restart the per-segment delta counter.
            seg_num += 1
            delta_count = 0
            continue
        if blk.tag_hi == 0x30:
            located.append((blk, seg_num, delta_count))
        if blk.tag_hi in (0x00, 0x10, 0x20, 0x30):
            # A 30 NN block is taken to carry NN deltas as well, so it
            # advances the position for any later block in this segment.
            delta_count += blk.tag_lo
    return located


def main():
    """Compare each 30 NN block against truth deltas under three packings.

    For every fixture file: read the body, parse the matching .TXT truth
    samples, walk the body into blocks, and for each block with tag_hi
    0x30 print the raw data, the truth deltas for the samples it is
    estimated to cover, and the decode produced by each 12-bit unpacker
    together with the number of positions that agree with truth.
    """
    for stem in ("M529LL1A.SP0", "M529LL1L.JQ0", "M529LL1L.V70",
                 "M529LL1A.SS0", "M529LL1A.SV0"):
        path = f"tests/fixtures/5-11-26/{stem}"
        with open(path, "rb") as f:
            # NOTE(review): 43 leading / 26 trailing bytes are dropped;
            # presumably a fixed file header and footer -- confirm.
            body = f.read()[43:-26]
        _, samples = _parse_txt(path + ".TXT")
        blocks = walk_body(body, find_data_start(body))

        thirty_blocks = _locate_30nn_blocks(blocks)
        if not thirty_blocks:
            continue
        print(f"\n=== {stem} ===")

        # Scaled truth per channel, built once per file rather than once
        # per block.
        truth_by_channel = {}
        for b, seg_num, delta_count in thirty_blocks:
            if seg_num == -1:
                channel = "Tran"
                seg_label = "initial T"
                cycle = 0
            else:
                channel = CHANNEL_ORDER[seg_num % 4]
                seg_label = f"seg {seg_num}"
                cycle = seg_num // 4

            # Each full channel rotation is assumed to advance the channel
            # by (approximately) 512 samples; +2 skips the segment's
            # anchor pair, which occupies its first two samples.
            sample_idx = cycle * 512 + 2 + delta_count

            if channel not in truth_by_channel:
                # NOTE(review): x200 presumably converts the TXT values to
                # integer codec counts -- confirm against the TXT format.
                truth_by_channel[channel] = [
                    round(v * 200) for v in samples[channel]
                ]
            truth_ch = truth_by_channel[channel]

            nn = b.tag_lo
            # The block covers sample_idx .. sample_idx + nn - 1, so it is
            # in range as long as sample_idx + nn <= len(truth_ch).
            if sample_idx + nn > len(truth_ch):
                print(f" block @ {b.offset} ({seg_label} {channel}): out of truth range")
                continue

            # sample_idx is always >= 2, so a previous sample always exists.
            truth_deltas = [
                truth_ch[i] - truth_ch[i - 1]
                for i in range(sample_idx, sample_idx + nn)
            ]

            # Try each packing
            schemes = [
                ("12-bit BE contiguous", unpack_12bit_be(b.data)),
                ("12-bit LE per-triplet", unpack_12bit_le(b.data)),
                ("12-bit BE per-triplet", unpack_12bit_be_per_triplet(b.data)),
            ]
            print(f" block @ {b.offset:>5} ({seg_label} {channel}, samples {sample_idx}..{sample_idx+nn-1}):")
            print(f" data: {b.data.hex(' ')}")
            print(f" truth: {truth_deltas}")
            for name, pred in schemes:
                # NOTE(review): a full match and a miss differ only by one
                # space of indentation here; a visible marker may have
                # been intended.
                match = "" if pred == truth_deltas else " "
                n_match = sum(1 for x, y in zip(pred, truth_deltas) if x == y)
                print(f" {match}{n_match}/4 {name}: {pred}")


if __name__ == "__main__":
    main()