feat(forward): re-pair late-arriving TXTs on subsequent scans

When a binary is forwarded WITHOUT its paired _ASCII.TXT (because
the TXT wasn't quiescent within the grace period — BW slow to
write, AV scanning, etc.), the old behaviour was to permanently
mark the binary as "done" in the state file, even though the TXT
might land seconds later.  Result: that event lived in SFM forever
with broken-codec peak values and no project info.

Fix: state entries now carry a had_report flag.  Forwards without
a TXT set had_report=False.  On subsequent scans, the watcher
treats had_report=False entries as re-pair candidates — they get
re-forwarded once the TXT appears, and the SFM server's upsert
path (in seismo-relay's insert_events IntegrityError handler)
refreshes the DB row with the report's authoritative values.

ForwardState.status(sha256) returns one of three values:
  None  — never forwarded.  First-forward path.
  True  — forwarded successfully WITH report (or legacy entry
          without the had_report field).  Permanently done.
  False — forwarded WITHOUT report.  Re-pair if TXT now exists.

Backward compat: legacy state-file entries (no had_report key)
default to True so existing deployments don't unexpectedly
re-forward every entry on upgrade.

Tests cover:
  - re-pair when TXT appears after a had_report=False forward
  - had_report=True entries stay skipped permanently
  - legacy entries (missing field) treated as fully forwarded
  - state.status() returns None for unknown sha
  - re-marking had_report=False then True promotes to fully-done

36 watcher tests pass (was 31, +5 new).
This commit is contained in:
2026-05-11 16:22:53 +00:00
parent e6c25ab941
commit 65b3af90ae
3 changed files with 166 additions and 6 deletions
+89
View File
@@ -285,6 +285,95 @@ class TestFindPendingEvents(unittest.TestCase):
self.assertEqual(os.path.basename(bin_path), "M529LK44.AB0")
self.assertIsNone(txt_path)
def test_re_pair_after_late_arriving_txt(self):
    """A binary forwarded without its TXT is re-queued once the TXT lands.

    While the report is still absent the scan must return nothing; after
    the report file shows up, the binary/TXT pair must be returned.
    """
    binary_name = "M529LK44.AB0"
    report_name = "M529LK44.AB0.TXT"
    payload = b"binary"

    with tempfile.TemporaryDirectory() as tmp:
        tmp_p = Path(tmp)
        bin_p = self._make(tmp_p, binary_name,
                           age_seconds=200, content=payload)

        # Seed the state file as if the binary had already gone out on
        # its own, i.e. the TXT missed the grace period.
        state = ef.ForwardState(str(tmp_p / "fwd.json"))
        digest = ef.sha256_of_file(str(bin_p))
        state.mark_forwarded(digest, binary_name, len(payload),
                             had_report=False)

        def scan():
            # Both passes use identical scan parameters so the only
            # difference between them is whether the TXT exists.
            return ef.find_pending_events(
                str(tmp_p), state, max_age_days=30,
                quiescence_seconds=5, missing_report_grace_seconds=60,
            )

        # Pass 1: report still absent, so nothing should be queued.
        self.assertEqual(scan(), [],
                         "no TXT present → no re-pair attempt")

        # The report finally gets written.
        self._make(tmp_p, report_name,
                   age_seconds=100, content=b"report")

        # Pass 2: the pair must now be offered for re-forwarding.
        pending = scan()
        self.assertEqual(len(pending), 1,
                         "TXT now present → re-pair attempt expected")
        entry = pending[0]
        self.assertEqual(os.path.basename(entry[0]), binary_name)
        self.assertEqual(os.path.basename(entry[1]), report_name)
def test_re_pair_not_attempted_when_already_had_report(self):
    """A forward recorded with had_report=True is never offered again.

    Both the binary and its TXT are on disk, but because the state
    entry says the report was already included, the scan must come
    back empty.
    """
    binary_name = "M529LK44.AB0"

    with tempfile.TemporaryDirectory() as tmp:
        tmp_p = Path(tmp)
        bin_p = self._make(tmp_p, binary_name,
                           age_seconds=200, content=b"x")
        self._make(tmp_p, "M529LK44.AB0.TXT",
                   age_seconds=100, content=b"r")

        # Record the binary as forwarded together with its report.
        state = ef.ForwardState(str(tmp_p / "fwd.json"))
        digest = ef.sha256_of_file(str(bin_p))
        state.mark_forwarded(digest, binary_name, 1, had_report=True)

        pending = ef.find_pending_events(
            str(tmp_p), state, max_age_days=30,
            quiescence_seconds=5, missing_report_grace_seconds=60,
        )
        self.assertEqual(pending, [],
                         "had_report=True forwards stay skipped")
def test_legacy_state_entries_default_to_had_report_true(self):
    """State entries lacking a had_report key count as fully forwarded.

    Backward compatibility: a state file written before the field
    existed must report status True, so an upgrade does not cause
    every old entry to be re-forwarded.
    """
    import json

    # Entry as an older version would have written it: no had_report key.
    legacy_entry = {
        "filename": "M529LK01.AB0",
        "size": 123,
        "forwarded_at": "2025-01-01T00:00:00Z",
    }
    state_doc = {
        "version": 1,
        "forwarded": {"abc123": legacy_entry},
    }

    with tempfile.TemporaryDirectory() as tmp:
        path = str(Path(tmp) / "fwd.json")
        with open(path, "w") as f:
            json.dump(state_doc, f)

        state = ef.ForwardState(path)
        self.assertIs(state.status("abc123"), True,
                      "legacy entry must default to 'fully forwarded'")
def test_state_status_returns_none_for_unknown_sha(self):
    """status() yields None for a digest that was never marked."""
    with tempfile.TemporaryDirectory() as tmp:
        state_path = str(Path(tmp) / "fwd.json")
        state = ef.ForwardState(state_path)
        result = state.status("never-seen")
        self.assertIs(result, None)
def test_state_mark_with_had_report_false(self):
    """Marking with had_report=False yields False; re-marking True promotes."""
    sha, name, size = "xyz", "f.AB0", 100

    with tempfile.TemporaryDirectory() as tmp:
        state = ef.ForwardState(str(Path(tmp) / "fwd.json"))

        # First forward went out without a report.
        state.mark_forwarded(sha, name, size, had_report=False)
        self.assertIs(state.status(sha), False)

        # A later forward that includes the report promotes the entry
        # to the fully-done status.
        state.mark_forwarded(sha, name, size, had_report=True)
        self.assertIs(state.status(sha), True)
def test_defers_when_txt_missing_and_within_grace(self):
with tempfile.TemporaryDirectory() as tmp:
tmp_p = Path(tmp)