feat(forward): re-pair late-arriving TXTs on subsequent scans
When a binary is forwarded WITHOUT its paired _ASCII.TXT (because
the TXT wasn't quiescent within the grace period — BW slow to
write, AV scanning, etc.), the old behaviour was to permanently
mark the binary as "done" in the state file, even though the TXT
might land seconds later. Result: that event lived in SFM forever
with broken-codec peak values and no project info.
Fix: state entries now carry a had_report flag. Forwards without
a TXT set had_report=False. On subsequent scans, the watcher
treats had_report=False entries as re-pair candidates — they get
re-forwarded once the TXT appears, and the SFM server's upsert
path (in seismo-relay's insert_events IntegrityError handler)
refreshes the DB row with the report's authoritative values.
ForwardState.status(sha256) now returns one of three values:
None — never forwarded. First-forward path.
True — forwarded successfully WITH report (or legacy entry
without the had_report field). Permanently done.
False — forwarded WITHOUT report. Re-pair if TXT now exists.
Backward compat: legacy state-file entries (no had_report key)
default to True so existing deployments don't unexpectedly
re-forward every entry on upgrade.
Tests cover:
- re-pair when TXT appears after a had_report=False forward
- had_report=True entries stay skipped permanently
- legacy entries (missing field) treated as fully forwarded
- state.status() returns None for unknown sha
- re-marking had_report=False then True promotes to fully-done
36 watcher tests pass (was 31, +5 new).
This commit is contained in:
@@ -285,6 +285,95 @@ class TestFindPendingEvents(unittest.TestCase):
|
||||
self.assertEqual(os.path.basename(bin_path), "M529LK44.AB0")
|
||||
self.assertIsNone(txt_path)
|
||||
|
||||
def test_re_pair_after_late_arriving_txt(self):
    """A binary recorded with had_report=False is re-offered once its TXT exists.

    The state is seeded as if the binary had already been forwarded
    without a report. A scan with no TXT on disk must yield nothing; a
    second scan after the TXT has been created must yield exactly one
    (binary, TXT) pair.
    """
    # Both scans use the same tuning; keep it in one place.
    scan_kwargs = dict(
        max_age_days=30,
        quiescence_seconds=5,
        missing_report_grace_seconds=60,
    )
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        payload = b"binary"
        binary_file = self._make(root, "M529LK44.AB0",
                                 age_seconds=200, content=payload)

        # Seed the state we would be in after a forward that went out
        # alone because the TXT was too late.
        state = ef.ForwardState(str(root / "fwd.json"))
        sha = ef.sha256_of_file(str(binary_file))
        state.mark_forwarded(sha, "M529LK44.AB0", len(payload),
                             had_report=False)

        # Scan 1: the TXT is still absent, so nothing is pending.
        before_txt = ef.find_pending_events(str(root), state, **scan_kwargs)
        self.assertEqual(before_txt, [],
                         "no TXT present → no re-pair attempt")

        # The report finally shows up on disk.
        self._make(root, "M529LK44.AB0.TXT",
                   age_seconds=100, content=b"report")

        # Scan 2: the binary is offered again, now paired with its TXT.
        after_txt = ef.find_pending_events(str(root), state, **scan_kwargs)
        self.assertEqual(len(after_txt), 1,
                         "TXT now present → re-pair attempt expected")
        first_hit = after_txt[0]
        self.assertEqual(os.path.basename(first_hit[0]), "M529LK44.AB0")
        self.assertEqual(os.path.basename(first_hit[1]), "M529LK44.AB0.TXT")
|
||||
|
||||
def test_re_pair_not_attempted_when_already_had_report(self):
    """An entry recorded with had_report=True is never offered again.

    Both the binary and its TXT are on disk, but because the state
    already records a with-report forward the scan must return nothing.
    """
    with tempfile.TemporaryDirectory() as workdir:
        root = Path(workdir)
        binary_file = self._make(root, "M529LK44.AB0",
                                 age_seconds=200, content=b"x")
        self._make(root, "M529LK44.AB0.TXT", age_seconds=100, content=b"r")

        state = ef.ForwardState(str(root / "fwd.json"))
        sha = ef.sha256_of_file(str(binary_file))
        state.mark_forwarded(sha, "M529LK44.AB0", 1, had_report=True)

        found = ef.find_pending_events(
            str(root),
            state,
            max_age_days=30,
            quiescence_seconds=5,
            missing_report_grace_seconds=60,
        )
        self.assertEqual(found, [],
                         "had_report=True forwards stay skipped")
|
||||
|
||||
def test_legacy_state_entries_default_to_had_report_true(self):
    """A state-file entry with no had_report key reports status True.

    Writes a version-1 state file by hand whose single entry omits
    had_report, loads it through ForwardState, and checks that
    status() returns True for that entry.
    """
    import json

    # Deliberately no "had_report" key: this is the legacy entry shape.
    legacy_entry = {
        "filename": "M529LK01.AB0",
        "size": 123,
        "forwarded_at": "2025-01-01T00:00:00Z",
    }
    state_doc = {
        "version": 1,
        "forwarded": {"abc123": legacy_entry},
    }
    with tempfile.TemporaryDirectory() as workdir:
        state_file = str(Path(workdir) / "fwd.json")
        with open(state_file, "w") as handle:
            json.dump(state_doc, handle)

        state = ef.ForwardState(state_file)
        self.assertIs(state.status("abc123"), True,
                      "legacy entry must default to 'fully forwarded'")
|
||||
|
||||
def test_state_status_returns_none_for_unknown_sha(self):
    """status() yields None for a digest that was never marked forwarded."""
    with tempfile.TemporaryDirectory() as workdir:
        state_file = Path(workdir) / "fwd.json"
        state = ef.ForwardState(str(state_file))
        result = state.status("never-seen")
        self.assertIs(result, None)
|
||||
|
||||
def test_state_mark_with_had_report_false(self):
    """Marking with had_report=False then True moves status False -> True."""
    with tempfile.TemporaryDirectory() as workdir:
        state_file = Path(workdir) / "fwd.json"
        state = ef.ForwardState(str(state_file))

        # First mark: forwarded without a report.
        state.mark_forwarded("xyz", "f.AB0", 100, had_report=False)
        self.assertIs(state.status("xyz"), False)

        # Marking the same digest again with a report promotes it to done.
        state.mark_forwarded("xyz", "f.AB0", 100, had_report=True)
        self.assertIs(state.status("xyz"), True)
|
||||
|
||||
def test_defers_when_txt_missing_and_within_grace(self):
|
||||
with tempfile.TemporaryDirectory() as tmp:
|
||||
tmp_p = Path(tmp)
|
||||
|
||||
Reference in New Issue
Block a user