feat(forward): rate cap + seed-state mode for safe backfill (v1.5.2)

Two safety nets for first-deploy on Blastware ACH machines that
have accumulated tens or hundreds of thousands of historical events
in the watch folder.

1. SFM_MAX_FORWARDS_PER_PASS (default 500, 0=unlimited)
   ---------------------------------------------------
   Cap on the number of events forwarded per scan tick.  With the
   default cap of 500 and the 60-second default interval that's
   ~30K events/hour throughput — the SFM server gets a steady drip
   instead of one giant burst.
   Scan now sorts by mtime ascending so backfill advances
   chronologically (oldest first) and successive scans always
   make progress instead of re-considering the same N newest files.

   Wired into:
     - event_forwarder.find_pending_events / forward_pending
     - series3_watcher.run_watcher loop
     - config-template.ini
     - settings_dialog SFM Forward tab (new "Max Events Per Pass"
       spinbox, validated in _on_save)

2. event_forwarder.py --seed-state CLI
   -----------------------------------
   One-shot mode that walks the watch folder, sha256s every in-window
   event binary, and marks them all as already-forwarded WITHOUT
   POSTing anything.  Run before flipping SFM_FORWARD_ENABLED=true
   to skip the historical backfill entirely — the watcher then only
   forwards events that appear AFTER the seed.

   Usage:
       python event_forwarder.py --seed-state \
           --watch "C:\Blastware 10\Event\autocall home" \
           --state "C:\...\sfm_forwarded.json" \
           [--max-age-days 365]

7 new unit tests:
  - max_per_pass cap enforcement (=N, =0 unlimited, oldest-first
    ordering)
  - seed-state mode (in-window seeding, max-age skip,
    end-to-end skip-after-seed, idempotent re-runs)

README adds a "First-time deployment" section walking through both
options.  Bumps to v1.5.2.
This commit is contained in:
2026-05-10 00:20:10 +00:00
parent 3ee0cae31e
commit 815c643fb2
9 changed files with 390 additions and 15 deletions
+148
View File
@@ -223,6 +223,154 @@ class TestFindPendingEvents(unittest.TestCase):
)
self.assertEqual(len(pending), 0)
def test_max_per_pass_caps_returned_count(self):
    """A positive max_per_pass bounds the number of pairs returned.

    Five event binaries, each with a paired .TXT, are written to a
    scratch folder; with max_per_pass=2 the scan must hand back
    exactly two entries.
    """
    with tempfile.TemporaryDirectory() as workdir:
        folder = Path(workdir)
        # M529LK01.AB0 .. M529LK05.AB0, each binary with unique content
        # and a report that is 10 seconds younger than the binary.
        event_names = ["M529LK0{}.AB0".format(n) for n in range(1, 6)]
        for idx, event_name in enumerate(event_names):
            self._make(folder, event_name, age_seconds=120 + idx,
                       content=("bin-" + str(idx)).encode())
            self._make(folder, event_name + ".TXT",
                       age_seconds=110 + idx, content=b"report")

        fwd_state = ef.ForwardState(str(folder / "fwd.json"))
        scan_options = dict(
            max_age_days=30,
            quiescence_seconds=5,
            missing_report_grace_seconds=60,
            max_per_pass=2,
        )
        found = ef.find_pending_events(str(folder), fwd_state,
                                       **scan_options)
        self.assertEqual(len(found), 2)
def test_max_per_pass_zero_means_unlimited(self):
    """max_per_pass=0 applies no cap: all four binaries come back."""
    with tempfile.TemporaryDirectory() as workdir:
        folder = Path(workdir)
        total = 4
        # Only binaries are written here (no .TXT reports); each is at
        # least 120s old, versus the 60s grace value passed below.
        for idx in range(total):
            payload = ("bin-" + str(idx)).encode()
            self._make(folder, "M529LK0{}.AB0".format(idx),
                       age_seconds=120 + idx, content=payload)

        fwd_state = ef.ForwardState(str(folder / "fwd.json"))
        scan_options = dict(
            max_age_days=30,
            quiescence_seconds=5,
            missing_report_grace_seconds=60,
            max_per_pass=0,
        )
        found = ef.find_pending_events(str(folder), fwd_state,
                                       **scan_options)
        self.assertEqual(len(found), total)
def test_max_per_pass_returns_oldest_first(self):
    """With a cap in place, the oldest files are returned first.

    Chronological order means each successive scan advances through
    the backlog rather than looking at the same newest files again.
    """
    with tempfile.TemporaryDirectory() as workdir:
        folder = Path(workdir)
        # File index follows age: 0 is the oldest (200s) and 3 is the
        # youngest (50s).  Every binary gets a report 10s younger.
        for idx, age in enumerate((200, 150, 100, 50)):
            binary_name = "M529LK0{}.AB0".format(idx)
            self._make(folder, binary_name, age_seconds=age,
                       content=("c" + str(idx)).encode())
            self._make(folder, binary_name + ".TXT",
                       age_seconds=age - 10, content=b"r")

        fwd_state = ef.ForwardState(str(folder / "fwd.json"))
        found = ef.find_pending_events(
            str(folder), fwd_state,
            max_age_days=30,
            quiescence_seconds=5,
            missing_report_grace_seconds=60,
            max_per_pass=2,
        )
        # With a cap of 2, only the 200s and 150s binaries are expected,
        # and in that order.
        returned = [os.path.basename(entry[0]) for entry in found]
        self.assertEqual(returned, ["M529LK00.AB0", "M529LK01.AB0"])
# ── Seed-state mode ──────────────────────────────────────────────────────────
class TestSeedStateFromFolder(unittest.TestCase):
def _make(self, dir_path: Path, name: str, age_seconds: float = 100,
content: bytes = b"x") -> Path:
p = dir_path / name
p.write_bytes(content)
target = time.time() - age_seconds
os.utime(p, (target, target))
return p
def test_seeds_every_in_window_event_without_posting(self):
with tempfile.TemporaryDirectory() as tmp:
tmp_p = Path(tmp)
for i in range(3):
self._make(tmp_p, "M529LK0{}.AB0".format(i),
age_seconds=120 + i, content=("e" + str(i)).encode())
# Plus a non-event file we should ignore
self._make(tmp_p, "BE11529.MLG", age_seconds=120, content=b"mlg")
state = ef.ForwardState(str(tmp_p / "seed.json"))
counts = ef.seed_state_from_folder(
str(tmp_p), state, max_age_days=30,
)
self.assertEqual(counts["scanned"], 3)
self.assertEqual(counts["seeded"], 3)
self.assertEqual(counts["already_known"], 0)
self.assertEqual(state.count(), 3)
def test_seed_skips_files_beyond_max_age_days(self):
with tempfile.TemporaryDirectory() as tmp:
tmp_p = Path(tmp)
self._make(tmp_p, "M529LK01.AB0", age_seconds=120, content=b"new")
self._make(tmp_p, "M529LK02.AB0", age_seconds=10 * 86400,
content=b"in-window") # 10d < 30d cutoff
self._make(tmp_p, "M529LK03.AB0", age_seconds=400 * 86400,
content=b"way-old") # 400d > 30d cutoff
state = ef.ForwardState(str(tmp_p / "seed.json"))
counts = ef.seed_state_from_folder(
str(tmp_p), state, max_age_days=30,
)
self.assertEqual(counts["seeded"], 2)
self.assertEqual(counts["skipped_too_old"], 1)
def test_seeded_files_are_then_skipped_by_normal_scan(self):
"""End-to-end: seed once, then a normal scan should produce
zero pending events for the seeded files."""
with tempfile.TemporaryDirectory() as tmp:
tmp_p = Path(tmp)
self._make(tmp_p, "M529LK01.AB0", age_seconds=120, content=b"x")
self._make(tmp_p, "M529LK01.AB0.TXT", age_seconds=110, content=b"r")
self._make(tmp_p, "M529LK02.AB0", age_seconds=120, content=b"y")
self._make(tmp_p, "M529LK02.AB0.TXT", age_seconds=110, content=b"r")
state = ef.ForwardState(str(tmp_p / "seed.json"))
ef.seed_state_from_folder(str(tmp_p), state, max_age_days=30)
pending = ef.find_pending_events(
str(tmp_p), state,
max_age_days=30, quiescence_seconds=5,
missing_report_grace_seconds=60,
)
self.assertEqual(len(pending), 0,
"seed should have marked everything already-forwarded")
def test_seed_is_idempotent(self):
"""Re-running seed twice doesn't duplicate entries or POST anything."""
with tempfile.TemporaryDirectory() as tmp:
tmp_p = Path(tmp)
self._make(tmp_p, "M529LK01.AB0", age_seconds=120, content=b"x")
state = ef.ForwardState(str(tmp_p / "seed.json"))
counts1 = ef.seed_state_from_folder(str(tmp_p), state, max_age_days=30)
counts2 = ef.seed_state_from_folder(str(tmp_p), state, max_age_days=30)
self.assertEqual(counts1["seeded"], 1)
self.assertEqual(counts2["seeded"], 0)
self.assertEqual(counts2["already_known"], 1)
self.assertEqual(state.count(), 1)
# ── Multipart encoder ────────────────────────────────────────────────────────