feat(forward): rate cap + seed-state mode for safe backfill (v1.5.2)

Two safety nets for first-deploy on Blastware ACH machines that
have accumulated tens or hundreds of thousands of historical events
in the watch folder.

1. SFM_MAX_FORWARDS_PER_PASS (default 500, 0=unlimited)
   ---------------------------------------------------
   Cap on the number of events forwarded per scan tick.  With the
   default cap of 500 and the 60-second default interval, that's
   ~30K events/hour throughput — the SFM server gets a steady drip
   instead of one giant burst.
   Scan now sorts by mtime ascending so backfill advances
   chronologically (oldest first) and successive scans always
   make progress instead of re-considering the same N newest files.

   Wired into:
     - event_forwarder.find_pending_events / forward_pending
     - series3_watcher.run_watcher loop
     - config-template.ini
     - settings_dialog SFM Forward tab (new "Max Events Per Pass"
       spinbox, validated in _on_save)

2. event_forwarder.py --seed-state CLI
   -----------------------------------
   One-shot mode that walks the watch folder, sha256s every in-window
   event binary, and marks them all as already-forwarded WITHOUT
   POSTing anything.  Run before flipping SFM_FORWARD_ENABLED=true
   to skip the historical backfill entirely — the watcher then only
   forwards events that appear AFTER the seed.

   Usage:
       python event_forwarder.py --seed-state \
           --watch "C:\Blastware 10\Event\autocall home" \
           --state "C:\...\sfm_forwarded.json" \
           [--max-age-days 365]

7 new unit tests:
  - max_per_pass cap enforcement (=N, =0 unlimited, oldest-first
    ordering)
  - seed-state mode (in-window seeding, max-age skip,
    end-to-end skip-after-seed, idempotent re-runs)

README adds a "First-time deployment" section walking through both
options.  Bumps to v1.5.2.
This commit is contained in:
2026-05-10 00:20:10 +00:00
parent 3ee0cae31e
commit 815c643fb2
9 changed files with 390 additions and 15 deletions
+177 -2
View File
@@ -209,6 +209,7 @@ def find_pending_events(
max_age_days: int,
quiescence_seconds: float = DEFAULT_QUIESCENCE_SECONDS,
missing_report_grace_seconds: float = DEFAULT_MISSING_REPORT_GRACE_SECONDS,
max_per_pass: int = 0,
) -> List[Tuple[str, Optional[str]]]:
"""
Walk `watch_dir` and return the list of (binary_path, txt_path_or_None)
@@ -226,6 +227,11 @@ def find_pending_events(
missing_report_grace_seconds, we forward without the TXT.
Younger binaries with a missing TXT are deferred — let BW
finish writing the report.
- When `max_per_pass > 0`, return at most that many pairs.
Older files (lower mtime) are forwarded first so backfill
proceeds chronologically. Use this to drip-feed a folder
with thousands of qualifying events instead of hammering
the SFM server with one giant burst.
"""
if not os.path.isdir(watch_dir):
log.warning("forward scan: watch dir not found: %s", watch_dir)
@@ -248,6 +254,20 @@ def find_pending_events(
# Cache existence of TXT partners so we don't stat() each twice.
names = {e.name for e in entries if e.is_file()}
# Sort by mtime ASCENDING so chronological backfill happens oldest-first.
# When max_per_pass clamps the list, we always advance — we don't get
# stuck re-considering the same N newest files every scan.
def _mtime(entry: os.DirEntry) -> float:
try:
return entry.stat().st_mtime
except OSError:
return 0.0
entries = sorted(
(e for e in entries if e.is_file()),
key=_mtime,
)
for e in entries:
if not e.is_file():
continue
@@ -302,9 +322,13 @@ def find_pending_events(
# Stash size + digest on the tuple-replacement for use during forward;
# callers can re-derive but caching avoids a second sha256.
# Per-pass cap: once we have enough pending, stop scanning.
if max_per_pass and len(pending) >= max_per_pass:
break
log.debug(
"forward scan: %d pending skipped_inflight=%d already_forwarded=%d",
len(pending), skipped_inflight, skipped_already_forwarded,
"forward scan: %d pending skipped_inflight=%d already_forwarded=%d cap=%d",
len(pending), skipped_inflight, skipped_already_forwarded, max_per_pass,
)
return pending
@@ -442,6 +466,7 @@ def forward_pending(
quiescence_seconds: float = DEFAULT_QUIESCENCE_SECONDS,
missing_report_grace_seconds: float = DEFAULT_MISSING_REPORT_GRACE_SECONDS,
timeout: float = DEFAULT_HTTP_TIMEOUT,
max_per_pass: int = 0,
logger: Optional[Any] = None,
) -> Dict[str, int]:
"""
@@ -467,6 +492,7 @@ def forward_pending(
max_age_days=max_age_days,
quiescence_seconds=quiescence_seconds,
missing_report_grace_seconds=missing_report_grace_seconds,
max_per_pass=max_per_pass,
)
counts = {"scanned": len(pending), "forwarded": 0, "errors": 0, "with_report": 0}
@@ -502,3 +528,152 @@ def forward_pending(
)
return counts
# ── Seed-state mode (skip historical backfill on first deploy) ────────────────
def seed_state_from_folder(
    watch_dir: str,
    state: "ForwardState",
    *,
    max_age_days: int = 365,
    logger: Optional[Any] = None,
) -> Dict[str, int]:
    """Mark every in-window event binary in `watch_dir` as already forwarded.

    Nothing is POSTed by this function; it only hashes files and records
    them in `state`.  It is meant for a first deploy on a machine that
    already holds a large historical archive in the BW ACH folder.  Run
    it ONCE before enabling SFM_FORWARD_ENABLED:

        python event_forwarder.py --seed-state --watch <ACH folder>
            --state <sfm_forwarded.json> [--max-age-days 365]

    The watcher then forwards only events that appear AFTER the seed
    run.  Files older than `max_age_days` are not seeded; they are only
    counted under "skipped_too_old".

    Args:
        watch_dir: Folder to scan (top level only, no recursion).
        state: Forward-state store; only `is_forwarded(digest)` and
            `mark_forwarded(digest, name, size)` are used here.
            NOTE(review): whether `mark_forwarded` persists to disk on
            every call is not visible from this function - confirm.
        max_age_days: Age window in days, compared against file mtime.
            Values below 1 are clamped to 1.
        logger: Optional callable taking one message string.  When not
            given, messages go to the module logger at INFO level.

    Returns:
        A counts dict:
        {"scanned": int, "seeded": int, "already_known": int,
         "skipped_too_old": int}
        "scanned" counts every event binary seen, including those whose
        stat() or sha256 failed, so it can exceed the sum of the others.
    """
    def _log(msg: str) -> None:
        if logger:
            logger(msg)
        else:
            log.info(msg)

    counts = {"scanned": 0, "seeded": 0, "already_known": 0, "skipped_too_old": 0}

    if not os.path.isdir(watch_dir):
        _log(f"[seed] watch dir not found: {watch_dir}")
        return counts

    now_ts = time.time()
    max_age_seconds = max(1, int(max_age_days)) * 86400.0

    try:
        with os.scandir(watch_dir) as it:
            entries = [e for e in it if e.is_file()]
    except OSError as exc:
        _log(f"[seed] scandir failed on {watch_dir}: {exc}")
        return counts

    for e in entries:
        if not is_event_binary(e.path):
            continue
        counts["scanned"] += 1

        # One stat() call supplies both mtime and size.  A failure is
        # reported (like a sha256 failure below) instead of silently
        # dropping the file from the run.
        try:
            st = e.stat()
        except OSError as exc:
            _log(f"[seed] stat failed for {e.path}: {exc}")
            continue

        if (now_ts - st.st_mtime) > max_age_seconds:
            counts["skipped_too_old"] += 1
            continue

        try:
            digest = sha256_of_file(e.path)
        except OSError as exc:
            _log(f"[seed] sha256 failed for {e.path}: {exc}")
            continue

        # Re-running the seed must not double-count known digests.
        if state.is_forwarded(digest):
            counts["already_known"] += 1
            continue

        state.mark_forwarded(digest, e.name, st.st_size)
        counts["seeded"] += 1
        # Periodic heartbeat so a very large folder doesn't look hung.
        if counts["seeded"] % 1000 == 0:
            _log(f"[seed] progress: {counts['seeded']} seeded so far...")

    _log(
        f"[seed] done. scanned={counts['scanned']} seeded={counts['seeded']} "
        f"already_known={counts['already_known']} "
        f"skipped_too_old={counts['skipped_too_old']}"
    )
    return counts
# ── CLI entry point ─────────────────────────────────────────────────────────
def _main(argv: Optional[List[str]] = None) -> int:
    """Command-line interface for one-shot operations.

    Currently supports a single mode:

        python event_forwarder.py --seed-state --watch <BW autocall folder>
            --state <sfm_forwarded.json> [--max-age-days 365]

    which marks every existing in-window event binary as already
    forwarded (without POSTing) so the watcher only forwards events
    appearing AFTER the seed.

    Args:
        argv: Argument list to parse.  None (the default) makes argparse
            read sys.argv[1:], which is what the script entry point uses.

    Returns:
        Process exit code: 0 when the seed run completed, 2 when the
        --watch folder does not exist.  Usage errors are reported by
        argparse itself, which exits the process.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Series 3 Watcher — SFM event forwarder utilities",
    )
    parser.add_argument(
        "--seed-state", action="store_true",
        help="Mark every event binary in --watch as already-forwarded "
        "(without POSTing). Use this BEFORE enabling SFM_FORWARD "
        "on a machine with a large historical archive.",
    )
    parser.add_argument(
        "--watch", required=True,
        help="Path to the Blastware ACH folder.",
    )
    parser.add_argument(
        "--state", required=True,
        help="Path to the JSON state file. Will be created if missing.",
    )
    parser.add_argument(
        "--max-age-days", type=int, default=365,
        help="Only seed files newer than this many days (default 365).",
    )
    args = parser.parse_args(argv)

    if not args.seed_state:
        parser.error("specify --seed-state (no other modes supported yet)")

    print(f"[seed] watch_dir = {args.watch}")
    print(f"[seed] state = {args.state}")
    print(f"[seed] max_age = {args.max_age_days} days")

    # A mistyped --watch path must not look like a successful seed: the
    # operator would enable forwarding believing the archive was skipped.
    # Checked before the state object is built so a bad path touches nothing.
    if not os.path.isdir(args.watch):
        print(f"[seed] error: watch dir not found: {args.watch}", file=sys.stderr)
        return 2

    state = ForwardState(args.state)
    print(f"[seed] state currently has {state.count()} entries")

    seed_state_from_folder(
        args.watch, state,
        max_age_days=args.max_age_days,
        logger=print,
    )

    print(f"[seed] state now has {state.count()} entries")
    return 0
# Script entry point: run the CLI and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    import sys

    exit_status = _main()
    sys.exit(exit_status)