feat(forward): rate cap + seed-state mode for safe backfill (v1.5.2)
Two safety nets for first-deploy on Blastware ACH machines that
have accumulated tens or hundreds of thousands of historical events
in the watch folder.
1. SFM_MAX_FORWARDS_PER_PASS (default 500, 0=unlimited)
---------------------------------------------------
Cap on the number of events forwarded per scan tick. At the
60-second default interval that's ~30K events/hour throughput —
the SFM server gets a steady drip instead of one giant burst.
Scan now sorts by mtime ascending so backfill advances
chronologically (oldest first) and successive scans always
make progress instead of re-considering the same N newest files.
Wired into:
- event_forwarder.find_pending_events / forward_pending
- series3_watcher.run_watcher loop
- config-template.ini
- settings_dialog SFM Forward tab (new "Max Events Per Pass"
spinbox, validated in _on_save)
2. event_forwarder.py --seed-state CLI
-----------------------------------
One-shot mode that walks the watch folder, computes the SHA-256 digest
of every in-window event binary (i.e. newer than --max-age-days), and
marks them all as already forwarded WITHOUT POSTing anything. Run it
before setting SFM_FORWARD_ENABLED=true to skip the historical backfill
entirely — the watcher then only forwards events that appear AFTER the seed.
Usage:
python event_forwarder.py --seed-state \
--watch "C:\Blastware 10\Event\autocall home" \
--state "C:\...\sfm_forwarded.json" \
[--max-age-days 365]
7 new unit tests:
- max_per_pass cap enforcement (=N, =0 unlimited, oldest-first
ordering)
- seed-state mode (in-window seeding, max-age skip,
end-to-end skip-after-seed, idempotent re-runs)
README adds a "First-time deployment" section walking through both
options. Bumps to v1.5.2.
This commit is contained in:
+177
-2
@@ -209,6 +209,7 @@ def find_pending_events(
|
||||
max_age_days: int,
|
||||
quiescence_seconds: float = DEFAULT_QUIESCENCE_SECONDS,
|
||||
missing_report_grace_seconds: float = DEFAULT_MISSING_REPORT_GRACE_SECONDS,
|
||||
max_per_pass: int = 0,
|
||||
) -> List[Tuple[str, Optional[str]]]:
|
||||
"""
|
||||
Walk `watch_dir` and return the list of (binary_path, txt_path_or_None)
|
||||
@@ -226,6 +227,11 @@ def find_pending_events(
|
||||
missing_report_grace_seconds, we forward without the TXT.
|
||||
Younger binaries with a missing TXT are deferred — let BW
|
||||
finish writing the report.
|
||||
- When `max_per_pass > 0`, return at most that many pairs.
|
||||
Older files (lower mtime) are forwarded first so backfill
|
||||
proceeds chronologically. Use this to drip-feed a folder
|
||||
with thousands of qualifying events instead of hammering
|
||||
the SFM server with one giant burst.
|
||||
"""
|
||||
if not os.path.isdir(watch_dir):
|
||||
log.warning("forward scan: watch dir not found: %s", watch_dir)
|
||||
@@ -248,6 +254,20 @@ def find_pending_events(
|
||||
# Cache existence of TXT partners so we don't stat() each twice.
|
||||
names = {e.name for e in entries if e.is_file()}
|
||||
|
||||
# Sort by mtime ASCENDING so chronological backfill happens oldest-first.
|
||||
# When max_per_pass clamps the list, we always advance — we don't get
|
||||
# stuck re-considering the same N newest files every scan.
|
||||
def _mtime(entry: os.DirEntry) -> float:
|
||||
try:
|
||||
return entry.stat().st_mtime
|
||||
except OSError:
|
||||
return 0.0
|
||||
|
||||
entries = sorted(
|
||||
(e for e in entries if e.is_file()),
|
||||
key=_mtime,
|
||||
)
|
||||
|
||||
for e in entries:
|
||||
if not e.is_file():
|
||||
continue
|
||||
@@ -302,9 +322,13 @@ def find_pending_events(
|
||||
# Stash size + digest on the tuple-replacement for use during forward;
|
||||
# callers can re-derive but caching avoids a second sha256.
|
||||
|
||||
# Per-pass cap: once we have enough pending, stop scanning.
|
||||
if max_per_pass and len(pending) >= max_per_pass:
|
||||
break
|
||||
|
||||
log.debug(
|
||||
"forward scan: %d pending skipped_inflight=%d already_forwarded=%d",
|
||||
len(pending), skipped_inflight, skipped_already_forwarded,
|
||||
"forward scan: %d pending skipped_inflight=%d already_forwarded=%d cap=%d",
|
||||
len(pending), skipped_inflight, skipped_already_forwarded, max_per_pass,
|
||||
)
|
||||
return pending
|
||||
|
||||
@@ -442,6 +466,7 @@ def forward_pending(
|
||||
quiescence_seconds: float = DEFAULT_QUIESCENCE_SECONDS,
|
||||
missing_report_grace_seconds: float = DEFAULT_MISSING_REPORT_GRACE_SECONDS,
|
||||
timeout: float = DEFAULT_HTTP_TIMEOUT,
|
||||
max_per_pass: int = 0,
|
||||
logger: Optional[Any] = None,
|
||||
) -> Dict[str, int]:
|
||||
"""
|
||||
@@ -467,6 +492,7 @@ def forward_pending(
|
||||
max_age_days=max_age_days,
|
||||
quiescence_seconds=quiescence_seconds,
|
||||
missing_report_grace_seconds=missing_report_grace_seconds,
|
||||
max_per_pass=max_per_pass,
|
||||
)
|
||||
|
||||
counts = {"scanned": len(pending), "forwarded": 0, "errors": 0, "with_report": 0}
|
||||
@@ -502,3 +528,152 @@ def forward_pending(
|
||||
)
|
||||
|
||||
return counts
|
||||
|
||||
|
||||
# ── Seed-state mode (skip historical backfill on first deploy) ────────────────
|
||||
|
||||
|
||||
def seed_state_from_folder(
    watch_dir: str,
    state: ForwardState,
    *,
    max_age_days: int = 365,
    logger: Optional[Any] = None,
) -> Dict[str, int]:
    """Record the event binaries already in `watch_dir` as forwarded.

    Nothing is POSTed: each qualifying file is hashed and its digest is
    handed to `state.mark_forwarded`. Intended as a one-off step on a
    machine whose BW ACH folder already holds a large historical archive,
    run before SFM_FORWARD_ENABLED is switched on, so that only events
    arriving after the seed are forwarded.

    Command-line form (enter as a single command):

        python event_forwarder.py --seed-state
            --watch "<BW autocall home folder>"
            --state "<path to sfm_forwarded.json>"
            [--max-age-days 365]

    Args:
        watch_dir: Folder to scan (top level only, regular files only).
        state: Forwarded-state store; queried with `is_forwarded` and
            updated with `mark_forwarded`.
        max_age_days: Files whose mtime is older than this many days are
            counted as skipped and never hashed. Values below 1 are
            treated as 1.
        logger: Optional callable taking one message string. When it is
            missing or falsy, messages go to the module log at INFO.

    Returns:
        A counts dict:
            {"scanned": int, "seeded": int, "already_known": int,
             "skipped_too_old": int}
        All values are zero when `watch_dir` is not a directory or cannot
        be listed.
    """
    def _log(msg: str) -> None:
        # Prefer the caller-supplied sink; fall back to the module logger.
        sink = logger if logger else log.info
        sink(msg)

    counts = {"scanned": 0, "seeded": 0, "already_known": 0, "skipped_too_old": 0}

    if not os.path.isdir(watch_dir):
        _log(f"[seed] watch dir not found: {watch_dir}")
        return counts

    now_ts = time.time()
    # Age window in seconds; never narrower than one day.
    horizon = max(1, int(max_age_days)) * 86400.0

    try:
        with os.scandir(watch_dir) as listing:
            files = [item for item in listing if item.is_file()]
    except OSError as exc:
        _log(f"[seed] scandir failed on {watch_dir}: {exc}")
        return counts

    for item in files:
        if not is_event_binary(item.path):
            continue
        counts["scanned"] += 1

        # A file that cannot be stat'ed stays counted as scanned and is
        # otherwise ignored.
        try:
            info = item.stat()
            modified, nbytes = info.st_mtime, info.st_size
        except OSError:
            continue

        age = now_ts - modified
        if age > horizon:
            counts["skipped_too_old"] += 1
            continue

        try:
            digest = sha256_of_file(item.path)
        except OSError as exc:
            _log(f"[seed] sha256 failed for {item.path}: {exc}")
            continue

        if state.is_forwarded(digest):
            counts["already_known"] += 1
        else:
            state.mark_forwarded(digest, item.name, nbytes)
            counts["seeded"] += 1
            # Heartbeat every 1000 newly seeded files for very large folders.
            if not counts["seeded"] % 1000:
                _log(f"[seed] progress: {counts['seeded']} seeded so far...")

    _log(
        "[seed] done. scanned={scanned} seeded={seeded} "
        "already_known={already_known} "
        "skipped_too_old={skipped_too_old}".format(**counts)
    )
    return counts
|
||||
|
||||
|
||||
# ── CLI entry point ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _main() -> int:
    """Parse the command line and run the requested one-shot operation.

    Only one mode exists so far, `--seed-state` (enter as a single
    command):

        python event_forwarder.py --seed-state
            --watch "<path/to/BW autocall folder>"
            --state "<path/to/sfm_forwarded.json>"
            [--max-age-days 365]

    It opens the state file given by `--state`, passes it with the
    `--watch` folder to `seed_state_from_folder`, and prints the entry
    count before and after so the operator can see what the run added.

    Returns:
        0 once the seed run has finished. Missing required options, or a
        command line without `--seed-state`, are reported through
        argparse, which ends the process instead of returning.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Series 3 Watcher — SFM event forwarder utilities",
    )

    seed_help = (
        "Mark every event binary in --watch as already-forwarded "
        "(without POSTing). Use this BEFORE enabling SFM_FORWARD "
        "on a machine with a large historical archive."
    )
    cli.add_argument("--seed-state", action="store_true", help=seed_help)
    cli.add_argument(
        "--watch",
        required=True,
        help="Path to the Blastware ACH folder.",
    )
    cli.add_argument(
        "--state",
        required=True,
        help="Path to the JSON state file. Will be created if missing.",
    )
    cli.add_argument(
        "--max-age-days",
        type=int,
        default=365,
        help="Only seed files newer than this many days (default 365).",
    )
    opts = cli.parse_args()

    # --seed-state is the only mode; anything else is a usage error.
    if not opts.seed_state:
        cli.error("specify --seed-state (no other modes supported yet)")

    print(f"[seed] watch_dir = {opts.watch}")
    print(f"[seed] state = {opts.state}")
    print(f"[seed] max_age = {opts.max_age_days} days")

    store = ForwardState(opts.state)
    print(f"[seed] state currently has {store.count()} entries")

    # Progress and summary lines from the seeding pass go to stdout.
    seed_state_from_folder(
        opts.watch,
        store,
        max_age_days=opts.max_age_days,
        logger=print,
    )

    print(f"[seed] state now has {store.count()} entries")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    # Executed as a script: run the CLI and use its return value as the
    # process exit status.
    exit_status = _main()
    sys.exit(exit_status)
|
||||
|
||||
Reference in New Issue
Block a user