feat(cache): implement integrity checks for cached events and waveforms
- Added `waveform_key` and `event_timestamp` columns to `CachedEvent` and `CachedWaveform` for integrity verification.
- Implemented logic to flush the cache when a mismatch in (waveform_key, event_timestamp) is detected during event and waveform updates.
- Enhanced `set_events` and `set_waveform` methods to check for mismatches and trigger cache eviction as necessary.
- Introduced a new `LiveCache` class to manage in-memory caching of live device data, separating it from the server logic for better testability.
- Added tests to verify the correctness of cache invalidation logic, particularly for post-erase key reuse scenarios.
- Updated web application to include a "Force refresh" toggle, allowing users to bypass the cache and re-fetch data from the device.
This commit is contained in:
+140
-11
@@ -83,13 +83,24 @@ class CachedEvent(Base):
|
||||
|
||||
Events are immutable once recorded on the device; once we have an event in
|
||||
the cache it never needs to be re-downloaded unless explicitly requested.
|
||||
|
||||
The two extra columns `waveform_key` and `event_timestamp` are an
|
||||
integrity stamp: when set_events() / set_waveform() are called with a
|
||||
different (waveform_key, event_timestamp) for the same (conn_key, index),
|
||||
we know the device was erased and re-recorded — the cached row no longer
|
||||
refers to the same physical event and the entire device's cache is
|
||||
flushed before the new entry is written. This catches the post-erase
|
||||
key-reuse bug where the device's first new event (key 01110000) collides
|
||||
with the first event we previously downloaded.
|
||||
"""
|
||||
__tablename__ = "cached_events"
|
||||
|
||||
conn_key = sa.Column(sa.String, primary_key=True)
|
||||
index = sa.Column(sa.Integer, primary_key=True)
|
||||
event_json = sa.Column(sa.Text, nullable=False) # serialised Event dict
|
||||
cached_at = sa.Column(sa.Float, nullable=False) # Unix timestamp
|
||||
conn_key = sa.Column(sa.String, primary_key=True)
|
||||
index = sa.Column(sa.Integer, primary_key=True)
|
||||
event_json = sa.Column(sa.Text, nullable=False) # serialised Event dict
|
||||
cached_at = sa.Column(sa.Float, nullable=False) # Unix timestamp
|
||||
waveform_key = sa.Column(sa.String, nullable=True) # 8-hex device key
|
||||
event_timestamp = sa.Column(sa.String, nullable=True) # ISO-8601 from 0C
|
||||
|
||||
|
||||
class CachedWaveform(Base):
|
||||
@@ -97,14 +108,18 @@ class CachedWaveform(Base):
|
||||
Full raw ADC waveform for a single event (SUB 5A full download).
|
||||
|
||||
These are large (up to several MB) and expensive to fetch over cellular.
|
||||
Once downloaded they are immutable and cached permanently.
|
||||
Once downloaded they are immutable and cached permanently — but the
|
||||
cache row is invalidated when the device is erased and a new event lands
|
||||
at the same index (see CachedEvent docstring).
|
||||
"""
|
||||
__tablename__ = "cached_waveforms"
|
||||
|
||||
conn_key = sa.Column(sa.String, primary_key=True)
|
||||
index = sa.Column(sa.Integer, primary_key=True)
|
||||
waveform_json = sa.Column(sa.Text, nullable=False) # full /device/event/{idx}/waveform response JSON
|
||||
cached_at = sa.Column(sa.Float, nullable=False)
|
||||
conn_key = sa.Column(sa.String, primary_key=True)
|
||||
index = sa.Column(sa.Integer, primary_key=True)
|
||||
waveform_json = sa.Column(sa.Text, nullable=False) # full /device/event/{idx}/waveform response JSON
|
||||
cached_at = sa.Column(sa.Float, nullable=False)
|
||||
waveform_key = sa.Column(sa.String, nullable=True) # 8-hex device key
|
||||
event_timestamp = sa.Column(sa.String, nullable=True) # ISO-8601 from 0C
|
||||
|
||||
|
||||
class CachedMonitorStatus(Base):
|
||||
@@ -149,6 +164,23 @@ class SFMCache:
|
||||
engine = sa.create_engine(url, connect_args={"check_same_thread": False})
|
||||
Base.metadata.create_all(engine)
|
||||
self._Session = orm.sessionmaker(bind=engine)
|
||||
# In-place schema migration: add the (waveform_key, event_timestamp)
|
||||
# integrity-stamp columns to legacy cache DBs that predate the
|
||||
# post-erase eviction logic. The migration is safe to re-run: each
|
||||
# ADD COLUMN only executes when the column-presence check below finds it missing.
|
||||
with engine.begin() as conn:
|
||||
for table in ("cached_events", "cached_waveforms"):
|
||||
cols = {
|
||||
r[1]
|
||||
for r in conn.exec_driver_sql(f"PRAGMA table_info({table})").fetchall()
|
||||
}
|
||||
for new_col, ddl in (
|
||||
("waveform_key", "TEXT"),
|
||||
("event_timestamp", "TEXT"),
|
||||
):
|
||||
if new_col not in cols:
|
||||
log.info("cache schema: %s ADD COLUMN %s %s", table, new_col, ddl)
|
||||
conn.exec_driver_sql(f"ALTER TABLE {table} ADD COLUMN {new_col} {ddl}")
|
||||
log.info("SFM cache opened: %s", db_path)
|
||||
|
||||
# ── Connection key ────────────────────────────────────────────────────────
|
||||
@@ -242,15 +274,91 @@ class SFMCache:
|
||||
row = s.get(CachedEvent, (conn_key, index))
|
||||
return json.loads(row.event_json) if row else None
|
||||
|
||||
@staticmethod
|
||||
def _event_signature(ev: dict) -> tuple[Optional[str], Optional[str]]:
|
||||
"""
|
||||
Extract the (waveform_key_hex, timestamp_iso) integrity stamp from
|
||||
a serialised event dict. Either field may be None if the source
|
||||
Event was missing it; the comparison logic in set_events/set_waveform
|
||||
treats "both sides have a value AND they differ" as the only
|
||||
eviction trigger, so partial data never spuriously flushes cache.
|
||||
"""
|
||||
key = ev.get("waveform_key") or ev.get("_waveform_key")
|
||||
if isinstance(key, (bytes, bytearray)):
|
||||
key = bytes(key).hex()
|
||||
ts = ev.get("timestamp")
|
||||
if isinstance(ts, dict):
|
||||
# _serialise_timestamp returns a dict like {"iso": "...", ...}
|
||||
ts = ts.get("iso") or ts.get("string") or None
|
||||
return (key if isinstance(key, str) else None,
|
||||
ts if isinstance(ts, str) else None)
|
||||
|
||||
def _maybe_flush_on_mismatch(
    self,
    s,
    conn_key: str,
    index: int,
    new_key: Optional[str],
    new_ts: Optional[str],
) -> bool:
    """
    Flush this device's cached events/waveforms when the incoming integrity
    stamp conflicts with what is cached at (conn_key, index).

    Both the CachedEvent row and the CachedWaveform row at that index are
    compared against the incoming (waveform_key, timestamp). A conflict
    means a field is populated on both sides and the values differ; a
    missing value on either side never triggers a flush. Checking both
    rows matters because one of them may carry empty stamps while the
    other still holds a stale stamp.

    Args:
        s: open session used for the lookups and the deletes.
        conn_key: connection key identifying the device.
        index: event index being written.
        new_key: incoming waveform key, or None if unknown.
        new_ts: incoming event timestamp, or None if unknown.

    Returns:
        True when a conflict was found and every CachedEvent and
        CachedWaveform row for conn_key was deleted; False otherwise.
        The deletes are issued on `s` but not committed here.
    """
    if not new_key and not new_ts:
        return False  # nothing to compare against

    def _conflicts(new: Optional[str], old: Optional[str]) -> bool:
        # Only a value populated on both sides can conflict.
        return bool(new and old and new != old)

    stale = None
    for model in (CachedEvent, CachedWaveform):
        row = s.get(model, (conn_key, index))
        if row is None:
            continue
        if _conflicts(new_key, row.waveform_key) or _conflicts(new_ts, row.event_timestamp):
            stale = row
            break

    if stale is None:
        return False

    # Capture the old stamp before the bulk deletes touch the session state.
    old_key = stale.waveform_key
    old_ts = stale.event_timestamp
    log.warning(
        "cache: device %s — index %d (key=%s, ts=%s) replaces (key=%s, ts=%s); "
        "flushing all cached events/waveforms for this device "
        "(post-erase key reuse detected)",
        conn_key, index, new_key, new_ts, old_key, old_ts,
    )
    s.query(CachedEvent).filter_by(conn_key=conn_key).delete()
    s.query(CachedWaveform).filter_by(conn_key=conn_key).delete()
    return True
|
||||
|
||||
def set_events(self, conn_key: str, events: list[dict]) -> None:
|
||||
"""
|
||||
Upsert a list of event dicts. Existing rows are updated; new rows are
|
||||
inserted. This is used to add newly-discovered events to the cache.
|
||||
|
||||
Eviction: if any incoming event has a different (waveform_key,
|
||||
timestamp) than the row currently cached at the same index, we flush
|
||||
the entire device's cache before inserting the new entries. Catches
|
||||
post-erase key reuse where index 0 silently switches identity.
|
||||
"""
|
||||
now = time.time()
|
||||
with self._Session() as s:
|
||||
# Eviction check: scan incoming events for any (index, key, ts)
|
||||
# that conflicts with a cached row. A single conflict triggers
|
||||
# a full device-wide flush so we don't end up with a mixed-era
|
||||
# cache.
|
||||
for ev in events:
|
||||
key, ts = self._event_signature(ev)
|
||||
if self._maybe_flush_on_mismatch(s, conn_key, ev["index"], key, ts):
|
||||
s.commit()
|
||||
break # cache is now empty for this device; carry on
|
||||
|
||||
for ev in events:
|
||||
idx = ev["index"]
|
||||
key, ts = self._event_signature(ev)
|
||||
row = s.get(CachedEvent, (conn_key, idx))
|
||||
if row is None:
|
||||
row = CachedEvent(
|
||||
@@ -258,12 +366,18 @@ class SFMCache:
|
||||
index=idx,
|
||||
event_json=json.dumps(ev),
|
||||
cached_at=now,
|
||||
waveform_key=key,
|
||||
event_timestamp=ts,
|
||||
)
|
||||
s.add(row)
|
||||
log.debug("cached new event %d for %s", idx, conn_key)
|
||||
else:
|
||||
# Refresh in case project_info was backfilled after initial store
|
||||
row.event_json = json.dumps(ev)
|
||||
if key:
|
||||
row.waveform_key = key
|
||||
if ts:
|
||||
row.event_timestamp = ts
|
||||
s.commit()
|
||||
|
||||
# ── Waveforms ─────────────────────────────────────────────────────────────
|
||||
@@ -278,8 +392,16 @@ class SFMCache:
|
||||
return json.loads(row.waveform_json)
|
||||
|
||||
def set_waveform(self, conn_key: str, index: int, waveform: dict) -> None:
|
||||
"""Store a full waveform response dict permanently."""
|
||||
"""
|
||||
Store a full waveform response dict permanently.
|
||||
|
||||
Like set_events, this checks the (waveform_key, timestamp) signature
|
||||
of the incoming entry against what's currently cached at the same
|
||||
index. A mismatch flushes the entire device's cache before insert.
|
||||
"""
|
||||
key, ts = self._event_signature(waveform)
|
||||
with self._Session() as s:
|
||||
self._maybe_flush_on_mismatch(s, conn_key, index, key, ts)
|
||||
row = s.get(CachedWaveform, (conn_key, index))
|
||||
if row is None:
|
||||
row = CachedWaveform(
|
||||
@@ -287,13 +409,20 @@ class SFMCache:
|
||||
index=index,
|
||||
waveform_json=json.dumps(waveform),
|
||||
cached_at=time.time(),
|
||||
waveform_key=key,
|
||||
event_timestamp=ts,
|
||||
)
|
||||
s.add(row)
|
||||
else:
|
||||
row.waveform_json = json.dumps(waveform)
|
||||
row.cached_at = time.time()
|
||||
if key:
|
||||
row.waveform_key = key
|
||||
if ts:
|
||||
row.event_timestamp = ts
|
||||
s.commit()
|
||||
log.debug("cached waveform for %s event %d", conn_key, index)
|
||||
log.debug("cached waveform for %s event %d (key=%s, ts=%s)",
|
||||
conn_key, index, key, ts)
|
||||
|
||||
# ── Monitor status ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
Reference in New Issue
Block a user