series 4 codec work, initial decode success

This commit is contained in:
2026-05-29 06:33:06 +00:00
parent 1bccc44b88
commit 9b71ead44b
20 changed files with 1578 additions and 76 deletions
+135 -23
View File
@@ -467,21 +467,21 @@ class WaveformStore:
Ingest a Thor (Micromate Series IV) IDF event file (`.IDFW` or
`.IDFH`) produced by Thor's TXT exporter.
Thor binaries are stored as opaque bytes — seismo-relay doesn't
yet decode the proprietary IDF binary format (codec slot lives
at ``micromate/idf_file.py``). Device-authoritative metadata
comes from the paired ``.IDFW.txt`` / ``.IDFH.txt`` sidecar
when supplied.
Workflow:
1. Parse the paired TXT report (when supplied) via
``micromate.parse_idf_report`` → dict.
2. Wrap parsed dict + filename into a typed ``micromate.IdfEvent``.
3. Copy bytes verbatim into ``<root>/<serial>/<filename>``.
4. Bridge IdfEvent → ``minimateplus.Event`` (for the existing
sidecar / DB insert machinery) via
``IdfEvent.to_minimateplus_event(waveform_key)``.
5. Write the ``.sfm.json`` sidecar with
1. For sig-A `.IDFW` / `.IDFH` binaries, decode samples (or
histogram intervals) + binary metadata via
``micromate.idf_file.read_idf_file()``. A decode failure or an
unsupported (sig-B) file falls through to the .txt-only flow.
2. Parse the paired TXT report (when supplied) via
``micromate.parse_idf_report`` → dict. TXT remains the
source of truth for fields the binary doesn't yet supply
(full peak set with ZC freq / Time of Peak, sensor self-check,
firmware string, project strings).
3. Wrap parsed dict + filename into a typed ``micromate.IdfEvent``.
4. Copy bytes verbatim into ``<root>/<serial>/<filename>``.
5. Bridge IdfEvent → ``minimateplus.Event`` and attach
``raw_samples`` from the binary decoder (when available).
6. Write the `.h5` clean-waveform file when samples decoded.
7. Write the ``.sfm.json`` sidecar with
``source.kind = "idf-import"`` and the full raw IDF report
under ``extensions.idf_report``.
@@ -490,7 +490,33 @@ class WaveformStore:
"""
from micromate import IdfEvent, parse_idf_report
# Parse the .txt sidecar (best-effort; non-fatal on failure).
# 1. Binary decode (sig-A IDFW and IDFH). Non-fatal: any failure
# leaves samples / binary metadata unfilled and we proceed with
# the .txt path as before.
idf_samples: Optional[dict] = None
idf_intervals: Optional[list] = None
binary_md = None
binary_peaks = None
is_histogram = False
try:
from micromate.idf_file import read_idf_file
res = read_idf_file(source_path)
idf_samples = res.samples or None
idf_intervals = res.intervals
is_histogram = res.intervals is not None
binary_md = res.binary_metadata
binary_peaks = res.event.peaks
except NotImplementedError:
# sig-B — codec doesn't handle this yet.
pass
except Exception as exc:
log.warning(
"save_imported_idf: binary codec failed for %s: %s"
"falling back to .txt-only ingest",
source_path.name, exc,
)
# 2. Parse the .txt sidecar (best-effort; non-fatal on failure).
report_dict: dict = {}
if idf_report_text is not None:
try:
@@ -501,7 +527,38 @@ class WaveformStore:
exc,
)
# Build the typed IdfEvent. Filename is authoritative for
# 3. Backfill report_dict with binary metadata for fields the
# .txt didn't supply. Values already present in the .txt are
# never overwritten; the binary only fills in fields that are
# missing or empty (serial, timestamp, sample_rate, record time).
if binary_md is not None:
if binary_md.serial and not report_dict.get("serial_number"):
report_dict["serial_number"] = binary_md.serial
if binary_md.event_datetime and not report_dict.get("event_datetime"):
report_dict["event_datetime"] = binary_md.event_datetime
if binary_md.sample_rate and not report_dict.get("sample_rate"):
report_dict["sample_rate"] = binary_md.sample_rate
if binary_md.record_time_sec and not report_dict.get("record_time_sec"):
report_dict["record_time_sec"] = binary_md.record_time_sec
# Calibration date (binary) vs calibration text (.txt) cohabit
# under different keys; no overwrite needed.
if binary_md.event_datetime and not report_dict.get("event_type"):
report_dict["event_type"] = (
"Full Histogram" if is_histogram else "Full Waveform"
)
# Binary-derived peaks fill in when the .txt didn't supply them.
# They're ~3% low vs the device-authoritative .txt values (residual
# codec drift), so .txt always wins when present.
if binary_peaks is not None:
if binary_peaks.transverse_ips and not report_dict.get("tran_ppv"):
report_dict["tran_ppv"] = binary_peaks.transverse_ips
if binary_peaks.vertical_ips and not report_dict.get("vert_ppv"):
report_dict["vert_ppv"] = binary_peaks.vertical_ips
if binary_peaks.longitudinal_ips and not report_dict.get("long_ppv"):
report_dict["long_ppv"] = binary_peaks.longitudinal_ips
# 4. Build the typed IdfEvent. Filename is authoritative for
# (serial, timestamp, kind); the report's event_datetime takes
# precedence over the filename timestamp inside from_report().
idf_event = IdfEvent.from_report(report_dict, source_path.name)
@@ -511,7 +568,7 @@ class WaveformStore:
# serial that overrides a misnamed export).
serial = serial_hint or idf_event.serial or "UNKNOWN"
# Filesystem write.
# 5. Filesystem write of binary bytes.
filename = source_path.name
bw_path = self._serial_dir(serial) / filename
bw_path.write_bytes(idf_bytes)
@@ -523,13 +580,41 @@ class WaveformStore:
# surrogate — every distinct binary maps to a distinct row.
waveform_key = bytes.fromhex(sha256)[:16]
# Bridge to minimateplus.Event for the existing sidecar / DB
# 6. Bridge to minimateplus.Event for the existing sidecar / DB
# insert paths. See IdfEvent.to_minimateplus_event() for the
# caveats of this bridge (mic units, missing fields → sidecar).
ev = idf_event.to_minimateplus_event(waveform_key)
# Write the sidecar. Source kind "idf-import" was added to the
# allow-list in event_file_io.event_to_sidecar_dict for this.
# Attach the decoded sample arrays. Thor's decoder counts use
# LSB = 0.0003 in/s for geo (vs BW's 16-count units at 0.005 in/s)
# — the .h5 writer's geo_range="normal" yields LSB = 10/32768
# ≈ 0.000305 in/s, so plotted samples come out ~1.7% high.
# Acceptable known offset; refine with a Thor-aware h5 path later.
if idf_samples is not None:
ev.raw_samples = idf_samples
n_samples = max((len(idf_samples.get(ch, [])) for ch in ("Tran", "Vert", "Long", "MicL")), default=0)
ev.total_samples = ev.total_samples or n_samples
# 7. Write the .h5 clean-waveform file when we actually have samples.
# Histograms (IDFH) don't have waveform samples — skip h5 for those.
hdf5_filename: Optional[str] = None
if idf_samples is not None and not is_histogram:
hdf5_path = self.hdf5_path_for(serial, filename)
try:
event_hdf5.write_event_hdf5(
hdf5_path, ev,
serial=serial,
geo_range="normal", # Thor's geo full scale is also 10 in/s (Normal)
source_kind="idf-import",
)
hdf5_filename = hdf5_path.name
except Exception as exc:
log.warning(
"save_imported_idf: HDF5 write failed for %s: %s — continuing without .h5",
hdf5_path, exc,
)
# 8. Write the sidecar. Source kind "idf-import" is on the allow-list.
sidecar_path = self.sidecar_path_for(serial, filename)
existing_review = None
if sidecar_path.exists():
@@ -554,19 +639,46 @@ class WaveformStore:
# Time of Peak, sensor self-check, calibration, firmware).
if report_dict:
sidecar["extensions"]["idf_report"] = report_dict
# For histograms, also stash the binary-decoded per-interval
# records so the UI / report layer doesn't need to re-walk the
# IDFH file at render time.
if idf_intervals is not None:
sidecar["extensions"]["idf_intervals"] = [
{
"offset": iv.offset,
"tran_peak": iv.peak_count("Tran"),
"tran_halfp": iv.tran_halfp,
"tran_freq": iv.freq_hz("Tran"),
"vert_peak": iv.peak_count("Vert"),
"vert_halfp": iv.vert_halfp,
"vert_freq": iv.freq_hz("Vert"),
"long_peak": iv.peak_count("Long"),
"long_halfp": iv.long_halfp,
"long_freq": iv.freq_hz("Long"),
"mic_peak": iv.peak_count("MicL"),
"mic_halfp": iv.micl_halfp,
"mic_freq": iv.freq_hz("MicL"),
}
for iv in idf_intervals
]
event_file_io.write_sidecar(sidecar_path, sidecar)
log.info(
"WaveformStore.save_imported_idf serial=%s filename=%s filesize=%d "
"report_attached=%s",
serial, filename, filesize, bool(report_dict),
"kind=%s report_attached=%s binary_decoded=%s h5=%s intervals=%d",
serial, filename, filesize,
"histogram" if is_histogram else "waveform",
bool(report_dict),
(idf_samples is not None) or (idf_intervals is not None),
hdf5_filename or "(skipped)",
len(idf_intervals) if idf_intervals else 0,
)
return ev, {
"filename": filename,
"filesize": filesize,
"sha256": sha256,
"a5_pickle_filename": None,
"hdf5_filename": None,
"hdf5_filename": hdf5_filename,
"sidecar_filename": sidecar_path.name,
"serial": serial,
}