#!/usr/bin/env bash
# Passive monitor for a misbehaving unit. Every INTERVAL seconds, attempts
# a single short TCP probe + storage_range read and logs the result. Designed
# to run unattended for hours/days and tell you when the unit comes back.
#
# Usage:
#   ./watch_unit.sh HOST [tcp_port]
#
# Env:
#   INTERVAL      Seconds between checks (default 300 = 5 min)
#   LOG_FILE      Append results here (default /tmp/watch_HOST.log)
#   SFM_BASE_URL  Default: http://localhost:8200

# Only -u, deliberately: this is a long-running monitor, and a single failed
# probe or a truncated pipeline (`head -c`) must never terminate the loop.
set -u

#######################################
# Print one result line to stdout and append it to the log file.
# Globals:   log_file (read)
# Arguments: $1 - the complete, already formatted line
#######################################
log_line() {
  printf '%s\n' "$1" | tee -a "$log_file"
}

#######################################
# Run a single probe against the storage_range endpoint and log the outcome.
# Globals:   endpoint, host, tcp_port (read); log_file (via log_line)
# Outputs:   one line: "<timestamp> <STATUS> <detail>"
#######################################
probe_once() {
  local ts response http_code body summary
  ts=$(date '+%Y-%m-%d %H:%M:%S')

  # Body and status code are captured from one curl call: the write-out
  # appends "\n<code>" after the body, so no temp file is needed.
  # -G + --data-urlencode builds the same query string as plain
  # interpolation for ordinary hosts, but stays valid for unusual ones.
  if response=$(curl -sS --max-time 20 -G \
    --data-urlencode "host=${host}" \
    --data-urlencode "tcp_port=${tcp_port}" \
    -w '\n%{http_code}' "$endpoint"); then
    http_code=${response##*$'\n'}
    body=${response%$'\n'*}
    # Drop trailing newlines so the logged text stays on a single line.
    while [[ "$body" == *$'\n' ]]; do
      body=${body%$'\n'}
    done
  else
    # curl still emits its write-out on failure, so the status must come
    # from the exit code; appending a fallback to the captured output would
    # yield "000000" and miss the CURL_FAIL branch below.
    http_code="000"
    body=""
  fi

  case "$http_code" in
    200 | 201)
      # Strip the raw_hex for readability
      summary=$(printf '%s\n' "$body" \
        | sed 's/"raw_hex":"[^"]*",*//; s/,*$//' \
        | head -c 200)
      log_line "$ts REACHABLE $summary"
      ;;
    502 | 503)
      log_line "$ts ERROR_$http_code $(printf '%s\n' "$body" | head -c 150)"
      ;;
    000)
      log_line "$ts CURL_FAIL (network/timeout)"
      ;;
    *)
      log_line "$ts HTTP_$http_code $(printf '%s\n' "$body" | head -c 150)"
      ;;
  esac
}

#######################################
# Parse arguments and environment, then probe forever.
# Arguments: $1 - host of the unit (required)
#            $2 - TCP port of the unit (default 9034)
# Returns:   exits 2 on missing host, 1 if curl is unavailable;
#            otherwise runs until interrupted.
#######################################
main() {
  host="${1:-}"
  tcp_port="${2:-9034}"

  if [[ -z "$host" ]]; then
    echo "usage: $0 HOST [tcp_port]" >&2
    exit 2
  fi

  # Without curl every probe would be logged as a network failure, which
  # is misleading; fail once, loudly, instead.
  if ! command -v curl > /dev/null; then
    echo "watch_unit: curl is required but was not found in PATH" >&2
    exit 1
  fi

  interval="${INTERVAL:-300}"
  log_file="${LOG_FILE:-/tmp/watch_${host}.log}"
  endpoint="${SFM_BASE_URL:-http://localhost:8200}/device/events/storage_range"

  echo "watch_unit: target ${host}:${tcp_port} interval=${interval}s log=${log_file}"
  echo "watch_unit: Ctrl-C to stop"

  while true; do
    probe_once
    sleep "$interval"
  done
}

main "$@"