1fff8179d6
- Created a comprehensive runbook (`wedged_unit_recovery.md`) detailing the recovery process for units stuck in a call-home loop, including symptoms, recovery steps, and explanations of the failure mode. - Added `blind_stop.sh` script to send stop-monitoring commands in a tight loop for unresponsive devices. - Introduced `rescue_device.sh` script to disable Auto Call Home and erase events from a busy device. - Implemented `slow_drip.sh` script to send stop-monitoring frames at a slow rate to prevent UART overrun. - Developed `spam_stop.sh` script to rapidly send stop-monitoring commands to a device. - Created `watch_unit.sh` script for passive monitoring of device reachability, logging results over time.
45 lines
1.6 KiB
Bash
Executable File
45 lines
1.6 KiB
Bash
Executable File
#!/usr/bin/env bash
# Ask SFM to hold a single TCP session open and drip stop-monitoring frames
# at a slow rate, so the device's UART RX FIFO has time to drain between sends.
#
# Use when high-rate spam isn't landing — typically because the device's
# firmware is too busy to drain its serial buffer fast enough and bytes
# are being lost to UART overrun.
#
# Usage:
#   ./slow_drip.sh <host> [tcp_port] [duration_s]
#
# Args:
#   host        Device host to target (required).
#   tcp_port    Device TCP port (default 9034).
#   duration_s  How long to keep dripping, in seconds (overrides DURATION).
#
# Env:
#   DURATION         Default: 120 (seconds; arg 3 overrides). Documented as
#                    clamped to 1..600; this script does not clamp, so the
#                    clamp is presumably enforced by the SFM endpoint —
#                    TODO confirm.
#   INTERVAL         Seconds between drip sends (default 3). Lower = more
#                    aggressive, more risk of FIFO overrun. Higher = safer
#                    but fewer total drips per duration.
#   CONNECT_TIMEOUT  Default: 5
#   SFM_BASE_URL     Default: http://localhost:8200 (SFM direct).
#
# Exit status:
#   2      usage error (missing host or non-numeric duration)
#   other  curl's exit status (0 when the HTTP exchange completed). HTTP
#          4xx/5xx responses still exit 0 because --fail is not used, which
#          keeps the response body visible on stdout.

set -u

host="${1:-}"
tcp_port="${2:-9034}"
duration="${3:-${DURATION:-120}}"

if [[ -z "$host" ]]; then
  echo "usage: $0 <host> [tcp_port] [duration_s]" >&2
  exit 2
fi

# The duration feeds the --max-time arithmetic below; reject anything that is
# not a plain non-negative number instead of letting awk coerce it to 0.
number_re='^[0-9]+([.][0-9]+)?$'
if [[ ! "$duration" =~ $number_re ]]; then
  echo "slow_drip: duration must be a non-negative number, got '${duration}'" >&2
  exit 2
fi

base="${SFM_BASE_URL:-http://localhost:8200}"
interval="${INTERVAL:-3}"
connect_timeout="${CONNECT_TIMEOUT:-5}"

endpoint="${base}/device/stop_monitoring_slow_drip"

# Human-readable form of the request for the log line. The request itself is
# sent with each value URL-encoded by curl, so for values containing reserved
# characters the on-the-wire query string differs from this display form.
url="${endpoint}?host=${host}&tcp_port=${tcp_port}&duration_s=${duration}&interval_s=${interval}&connect_timeout=${connect_timeout}"

echo "slow_drip: target ${host}:${tcp_port} duration=${duration}s interval=${interval}s connect_timeout=${connect_timeout}s"
echo "slow_drip: POST ${url}"
echo

# Give curl enough slack to wait out the duration plus a buffer
max_time=$(awk -v d="$duration" 'BEGIN { printf "%d", d + 30 }')

# -G moves the --data-urlencode pairs into the query string (properly
# percent-encoded); -X POST keeps the request method the endpoint expects.
curl -sS --max-time "$max_time" -G -X POST \
  --data-urlencode "host=${host}" \
  --data-urlencode "tcp_port=${tcp_port}" \
  --data-urlencode "duration_s=${duration}" \
  --data-urlencode "interval_s=${interval}" \
  --data-urlencode "connect_timeout=${connect_timeout}" \
  "$endpoint"
curl_status=$?

# Terminate the response body with a newline, then report curl's status
# rather than the status of this echo.
echo
exit "$curl_status"