Files
lerko 0cad80c352 chore: add VHS tooling for uptop TUI screenshots
Backfill tool, crop tool, demo tape, seed data, and setup script
extracted from the uptop repo for clean separation.
2026-05-29 12:33:39 -04:00

369 lines
11 KiB
Go

package main
import (
"database/sql"
"fmt"
"math/rand/v2"
"os"
"time"
_ "github.com/mattn/go-sqlite3"
)
// main is the CLI entry point. It expects the database path as the first
// argument, runs the backfill, and exits with status 1 on any failure.
//
// All real work lives in runBackfill so that its deferred db.Close still runs
// on error paths: os.Exit terminates the process immediately and does not run
// deferred calls.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: backfill <db-path>")
		os.Exit(1)
	}
	if err := runBackfill(os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// runBackfill opens the SQLite database at dbPath and seeds it, in order, with
// check history, state changes, logs, nodes, maintenance windows and alert
// health. It then prints a summary line and, if a site named 'Nightly Backup'
// exists, its push token as PUSH_TOKEN=<token>.
//
// The first failing step aborts the run; the returned error is prefixed with
// the name of that step.
func runBackfill(dbPath string) error {
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return fmt.Errorf("open: %w", err)
	}
	defer db.Close()

	ids, err := loadSiteIDs(db)
	if err != nil {
		return fmt.Errorf("load site IDs: %w", err)
	}

	rng := rand.New(rand.NewPCG(42, 0)) //nolint:gosec // deterministic seed for reproducible demo data
	now := time.Now().UTC()

	if err := backfillHistory(db, rng, now, ids); err != nil {
		return fmt.Errorf("history: %w", err)
	}
	if err := backfillStateChanges(db, now, ids); err != nil {
		return fmt.Errorf("state changes: %w", err)
	}
	if err := backfillLogs(db, now); err != nil {
		return fmt.Errorf("logs: %w", err)
	}
	if err := backfillNodes(db, now); err != nil {
		return fmt.Errorf("nodes: %w", err)
	}
	if err := backfillMaintenance(db, now, ids); err != nil {
		return fmt.Errorf("maintenance: %w", err)
	}

	// Alert IDs are loaded only now, right before the one step that needs them.
	alertIDs, err := loadAlertIDs(db)
	if err != nil {
		return fmt.Errorf("load alert IDs: %w", err)
	}
	if err := backfillAlertHealth(db, now, alertIDs); err != nil {
		return fmt.Errorf("alert health: %w", err)
	}

	// The summary is best-effort: a failed count is reported on stderr but does
	// not fail the run, and stdout keeps its usual shape.
	var count int
	if err := db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count); err != nil {
		fmt.Fprintf(os.Stderr, "count check history: %v\n", err)
	}
	fmt.Printf("Backfill complete: %d check records\n", count)

	// The push token is optional: when the lookup fails for any reason
	// (including the site not existing) the line is simply omitted.
	var token string
	if err := db.QueryRow("SELECT token FROM sites WHERE name='Nightly Backup'").Scan(&token); err == nil {
		fmt.Printf("PUSH_TOKEN=%s\n", token)
	}
	return nil
}
// loadSiteIDs queries the id and name columns of the sites table and returns
// a map from site name to site ID. A query failure is returned unchanged.
func loadSiteIDs(db *sql.DB) (map[string]int, error) {
	const query = "SELECT id, name FROM sites"

	siteRows, queryErr := db.Query(query)
	if queryErr != nil {
		return nil, queryErr
	}
	// scanNameIDs closes the result set once it has been consumed.
	return scanNameIDs(siteRows)
}
// loadAlertIDs queries the id and name columns of the alerts table and returns
// a map from alert name to alert ID. A query failure is returned unchanged.
func loadAlertIDs(db *sql.DB) (map[string]int, error) {
	const query = "SELECT id, name FROM alerts"

	alertRows, queryErr := db.Query(query)
	if queryErr != nil {
		return nil, queryErr
	}
	// scanNameIDs closes the result set once it has been consumed.
	return scanNameIDs(alertRows)
}
// scanNameIDs consumes a result set whose rows are (id, name) pairs and
// returns a map from name to id. If the same name appears more than once, the
// last row wins.
//
// rows is always closed before returning. On any error, whether from scanning
// a row or from the iteration itself, the returned map is nil so callers never
// see a partially filled result.
func scanNameIDs(rows *sql.Rows) (map[string]int, error) {
	defer rows.Close()

	ids := make(map[string]int)
	for rows.Next() {
		var (
			id   int
			name string
		)
		if err := rows.Scan(&id, &name); err != nil {
			return nil, err
		}
		ids[name] = id
	}
	// Next reports false both at end of data and on failure; Err tells the two
	// apart.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return ids, nil
}
// backfillAlertHealth writes one alert_health row for each demo alert channel
// that is present in alertIDs, so the Alerts tab shows recent, healthy
// "last sent" times and green health dots instead of "never" everywhere.
//
// Channels missing from alertIDs are skipped. All rows are written in a single
// transaction using INSERT OR REPLACE; the first failure aborts and rolls back.
func backfillAlertHealth(db *sql.DB, now time.Time, alertIDs map[string]int) error {
	const (
		tsLayout  = "2006-01-02 15:04:05"
		upsertSQL = "INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)"
	)

	// channelHealth is the seeded send record of one alert channel.
	type channelHealth struct {
		alert    string        // alert name, looked up in alertIDs
		lastSent time.Duration // how long before now the last send happened
		healthy  bool          // whether that last send succeeded
		sent     int           // total sends
		failed   int           // total failed sends
	}
	seeds := []channelHealth{
		{alert: "Discord Homelab", lastSent: 4 * time.Minute, healthy: true, sent: 37, failed: 0},
		{alert: "Slack Ops", lastSent: 9 * time.Minute, healthy: true, sent: 21, failed: 1},
		{alert: "Ntfy Alerts", lastSent: 1 * time.Hour, healthy: true, sent: 12, failed: 0},
		{alert: "Email Oncall", lastSent: 3 * time.Hour, healthy: true, sent: 5, failed: 0},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit there is nothing left to undo; the error from
	// this deferred Rollback is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare(upsertSQL)
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, seed := range seeds {
		alertID, known := alertIDs[seed.alert]
		if !known {
			continue
		}
		lastSendAt := now.Add(-seed.lastSent).Format(tsLayout)
		// last_error is always seeded as the empty string.
		_, execErr := insert.Exec(alertID, lastSendAt, seed.healthy, "", seed.sent, seed.failed)
		if execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// monitorProfile describes how synthetic check history is generated for one
// monitor: its latency range and an optional window of failed checks.
type monitorProfile struct {
	// name is the monitor name used to look up its site ID.
	name string
	// minMs and maxMs bound the generated latency in milliseconds; values are
	// drawn from the half-open range [minMs, maxMs).
	minMs int
	maxMs int
	// downFrom is the index of the first DOWN check; -1 means always up.
	downFrom int
	// downTo is the exclusive end of the DOWN window; use 60 (the total number
	// of checks) for a monitor that is still down.
	downTo int
}
// backfillHistory inserts 60 synthetic check_history rows for every profiled
// monitor present in ids. Checks are spaced 24 minutes apart: the oldest is
// 24h before now and the newest 24 minutes before now.
//
// Checks inside a profile's DOWN window are stored with is_up=false and zero
// latency. All other checks get a latency drawn from rng within the profile's
// range. Everything is written in one transaction; the first failure aborts
// and rolls back.
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
	const (
		checkCount = 60               // checks generated per monitor
		spacing    = 24 * time.Minute // 60 checks * 24m = a 24h window
		alwaysUp   = -1               // downFrom value for monitors with no outage
		tsLayout   = "2006-01-02 15:04:05"
		insertSQL  = "INSERT INTO check_history (site_id, latency_ns, is_up, checked_at) VALUES (?, ?, ?, ?)"
	)

	// The latency ranges model public services monitored over the internet, so
	// the detail histogram brackets the live latency measured at capture time.
	// Outage indices place the dips inside the 24h window.
	profiles := []monitorProfile{
		{"Nextcloud", 200, 600, 47, 48},            // brief blip ~5h ago, recovered
		{"Jellyfin", 40, 180, 15, 16},              // brief blip ~18h ago, recovered
		{"Home Assistant", 30, 120, alwaysUp, 0},   //
		{"Gitea", 50, 200, alwaysUp, 0},            //
		{"Traefik Dashboard", 60, 200, alwaysUp, 0}, //
		{"Vaultwarden", 80, 250, alwaysUp, 0},      //
		{"Personal Blog", 40, 160, alwaysUp, 0},    //
		{"Immich", 60, 300, 30, 31},                // brief blip ~12h ago; periodic spikes below
		{"Auth Portal", 30, 90, 40, 60},            // DOWN ~8h ago, still down
		{"Edge Router", 5, 20, alwaysUp, 0},        // ping
		{"Postgres", 1, 6, alwaysUp, 0},            // port
		{"DNS Primary", 8, 30, alwaysUp, 0},        // dns
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit there is nothing left to undo; the error from
	// this deferred Rollback is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare(insertSQL)
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, prof := range profiles {
		siteID, known := ids[prof.name]
		if !known {
			continue
		}
		for idx := 0; idx < checkCount; idx++ {
			// Index 0 is the oldest check; each later index is one spacing newer.
			checkedAt := now.Add(-time.Duration(checkCount-idx) * spacing)

			up := !(prof.downFrom >= 0 && prof.downFrom <= idx && idx < prof.downTo)
			var latency time.Duration // stays zero for DOWN checks
			if up {
				ms := prof.minMs + rng.IntN(prof.maxMs-prof.minMs)
				// Every 17th Immich check is a latency spike. The regular draw
				// above is still consumed first, which keeps the random
				// sequence identical for all later checks.
				if prof.name == "Immich" && idx%17 == 0 {
					ms = 250 + rng.IntN(100)
				}
				latency = time.Duration(ms) * time.Millisecond
			}

			// time.Duration counts nanoseconds, which is what latency_ns stores.
			_, execErr := insert.Exec(siteID, int64(latency), up, checkedAt.Format(tsLayout))
			if execErr != nil {
				return execErr
			}
		}
	}
	return tx.Commit()
}
// backfillStateChanges inserts the UP/DOWN transitions of the demo outages
// into state_changes for every affected monitor present in ids.
//
// Monitors missing from ids are skipped. All rows are written in a single
// transaction; the first failure aborts and rolls back.
func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
	const (
		tsLayout  = "2006-01-02 15:04:05"
		insertSQL = "INSERT INTO state_changes (site_id, from_status, to_status, error_reason, changed_at) VALUES (?, ?, ?, ?, ?)"
	)

	// transition is one status change of a monitor.
	type transition struct {
		monitor  string
		from, to string
		reason   string // error text for a DOWN transition, empty on recovery
		when     time.Time
	}
	// before returns the instant d before now.
	before := func(d time.Duration) time.Time { return now.Add(-d) }

	// The times line up with the history dips (Nextcloud ~5h, Immich ~12h,
	// Jellyfin ~18h) and with the still-down Auth Portal (~8h), so the detail
	// panels read coherently.
	transitions := []transition{
		{monitor: "Nextcloud", from: "UP", to: "DOWN", reason: "read timeout", when: before(5*time.Hour + 8*time.Minute)},
		{monitor: "Nextcloud", from: "DOWN", to: "UP", reason: "", when: before(5 * time.Hour)},
		{monitor: "Auth Portal", from: "UP", to: "DOWN", reason: "no such host", when: before(8 * time.Hour)},
		{monitor: "Immich", from: "UP", to: "DOWN", reason: "502 Bad Gateway", when: before(12*time.Hour + 8*time.Minute)},
		{monitor: "Immich", from: "DOWN", to: "UP", reason: "", when: before(12 * time.Hour)},
		{monitor: "Jellyfin", from: "UP", to: "DOWN", reason: "connection reset", when: before(18*time.Hour + 5*time.Minute)},
		{monitor: "Jellyfin", from: "DOWN", to: "UP", reason: "", when: before(18 * time.Hour)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit there is nothing left to undo; the error from
	// this deferred Rollback is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare(insertSQL)
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, tr := range transitions {
		siteID, known := ids[tr.monitor]
		if !known {
			continue
		}
		changedAt := tr.when.Format(tsLayout)
		if _, execErr := insert.Exec(siteID, tr.from, tr.to, tr.reason, changedAt); execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// backfillLogs inserts the demo engine log lines into the logs table. Each
// message is prefixed with an [HH:MM] bracket rendered from its own timestamp
// in local time, while created_at is stored in the zone of now.
//
// All rows are written in a single transaction; the first failure aborts and
// rolls back.
func backfillLogs(db *sql.DB, now time.Time) error {
	const (
		tsLayout  = "2006-01-02 15:04:05"
		insertSQL = "INSERT INTO logs (message, created_at) VALUES (?, ?)"
	)

	// entry is one log line and the instant it was (notionally) emitted.
	type entry struct {
		when time.Time
		text string
	}
	// back returns the instant h hours, m minutes and s seconds before now.
	back := func(h, m, s int) time.Time {
		offset := time.Duration(h)*time.Hour +
			time.Duration(m)*time.Minute +
			time.Duration(s)*time.Second
		return now.Add(-offset)
	}

	// Listed newest-first. The bracket time comes from each entry's own
	// timestamp rather than being hardcoded, so the Logs view, which renders
	// the leading [HH:MM], reads chronologically. Outage times match the
	// seeded state changes and history dips.
	entries := []entry{
		{back(5, 0, 0), "Monitor 'Nextcloud' recovered (was down 8m)"},
		{back(5, 8, 0), "Monitor 'Nextcloud' confirmed DOWN: read timeout"},
		{back(5, 8, 30), "Monitor 'Nextcloud' failed check 2/2"},
		{back(5, 9, 0), "Monitor 'Nextcloud' failed check 1/2"},
		{back(8, 0, 0), "Monitor 'Auth Portal' confirmed DOWN: no such host"},
		{back(8, 0, 30), "Monitor 'Auth Portal' failed check 2/2"},
		{back(8, 1, 0), "Monitor 'Auth Portal' failed check 1/2"},
		{back(12, 0, 0), "Monitor 'Immich' recovered (was down 8m)"},
		{back(12, 8, 0), "Monitor 'Immich' confirmed DOWN: 502 Bad Gateway"},
		{back(12, 8, 30), "Monitor 'Immich' failed check 3/3"},
		{back(12, 9, 0), "Monitor 'Immich' failed check 2/3"},
		{back(12, 9, 30), "Monitor 'Immich' failed check 1/3"},
		{back(18, 0, 0), "Monitor 'Jellyfin' recovered (was down 5m)"},
		{back(18, 5, 0), "Monitor 'Jellyfin' confirmed DOWN: connection reset"},
		{back(18, 5, 30), "Monitor 'Jellyfin' failed check 2/2"},
		{back(18, 6, 0), "Monitor 'Jellyfin' failed check 1/2"},
		{back(20, 0, 0), "SSL warning: certificate for 'Personal Blog' expires in 9 days"},
		{back(22, 0, 0), "Engine RESUMED (Active)"},
		{back(22, 0, 5), "Loaded check history from database"},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit there is nothing left to undo; the error from
	// this deferred Rollback is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare(insertSQL)
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, e := range entries {
		// The bracket uses local time to match the engine's live AddLog
		// timestamps; created_at keeps the zone of now (UTC when called from
		// main) to match the store's CURRENT_TIMESTAMP ordering.
		clock := e.when.Local().Format("15:04")
		message := "[" + clock + "] " + e.text
		createdAt := e.when.Format(tsLayout)
		if _, execErr := insert.Exec(message, createdAt); execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// backfillNodes upserts the demo probe nodes into the nodes table, each with
// last_seen set to now.
//
// The rows are written in a single transaction through one prepared statement,
// matching the other backfill steps: if any insert fails, none of the nodes
// from this call are persisted.
func backfillNodes(db *sql.DB, now time.Time) error {
	const (
		tsLayout  = "2006-01-02 15:04:05"
		version   = "2026.05.1"
		upsertSQL = "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)"
	)

	// Multiple regions to show distributed probes. All seen "now" so they read ONLINE
	// for the whole capture window (kept under the 60s freshness threshold by the tape).
	nodes := []struct{ id, name, region string }{
		{"node-use1", "leader", "us-east"},
		{"node-euw1", "probe-eu", "eu-west"},
		{"node-apse1", "probe-ap", "ap-southeast"},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit there is nothing left to undo; the error from
	// this deferred Rollback is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare(upsertSQL)
	if err != nil {
		return err
	}
	defer stmt.Close()

	lastSeen := now.Format(tsLayout)
	for _, n := range nodes {
		if _, err := stmt.Exec(n.id, n.name, n.region, lastSeen, version); err != nil {
			return err
		}
	}
	return tx.Commit()
}
// backfillMaintenance inserts two demo maintenance windows:
//
//   - a past "Jellyfin upgrade" window (3 days ago, 2h long) attached to the
//     Jellyfin monitor, written only when ids contains "Jellyfin";
//   - a future "Network switch replacement" window (in 2 days, 4h long) with
//     monitor_id 0.
//
// Both rows are written in a single transaction; the first failure aborts and
// rolls back.
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
	const tsLayout = "2006-01-02 15:04:05"

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// After a successful Commit there is nothing left to undo; the error from
	// this deferred Rollback is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer stmt.Close()

	// Skip the per-monitor window when Jellyfin is not seeded, as the other
	// backfill steps do for unknown names. A plain map read would yield ID 0,
	// the same value the network window below uses, and attach this window to
	// the wrong target.
	// NOTE(review): 0 presumably means "not tied to a single monitor"; confirm
	// against the schema.
	if jellyfinID, ok := ids["Jellyfin"]; ok {
		past := now.Add(-3 * 24 * time.Hour)
		if _, err := stmt.Exec(jellyfinID, "Jellyfin upgrade", "Upgrade to v10.10 + plugin updates", "maintenance",
			past.Format(tsLayout),
			past.Add(2*time.Hour).Format(tsLayout),
			"admin"); err != nil {
			return err
		}
	}

	future := now.Add(2 * 24 * time.Hour)
	if _, err := stmt.Exec(0, "Network switch replacement", "Replacing core switch in rack 2", "maintenance",
		future.Format(tsLayout),
		future.Add(4*time.Hour).Format(tsLayout),
		"admin"); err != nil {
		return err
	}
	return tx.Commit()
}