diff --git a/internal/tui/tab_sites.go b/internal/tui/tab_sites.go index 13f45a9..8994757 100644 --- a/internal/tui/tab_sites.go +++ b/internal/tui/tab_sites.go @@ -60,14 +60,18 @@ type siteFormData struct { Regions string } -func latencySparkline(latencies []time.Duration, width int) string { +func latencySparkline(latencies []time.Duration, statuses []bool, width int) string { if len(latencies) == 0 { return subtleStyle.Render(strings.Repeat("·", width)) } samples := latencies + sampledStatuses := statuses if len(samples) > width { samples = samples[len(samples)-width:] + if len(sampledStatuses) > width { + sampledStatuses = sampledStatuses[len(sampledStatuses)-width:] + } } minL, maxL := samples[0], samples[0] @@ -85,7 +89,7 @@ func latencySparkline(latencies []time.Duration, width int) string { sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining))) } spread := maxL - minL - for _, l := range samples { + for i, l := range samples { idx := 0 if spread > 0 { idx = int(float64(l-minL) / float64(spread) * 7) @@ -94,13 +98,18 @@ func latencySparkline(latencies []time.Duration, width int) string { } } ch := string(sparkChars[idx]) - ms := l.Milliseconds() - if ms < 200 { - sb.WriteString(specialStyle.Render(ch)) - } else if ms < 500 { - sb.WriteString(warnStyle.Render(ch)) - } else { + isDown := i < len(sampledStatuses) && !sampledStatuses[i] + if isDown { sb.WriteString(dangerStyle.Render(ch)) + } else { + ms := l.Milliseconds() + if ms < 200 { + sb.WriteString(specialStyle.Render(ch)) + } else if ms < 500 { + sb.WriteString(warnStyle.Render(ch)) + } else { + sb.WriteString(dangerStyle.Render(ch)) + } } } return sb.String() @@ -474,7 +483,7 @@ func (m Model) viewSitesTab() string { if site.Type == "push" { spark = heartbeatSparkline(hist.Statuses, sparkWidth) } else { - spark = latencySparkline(hist.Latencies, sparkWidth) + spark = latencySparkline(hist.Latencies, hist.Statuses, sparkWidth) } rows = append(rows, []string{ @@ -949,7 +958,7 @@ func (m 
Model) viewDetailPanel() string { up, len(hist.Statuses)) } } else { - b.WriteString(" " + latencySparkline(hist.Latencies, sparkWidth)) + b.WriteString(" " + latencySparkline(hist.Latencies, hist.Statuses, sparkWidth)) if len(hist.Latencies) > 0 { minL, maxL := hist.Latencies[0], hist.Latencies[0] var total time.Duration diff --git a/vhs/backfill/main.go b/vhs/backfill/main.go new file mode 100644 index 0000000..4986c9b --- /dev/null +++ b/vhs/backfill/main.go @@ -0,0 +1,274 @@ +package main + +import ( + "database/sql" + "fmt" + "math/rand" + "os" + "time" + + _ "github.com/mattn/go-sqlite3" +) + +func main() { + if len(os.Args) < 2 { + fmt.Fprintln(os.Stderr, "usage: backfill ") + os.Exit(1) + } + db, err := sql.Open("sqlite3", os.Args[1]) + if err != nil { + fmt.Fprintf(os.Stderr, "open: %v\n", err) + os.Exit(1) + } + defer db.Close() + + ids, err := loadSiteIDs(db) + if err != nil { + fmt.Fprintf(os.Stderr, "load site IDs: %v\n", err) + os.Exit(1) + } + + rng := rand.New(rand.NewSource(42)) + now := time.Now().UTC() + + if err := backfillHistory(db, rng, now, ids); err != nil { + fmt.Fprintf(os.Stderr, "history: %v\n", err) + os.Exit(1) + } + if err := backfillStateChanges(db, now, ids); err != nil { + fmt.Fprintf(os.Stderr, "state changes: %v\n", err) + os.Exit(1) + } + if err := backfillLogs(db, now); err != nil { + fmt.Fprintf(os.Stderr, "logs: %v\n", err) + os.Exit(1) + } + if err := backfillNodes(db, now); err != nil { + fmt.Fprintf(os.Stderr, "nodes: %v\n", err) + os.Exit(1) + } + if err := backfillMaintenance(db, now, ids); err != nil { + fmt.Fprintf(os.Stderr, "maintenance: %v\n", err) + os.Exit(1) + } + + var count int + db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count) + fmt.Printf("Backfill complete: %d check records\n", count) + + var token string + if err := db.QueryRow("SELECT token FROM sites WHERE name='Nightly Backup'").Scan(&token); err == nil { + fmt.Printf("PUSH_TOKEN=%s\n", token) + } +} + +func loadSiteIDs(db *sql.DB) 
// monitorProfile describes the synthetic check history generated for one
// seeded monitor.
type monitorProfile struct {
	name     string // site name, used as the key into the name -> id map
	minMs    int    // inclusive lower bound of generated latency, in milliseconds
	maxMs    int    // exclusive upper bound of generated latency, in milliseconds
	downFrom int    // check index where DOWN starts (-1 = never)
}

// backfillHistory inserts 60 synthetic check_history rows, spaced 24 minutes
// apart and ending 24 minutes before now, for every profiled monitor whose
// name is present in ids. Profiles whose site is missing from ids are skipped.
//
// All rows are written in one transaction; on any error nothing is committed
// and the returned error names the step (and, for inserts, the site and check
// index) that failed.
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
	const (
		checksPerSite = 60
		checkSpacing  = 24 * time.Minute
		timeLayout    = "2006-01-02 15:04:05"
	)

	profiles := []monitorProfile{
		{"Nextcloud", 40, 80, -1},
		{"Jellyfin", 80, 200, -1},
		{"Home Assistant", 15, 45, -1},
		{"Gitea", 40, 90, -1},
		{"Traefik Dashboard", 5, 25, -1},
		{"Vaultwarden", 50, 130, -1},
		{"Personal Blog", 25, 65, -1},
		{"Immich", 100, 280, -1},    // spikes handled in syntheticCheck
		{"Auth Portal", 30, 70, 40}, // DOWN from check 40 onwards
		{"Edge Router", 5, 15, -1},  // ping
		{"Postgres", 1, 5, -1},      // port
		{"DNS Primary", 10, 30, -1},
	}

	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("begin transaction: %w", err)
	}
	defer tx.Rollback() // has no effect once Commit has succeeded

	stmt, err := tx.Prepare("INSERT INTO check_history (site_id, latency_ns, is_up, checked_at) VALUES (?, ?, ?, ?)")
	if err != nil {
		return fmt.Errorf("prepare insert: %w", err)
	}
	defer stmt.Close()

	for _, p := range profiles {
		siteID, ok := ids[p.name]
		if !ok {
			continue
		}
		for i := 0; i < checksPerSite; i++ {
			// Check 0 is the oldest; the newest lies one spacing before now.
			checkedAt := now.Add(-time.Duration(checksPerSite-i) * checkSpacing)
			latencyNs, isUp := syntheticCheck(rng, p, i)
			if _, err := stmt.Exec(siteID, latencyNs, isUp, checkedAt.Format(timeLayout)); err != nil {
				return fmt.Errorf("insert check %d for %q: %w", i, p.name, err)
			}
		}
	}
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit: %w", err)
	}
	return nil
}

// syntheticCheck produces the latency (in nanoseconds) and up/down flag for
// check number i of profile p. Checks at or after p.downFrom are reported
// down with zero latency and consume no random numbers.
func syntheticCheck(rng *rand.Rand, p monitorProfile, i int) (latencyNs int64, isUp bool) {
	if p.downFrom >= 0 && i >= p.downFrom {
		return 0, false
	}
	ms := p.minMs
	// rand.Intn panics for n <= 0, so a degenerate range pins the latency
	// to minMs instead of crashing the backfill.
	if span := p.maxMs - p.minMs; span > 0 {
		ms += rng.Intn(span)
	}
	// Every 17th Immich check is a latency spike. The base draw above is
	// still taken so the random sequence stays the same for all later rows.
	if p.name == "Immich" && i%17 == 0 {
		ms = 250 + rng.Intn(100)
	}
	return int64(ms) * int64(time.Millisecond), true
}
*sql.DB, now time.Time, ids map[string]int) error { + type sc struct { + name string + from string + to string + reason string + at time.Time + } + changes := []sc{ + {"Nextcloud", "UP", "DOWN", "read timeout", now.Add(-3 * 24 * time.Hour).Add(-5 * time.Minute)}, + {"Nextcloud", "DOWN", "UP", "", now.Add(-3 * 24 * time.Hour)}, + {"Jellyfin", "UP", "DOWN", "connection reset", now.Add(-18 * time.Hour).Add(-3 * time.Minute)}, + {"Jellyfin", "DOWN", "UP", "", now.Add(-18 * time.Hour)}, + {"Auth Portal", "UP", "DOWN", "connection refused", now.Add(-8 * time.Hour)}, + {"Immich", "UP", "DOWN", "502 Bad Gateway", now.Add(-12 * time.Hour).Add(-8 * time.Minute)}, + {"Immich", "DOWN", "UP", "", now.Add(-12 * time.Hour)}, + } + + tx, err := db.Begin() + if err != nil { + return err + } + defer tx.Rollback() + + stmt, err := tx.Prepare("INSERT INTO state_changes (site_id, from_status, to_status, error_reason, changed_at) VALUES (?, ?, ?, ?, ?)") + if err != nil { + return err + } + defer stmt.Close() + + for _, c := range changes { + siteID, ok := ids[c.name] + if !ok { + continue + } + if _, err := stmt.Exec(siteID, c.from, c.to, c.reason, c.at.Format("2006-01-02 15:04:05")); err != nil { + return err + } + } + return tx.Commit() +} + +func backfillLogs(db *sql.DB, now time.Time) error { + type logEntry struct { + msg string + at time.Time + } + logs := []logEntry{ + {"[06:12] Monitor 'Auth Portal' confirmed DOWN: connection refused", now.Add(-8 * time.Hour)}, + {"[06:12] Monitor 'Auth Portal' failed check 2/2", now.Add(-8*time.Hour - 30*time.Second)}, + {"[06:11] Monitor 'Auth Portal' failed check 1/2", now.Add(-8*time.Hour - 60*time.Second)}, + {"[12:33] Monitor 'Immich' recovered (was down 8m)", now.Add(-12 * time.Hour)}, + {"[12:25] Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", now.Add(-12*time.Hour - 8*time.Minute)}, + {"[12:25] Monitor 'Immich' failed check 3/3", now.Add(-12*time.Hour - 8*time.Minute - 30*time.Second)}, + {"[12:25] Monitor 'Immich' failed check 
// backfillMaintenance inserts the demo maintenance windows in a single
// transaction:
//
//   - a finished "Jellyfin upgrade" window that started three days before now
//     and lasted two hours, attached to the Jellyfin site;
//   - an upcoming "Network switch replacement" window that starts two days
//     after now and lasts four hours, stored with monitor_id 0.
//
// The Jellyfin window is skipped when ids has no "Jellyfin" entry. Reading the
// map without that check would yield the zero value and store the window with
// monitor_id 0, the same id used for the site-independent window; the other
// backfill steps likewise skip sites they cannot resolve.
//
// NOTE(review): monitor_id 0 presumably means "not tied to a single monitor";
// confirm against the maintenance_windows schema.
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
	const timeLayout = "2006-01-02 15:04:05"

	tx, err := db.Begin()
	if err != nil {
		return fmt.Errorf("begin transaction: %w", err)
	}
	defer tx.Rollback() // has no effect once Commit has succeeded

	stmt, err := tx.Prepare("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)")
	if err != nil {
		return fmt.Errorf("prepare insert: %w", err)
	}
	defer stmt.Close()

	// insert writes one window of the given length starting at start.
	insert := func(monitorID int, title, description string, start time.Time, length time.Duration) error {
		_, err := stmt.Exec(monitorID, title, description, "maintenance",
			start.Format(timeLayout),
			start.Add(length).Format(timeLayout),
			"admin")
		if err != nil {
			return fmt.Errorf("insert window %q: %w", title, err)
		}
		return nil
	}

	if jellyfinID, ok := ids["Jellyfin"]; ok {
		past := now.Add(-3 * 24 * time.Hour)
		if err := insert(jellyfinID, "Jellyfin upgrade", "Upgrade to v10.10 + plugin updates", past, 2*time.Hour); err != nil {
			return err
		}
	}

	future := now.Add(2 * 24 * time.Hour)
	if err := insert(0, "Network switch replacement", "Replacing core switch in rack 2", future, 4*time.Hour); err != nil {
		return err
	}

	if err := tx.Commit(); err != nil {
		return fmt.Errorf("commit: %w", err)
	}
	return nil
}
b/vhs/screenshots/monitors.png new file mode 100644 index 0000000..4334125 Binary files /dev/null and b/vhs/screenshots/monitors.png differ diff --git a/vhs/seed.yaml b/vhs/seed.yaml new file mode 100644 index 0000000..16d5d81 --- /dev/null +++ b/vhs/seed.yaml @@ -0,0 +1,141 @@ +alerts: + - name: Discord Homelab + type: discord + settings: + url: https://discord.com/api/webhooks/1234567890/demo-token + + - name: Ntfy Alerts + type: webhook + settings: + url: https://ntfy.example.com/homelab-alerts + + - name: Email Oncall + type: email + settings: + host: smtp.example.com + port: "587" + user: alerts@example.com + pass: "••••••••" + from: alerts@example.com + to: oncall@example.com + + - name: Slack Ops + type: slack + settings: + url: https://hooks.slack.com/services/T00000/B00000/demo-token + +monitors: + # HTTP — homelab services + - name: Nextcloud + type: http + url: https://example.com + interval: 30 + alert: Discord Homelab + check_ssl: true + expiry_threshold: 14 + max_retries: 2 + + - name: Jellyfin + type: http + url: https://example.com + interval: 30 + alert: Discord Homelab + max_retries: 2 + + - name: Home Assistant + type: http + url: https://example.com + interval: 30 + alert: Discord Homelab + max_retries: 3 + + - name: Gitea + type: http + url: https://example.com + interval: 60 + alert: Discord Homelab + check_ssl: true + expiry_threshold: 14 + max_retries: 2 + + - name: Traefik Dashboard + type: http + url: https://example.com + interval: 60 + alert: Discord Homelab + max_retries: 1 + + - name: Vaultwarden + type: http + url: https://example.com + interval: 30 + alert: Discord Homelab + check_ssl: true + expiry_threshold: 14 + max_retries: 3 + + - name: Personal Blog + type: http + url: https://example.com + interval: 120 + alert: Discord Homelab + check_ssl: true + expiry_threshold: 14 + max_retries: 2 + + - name: Immich + type: http + url: https://example.com + interval: 60 + alert: Discord Homelab + check_ssl: true + expiry_threshold: 7 + 
#!/bin/bash
# VHS screenshot setup: seed monitors, backfill history, start server.
#
# Usage: setup.sh <db-path>
#
# Recreates the database at <db-path>, applies vhs/seed.yaml, runs the
# backfill program, schedules one push heartbeat, then replaces this shell
# with the uptop server.
set -e
DB="${1:?usage: setup.sh <db-path>}"

# Single source for the HTTP port so the heartbeat URL and the server
# configuration below cannot drift apart.
HTTP_PORT=18099

# Start from a clean slate, including the files that sit next to the database.
rm -f "$DB" "$DB-shm" "$DB-wal"

echo "==> Seeding monitors and alerts..."
UPTOP_DB_DSN="$DB" ./uptop apply -f vhs/seed.yaml 2>&1

echo "==> Backfilling check history..."
BACKFILL_OUT=$(go run ./vhs/backfill/ "$DB")
echo "$BACKFILL_OUT"

# -f2- keeps everything after the first '=', so a token that itself contains
# '=' (for example base64 padding) is not truncated.
PUSH_TOKEN=$(echo "$BACKFILL_OUT" | grep '^PUSH_TOKEN=' | cut -d= -f2-)
if [ -n "$PUSH_TOKEN" ]; then
  echo "==> Sending push heartbeat in 15s (background)..."
  # Backgrounded with a delay because the server is only started by the
  # exec at the end of this script.
  (sleep 15 && curl -s "http://localhost:${HTTP_PORT}/api/push" -H "Authorization: Bearer $PUSH_TOKEN" > /dev/null 2>&1) &
fi

echo "==> Starting uptop server..."
exec env \
  UPTOP_DB_DSN="$DB" \
  UPTOP_PORT=23299 \
  UPTOP_HTTP_PORT="$HTTP_PORT" \
  UPTOP_ALLOW_PRIVATE_TARGETS=true \
  ./uptop serve 2>/dev/null