chore(tui): polish demo + regenerate screenshots
Rework the VHS demo so the README screenshots actually entice a download.

Demo data / tooling:
- seed.yaml: real, reachable service URLs (detail now shows nextcloud.com, not example.com); Auth Portal -> non-resolving home.arpa host so it reads as a believable, reliably-DOWN monitor
- backfill: transient outages for Nextcloud/Jellyfin/Immich aligned with their state changes (uptime % now matches); log timestamps derived from now so the Logs view reads chronologically; real SSL warning; three probe nodes across regions; seeded alert send health
- demo.tape: shorter warm-up, added Nodes + theme captures, ordered so every shot stays inside the 60s node-freshness window (consistent probe count)
- vhs/crop: new tool to trim the empty terminal border around each screenshot
- setup.sh: build backfill up front for deterministic timing; UPTOP_DEMO=1

Supporting code:
- persist alert send health (new alert_health table, load on startup, best-effort save on send) so health/last-sent survive restarts
- latency Min/Avg/Max ignore failed checks (no more "Min 0ms")
- correct "probe"/"probes" pluralization
- stable status dot instead of an animated spinner under UPTOP_DEMO
This commit is contained in:
+136
-42
@@ -51,6 +51,15 @@ func main() {
|
||||
fmt.Fprintf(os.Stderr, "maintenance: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
alertIDs, err := loadAlertIDs(db)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "load alert IDs: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillAlertHealth(db, now, alertIDs); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "alert health: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var count int
|
||||
_ = db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count)
|
||||
@@ -67,6 +76,18 @@ func loadSiteIDs(db *sql.DB) (map[string]int, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return scanNameIDs(rows)
|
||||
}
|
||||
|
||||
func loadAlertIDs(db *sql.DB) (map[string]int, error) {
|
||||
rows, err := db.Query("SELECT id, name FROM alerts")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return scanNameIDs(rows)
|
||||
}
|
||||
|
||||
func scanNameIDs(rows *sql.Rows) (map[string]int, error) {
|
||||
defer rows.Close()
|
||||
ids := make(map[string]int)
|
||||
for rows.Next() {
|
||||
@@ -80,27 +101,73 @@ func loadSiteIDs(db *sql.DB) (map[string]int, error) {
|
||||
return ids, rows.Err()
|
||||
}
|
||||
|
||||
// backfillAlertHealth writes canned send-health rows into alert_health so the
// Alerts tab has recent "last sent" times and healthy status to show instead of
// "never" for every alert.
//
// now is the reference instant: each row's last_send_at is now minus a fixed
// offset. alertIDs maps an alert name to its ID; seed entries whose name is not
// present in the map are skipped silently. All rows are written inside a single
// transaction, and the first failing statement aborts the whole batch.
func backfillAlertHealth(db *sql.DB, now time.Time, alertIDs map[string]int) error {
	const (
		upsertSQL  = "INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)"
		timeLayout = "2006-01-02 15:04:05"
	)

	type seed struct {
		alert     string
		age       time.Duration // how long before now the last send happened
		healthy   bool
		sendCount int
		failCount int
	}
	seeds := []seed{
		{alert: "Discord Homelab", age: 4 * time.Minute, healthy: true, sendCount: 37, failCount: 0},
		{alert: "Slack Ops", age: 9 * time.Minute, healthy: true, sendCount: 21, failCount: 1},
		{alert: "Ntfy Alerts", age: 1 * time.Hour, healthy: true, sendCount: 12, failCount: 0},
		{alert: "Email Oncall", age: 3 * time.Hour, healthy: true, sendCount: 5, failCount: 0},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Covers every early return below; after a successful Commit the rollback
	// has nothing left to undo, so its error is deliberately discarded.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare(upsertSQL)
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, s := range seeds {
		alertID, known := alertIDs[s.alert]
		if !known {
			continue
		}
		lastSent := now.Add(-s.age).Format(timeLayout)
		// last_error is always seeded empty.
		if _, err := insert.Exec(alertID, lastSent, s.healthy, "", s.sendCount, s.failCount); err != nil {
			return err
		}
	}
	return tx.Commit()
}
|
||||
|
||||
// monitorProfile describes the synthetic check history generated for one
// monitor: a latency range plus an optional window of DOWN checks.
type monitorProfile struct {
	name     string // monitor name
	minMs    int    // lower latency bound (milliseconds, per the field name)
	maxMs    int    // upper latency bound (milliseconds, per the field name)
	downFrom int    // first DOWN check index (-1 = always up)
	downTo   int    // exclusive end of the DOWN window; use 60 (total) for a still-down monitor
}
|
||||
|
||||
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
|
||||
// Latency ranges reflect monitoring public services over the internet, so the
|
||||
// detail histogram brackets the live latency the engine measures at capture time.
|
||||
// 60 checks * 24m spacing = a 24h window; dip indices place outages within it.
|
||||
profiles := []monitorProfile{
|
||||
{"Nextcloud", 40, 80, -1},
|
||||
{"Jellyfin", 80, 200, -1},
|
||||
{"Home Assistant", 15, 45, -1},
|
||||
{"Gitea", 40, 90, -1},
|
||||
{"Traefik Dashboard", 5, 25, -1},
|
||||
{"Vaultwarden", 50, 130, -1},
|
||||
{"Personal Blog", 25, 65, -1},
|
||||
{"Immich", 100, 280, -1}, // spikes handled below
|
||||
{"Auth Portal", 30, 70, 40}, // DOWN after check 40
|
||||
{"Edge Router", 5, 15, -1}, // ping
|
||||
{"Postgres", 1, 5, -1}, // port
|
||||
{"DNS Primary", 10, 30, -1},
|
||||
{"Nextcloud", 200, 600, 47, 48}, // brief blip ~5h ago, recovered
|
||||
{"Jellyfin", 40, 180, 15, 16}, // brief blip ~18h ago, recovered
|
||||
{"Home Assistant", 30, 120, -1, 0}, //
|
||||
{"Gitea", 50, 200, -1, 0}, //
|
||||
{"Traefik Dashboard", 60, 200, -1, 0}, //
|
||||
{"Vaultwarden", 80, 250, -1, 0}, //
|
||||
{"Personal Blog", 40, 160, -1, 0}, //
|
||||
{"Immich", 60, 300, 30, 31}, // brief blip ~12h ago; periodic spikes below
|
||||
{"Auth Portal", 30, 90, 40, 60}, // DOWN ~8h ago, still down
|
||||
{"Edge Router", 5, 20, -1, 0}, // ping
|
||||
{"Postgres", 1, 6, -1, 0}, // port
|
||||
{"DNS Primary", 8, 30, -1, 0}, // dns
|
||||
}
|
||||
|
||||
tx, err := db.Begin()
|
||||
@@ -128,7 +195,7 @@ func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]i
|
||||
var latencyNs int64
|
||||
isUp := true
|
||||
|
||||
if p.downFrom >= 0 && i >= p.downFrom {
|
||||
if p.downFrom >= 0 && i >= p.downFrom && i < p.downTo {
|
||||
latencyNs = 0
|
||||
isUp = false
|
||||
} else {
|
||||
@@ -155,14 +222,16 @@ func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
|
||||
reason string
|
||||
at time.Time
|
||||
}
|
||||
// Timed to line up with the history dips (Nextcloud ~5h, Immich ~12h, Jellyfin ~18h)
|
||||
// and the still-down Auth Portal (~8h), so detail panels read coherently.
|
||||
changes := []sc{
|
||||
{"Nextcloud", "UP", "DOWN", "read timeout", now.Add(-3 * 24 * time.Hour).Add(-5 * time.Minute)},
|
||||
{"Nextcloud", "DOWN", "UP", "", now.Add(-3 * 24 * time.Hour)},
|
||||
{"Jellyfin", "UP", "DOWN", "connection reset", now.Add(-18 * time.Hour).Add(-3 * time.Minute)},
|
||||
{"Jellyfin", "DOWN", "UP", "", now.Add(-18 * time.Hour)},
|
||||
{"Auth Portal", "UP", "DOWN", "connection refused", now.Add(-8 * time.Hour)},
|
||||
{"Nextcloud", "UP", "DOWN", "read timeout", now.Add(-5 * time.Hour).Add(-8 * time.Minute)},
|
||||
{"Nextcloud", "DOWN", "UP", "", now.Add(-5 * time.Hour)},
|
||||
{"Auth Portal", "UP", "DOWN", "no such host", now.Add(-8 * time.Hour)},
|
||||
{"Immich", "UP", "DOWN", "502 Bad Gateway", now.Add(-12 * time.Hour).Add(-8 * time.Minute)},
|
||||
{"Immich", "DOWN", "UP", "", now.Add(-12 * time.Hour)},
|
||||
{"Jellyfin", "UP", "DOWN", "connection reset", now.Add(-18 * time.Hour).Add(-5 * time.Minute)},
|
||||
{"Jellyfin", "DOWN", "UP", "", now.Add(-18 * time.Hour)},
|
||||
}
|
||||
|
||||
tx, err := db.Begin()
|
||||
@@ -191,25 +260,35 @@ func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
|
||||
|
||||
func backfillLogs(db *sql.DB, now time.Time) error {
|
||||
type logEntry struct {
|
||||
msg string
|
||||
at time.Time
|
||||
text string
|
||||
at time.Time
|
||||
}
|
||||
ago := func(h, m, s int) time.Time {
|
||||
return now.Add(-(time.Duration(h)*time.Hour + time.Duration(m)*time.Minute + time.Duration(s)*time.Second))
|
||||
}
|
||||
// Ordered newest-first. The bracket time is derived from `at` (not hardcoded), so the
|
||||
// Logs view — which renders the leading [HH:MM] — reads chronologically. Outage times
|
||||
// line up with the state changes and history dips above.
|
||||
logs := []logEntry{
|
||||
{"[06:12] Monitor 'Auth Portal' confirmed DOWN: connection refused", now.Add(-8 * time.Hour)},
|
||||
{"[06:12] Monitor 'Auth Portal' failed check 2/2", now.Add(-8*time.Hour - 30*time.Second)},
|
||||
{"[06:11] Monitor 'Auth Portal' failed check 1/2", now.Add(-8*time.Hour - 60*time.Second)},
|
||||
{"[12:33] Monitor 'Immich' recovered (was down 8m)", now.Add(-12 * time.Hour)},
|
||||
{"[12:25] Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", now.Add(-12*time.Hour - 8*time.Minute)},
|
||||
{"[12:25] Monitor 'Immich' failed check 3/3", now.Add(-12*time.Hour - 8*time.Minute - 30*time.Second)},
|
||||
{"[12:25] Monitor 'Immich' failed check 2/3", now.Add(-12*time.Hour - 8*time.Minute - 60*time.Second)},
|
||||
{"[12:24] Monitor 'Immich' failed check 1/3", now.Add(-12*time.Hour - 9*time.Minute)},
|
||||
{"[06:14] Monitor 'Jellyfin' recovered (was down 3m)", now.Add(-18 * time.Hour)},
|
||||
{"[06:11] Monitor 'Jellyfin' confirmed DOWN: connection reset", now.Add(-18*time.Hour - 3*time.Minute)},
|
||||
{"[06:11] Monitor 'Jellyfin' failed check 2/2", now.Add(-18*time.Hour - 3*time.Minute - 30*time.Second)},
|
||||
{"[06:10] Monitor 'Jellyfin' failed check 1/2", now.Add(-18*time.Hour - 4*time.Minute)},
|
||||
{"[23:45] SSL certificate for 'Personal Blog' expires in 42 days", now.Add(-28 * time.Hour)},
|
||||
{"[08:00] Loaded check history from database", now.Add(-32*time.Hour - 30*time.Minute)},
|
||||
{"[08:00] Engine RESUMED (Active)", now.Add(-32*time.Hour - 30*time.Minute - 5*time.Second)},
|
||||
{"Monitor 'Nextcloud' recovered (was down 8m)", ago(5, 0, 0)},
|
||||
{"Monitor 'Nextcloud' confirmed DOWN: read timeout", ago(5, 8, 0)},
|
||||
{"Monitor 'Nextcloud' failed check 2/2", ago(5, 8, 30)},
|
||||
{"Monitor 'Nextcloud' failed check 1/2", ago(5, 9, 0)},
|
||||
{"Monitor 'Auth Portal' confirmed DOWN: no such host", ago(8, 0, 0)},
|
||||
{"Monitor 'Auth Portal' failed check 2/2", ago(8, 0, 30)},
|
||||
{"Monitor 'Auth Portal' failed check 1/2", ago(8, 1, 0)},
|
||||
{"Monitor 'Immich' recovered (was down 8m)", ago(12, 0, 0)},
|
||||
{"Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", ago(12, 8, 0)},
|
||||
{"Monitor 'Immich' failed check 3/3", ago(12, 8, 30)},
|
||||
{"Monitor 'Immich' failed check 2/3", ago(12, 9, 0)},
|
||||
{"Monitor 'Immich' failed check 1/3", ago(12, 9, 30)},
|
||||
{"Monitor 'Jellyfin' recovered (was down 5m)", ago(18, 0, 0)},
|
||||
{"Monitor 'Jellyfin' confirmed DOWN: connection reset", ago(18, 5, 0)},
|
||||
{"Monitor 'Jellyfin' failed check 2/2", ago(18, 5, 30)},
|
||||
{"Monitor 'Jellyfin' failed check 1/2", ago(18, 6, 0)},
|
||||
{"SSL warning: certificate for 'Personal Blog' expires in 9 days", ago(20, 0, 0)},
|
||||
{"Engine RESUMED (Active)", ago(22, 0, 0)},
|
||||
{"Loaded check history from database", ago(22, 0, 5)},
|
||||
}
|
||||
|
||||
tx, err := db.Begin()
|
||||
@@ -225,7 +304,10 @@ func backfillLogs(db *sql.DB, now time.Time) error {
|
||||
defer stmt.Close()
|
||||
|
||||
for _, l := range logs {
|
||||
if _, err := stmt.Exec(l.msg, l.at.Format("2006-01-02 15:04:05")); err != nil {
|
||||
// Bracket in local time to match the engine's live AddLog timestamps;
|
||||
// created_at stays UTC to match the store's CURRENT_TIMESTAMP ordering.
|
||||
msg := "[" + l.at.Local().Format("15:04") + "] " + l.text
|
||||
if _, err := stmt.Exec(msg, l.at.Format("2006-01-02 15:04:05")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -233,11 +315,23 @@ func backfillLogs(db *sql.DB, now time.Time) error {
|
||||
}
|
||||
|
||||
// backfillNodes upserts the demo probe nodes into the nodes table.
//
// Three nodes in different regions are written so the demo shows distributed
// probes. Every node gets last_seen = now so it reads ONLINE for the whole
// capture window (kept under the 60s freshness threshold by the tape).
// The first failing insert is returned immediately; rows written before it stay
// in place because no transaction is used.
func backfillNodes(db *sql.DB, now time.Time) error {
	const (
		upsertSQL = "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)"
		version   = "2026.05.1"
	)

	nodes := []struct{ id, name, region string }{
		{"node-use1", "leader", "us-east"},
		{"node-euw1", "probe-eu", "eu-west"},
		{"node-apse1", "probe-ap", "ap-southeast"},
	}

	// Format once so all nodes share an identical last_seen value.
	ts := now.Format("2006-01-02 15:04:05")
	for _, n := range nodes {
		if _, err := db.Exec(upsertSQL, n.id, n.name, n.region, ts, version); err != nil {
			return err
		}
	}
	return nil
}
|
||||
|
||||
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
|
||||
|
||||
Reference in New Issue
Block a user