chore(tui): polish demo + regenerate screenshots
CI / test (pull_request) Successful in 2m45s
CI / lint (pull_request) Successful in 1m4s
CI / vulncheck (pull_request) Successful in 56s

Rework the VHS demo so the README screenshots actually entice a download.

Demo data / tooling:
- seed.yaml: real, reachable service URLs (detail now shows nextcloud.com,
  not example.com); Auth Portal -> non-resolving home.arpa host so it reads
  as a believable, reliably-DOWN monitor
- backfill: transient outages for Nextcloud/Jellyfin/Immich aligned with their
  state changes (uptime % now matches); log timestamps derived from now so the
  Logs view reads chronologically; real SSL warning; three probe nodes across
  regions; seeded alert send health
- demo.tape: shorter warm-up, added Nodes + theme captures, ordered so every
  shot stays inside the 60s node-freshness window (consistent probe count)
- vhs/crop: new tool to trim the empty terminal border around each screenshot
- setup.sh: build backfill up front for deterministic timing; UPTOP_DEMO=1

Supporting code:
- persist alert send health (new alert_health table, load on startup,
  best-effort save on send) so health/last-sent survive restarts
- latency Min/Avg/Max ignore failed checks (no more "Min 0ms")
- correct "probe"/"probes" pluralization
- stable status dot instead of an animated spinner under UPTOP_DEMO
This commit is contained in:
2026-05-28 22:32:45 -04:00
parent 9c7ed284b3
commit 03cbe283df
25 changed files with 483 additions and 99 deletions
+136 -42
View File
@@ -51,6 +51,15 @@ func main() {
fmt.Fprintf(os.Stderr, "maintenance: %v\n", err)
os.Exit(1)
}
alertIDs, err := loadAlertIDs(db)
if err != nil {
fmt.Fprintf(os.Stderr, "load alert IDs: %v\n", err)
os.Exit(1)
}
if err := backfillAlertHealth(db, now, alertIDs); err != nil {
fmt.Fprintf(os.Stderr, "alert health: %v\n", err)
os.Exit(1)
}
var count int
_ = db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count)
@@ -67,6 +76,18 @@ func loadSiteIDs(db *sql.DB) (map[string]int, error) {
if err != nil {
return nil, err
}
return scanNameIDs(rows)
}
func loadAlertIDs(db *sql.DB) (map[string]int, error) {
rows, err := db.Query("SELECT id, name FROM alerts")
if err != nil {
return nil, err
}
return scanNameIDs(rows)
}
func scanNameIDs(rows *sql.Rows) (map[string]int, error) {
defer rows.Close()
ids := make(map[string]int)
for rows.Next() {
@@ -80,27 +101,73 @@ func loadSiteIDs(db *sql.DB) (map[string]int, error) {
return ids, rows.Err()
}
// backfillAlertHealth seeds realistic send health so the Alerts tab shows recent,
// healthy "last sent" times and green health dots instead of "never" across the board.
func backfillAlertHealth(db *sql.DB, now time.Time, alertIDs map[string]int) error {
type health struct {
name string
sentAgo time.Duration
ok bool
sends int
fails int
}
rows := []health{
{"Discord Homelab", 4 * time.Minute, true, 37, 0},
{"Slack Ops", 9 * time.Minute, true, 21, 1},
{"Ntfy Alerts", 1 * time.Hour, true, 12, 0},
{"Email Oncall", 3 * time.Hour, true, 5, 0},
}
tx, err := db.Begin()
if err != nil {
return err
}
defer func() { _ = tx.Rollback() }()
stmt, err := tx.Prepare("INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)")
if err != nil {
return err
}
defer stmt.Close()
for _, r := range rows {
id, ok := alertIDs[r.name]
if !ok {
continue
}
sentAt := now.Add(-r.sentAgo).Format("2006-01-02 15:04:05")
if _, err := stmt.Exec(id, sentAt, r.ok, "", r.sends, r.fails); err != nil {
return err
}
}
return tx.Commit()
}
type monitorProfile struct {
name string
minMs int
maxMs int
downFrom int // check index where DOWN starts (-1 = never)
downFrom int // first DOWN check index (-1 = always up)
downTo int // exclusive end of the DOWN window; use 60 (total) for a still-down monitor
}
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
// Latency ranges reflect monitoring public services over the internet, so the
// detail histogram brackets the live latency the engine measures at capture time.
// 60 checks * 24m spacing = a 24h window; dip indices place outages within it.
profiles := []monitorProfile{
{"Nextcloud", 40, 80, -1},
{"Jellyfin", 80, 200, -1},
{"Home Assistant", 15, 45, -1},
{"Gitea", 40, 90, -1},
{"Traefik Dashboard", 5, 25, -1},
{"Vaultwarden", 50, 130, -1},
{"Personal Blog", 25, 65, -1},
{"Immich", 100, 280, -1}, // spikes handled below
{"Auth Portal", 30, 70, 40}, // DOWN after check 40
{"Edge Router", 5, 15, -1}, // ping
{"Postgres", 1, 5, -1}, // port
{"DNS Primary", 10, 30, -1},
{"Nextcloud", 200, 600, 47, 48}, // brief blip ~5h ago, recovered
{"Jellyfin", 40, 180, 15, 16}, // brief blip ~18h ago, recovered
{"Home Assistant", 30, 120, -1, 0}, //
{"Gitea", 50, 200, -1, 0}, //
{"Traefik Dashboard", 60, 200, -1, 0}, //
{"Vaultwarden", 80, 250, -1, 0}, //
{"Personal Blog", 40, 160, -1, 0}, //
{"Immich", 60, 300, 30, 31}, // brief blip ~12h ago; periodic spikes below
{"Auth Portal", 30, 90, 40, 60}, // DOWN ~8h ago, still down
{"Edge Router", 5, 20, -1, 0}, // ping
{"Postgres", 1, 6, -1, 0}, // port
{"DNS Primary", 8, 30, -1, 0}, // dns
}
tx, err := db.Begin()
@@ -128,7 +195,7 @@ func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]i
var latencyNs int64
isUp := true
if p.downFrom >= 0 && i >= p.downFrom {
if p.downFrom >= 0 && i >= p.downFrom && i < p.downTo {
latencyNs = 0
isUp = false
} else {
@@ -155,14 +222,16 @@ func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
reason string
at time.Time
}
// Timed to line up with the history dips (Nextcloud ~5h, Immich ~12h, Jellyfin ~18h)
// and the still-down Auth Portal (~8h), so detail panels read coherently.
changes := []sc{
{"Nextcloud", "UP", "DOWN", "read timeout", now.Add(-3 * 24 * time.Hour).Add(-5 * time.Minute)},
{"Nextcloud", "DOWN", "UP", "", now.Add(-3 * 24 * time.Hour)},
{"Jellyfin", "UP", "DOWN", "connection reset", now.Add(-18 * time.Hour).Add(-3 * time.Minute)},
{"Jellyfin", "DOWN", "UP", "", now.Add(-18 * time.Hour)},
{"Auth Portal", "UP", "DOWN", "connection refused", now.Add(-8 * time.Hour)},
{"Nextcloud", "UP", "DOWN", "read timeout", now.Add(-5 * time.Hour).Add(-8 * time.Minute)},
{"Nextcloud", "DOWN", "UP", "", now.Add(-5 * time.Hour)},
{"Auth Portal", "UP", "DOWN", "no such host", now.Add(-8 * time.Hour)},
{"Immich", "UP", "DOWN", "502 Bad Gateway", now.Add(-12 * time.Hour).Add(-8 * time.Minute)},
{"Immich", "DOWN", "UP", "", now.Add(-12 * time.Hour)},
{"Jellyfin", "UP", "DOWN", "connection reset", now.Add(-18 * time.Hour).Add(-5 * time.Minute)},
{"Jellyfin", "DOWN", "UP", "", now.Add(-18 * time.Hour)},
}
tx, err := db.Begin()
@@ -191,25 +260,35 @@ func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
func backfillLogs(db *sql.DB, now time.Time) error {
type logEntry struct {
msg string
at time.Time
text string
at time.Time
}
ago := func(h, m, s int) time.Time {
return now.Add(-(time.Duration(h)*time.Hour + time.Duration(m)*time.Minute + time.Duration(s)*time.Second))
}
// Ordered newest-first. The bracket time is derived from `at` (not hardcoded), so the
// Logs view — which renders the leading [HH:MM] — reads chronologically. Outage times
// line up with the state changes and history dips above.
logs := []logEntry{
{"[06:12] Monitor 'Auth Portal' confirmed DOWN: connection refused", now.Add(-8 * time.Hour)},
{"[06:12] Monitor 'Auth Portal' failed check 2/2", now.Add(-8*time.Hour - 30*time.Second)},
{"[06:11] Monitor 'Auth Portal' failed check 1/2", now.Add(-8*time.Hour - 60*time.Second)},
{"[12:33] Monitor 'Immich' recovered (was down 8m)", now.Add(-12 * time.Hour)},
{"[12:25] Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", now.Add(-12*time.Hour - 8*time.Minute)},
{"[12:25] Monitor 'Immich' failed check 3/3", now.Add(-12*time.Hour - 8*time.Minute - 30*time.Second)},
{"[12:25] Monitor 'Immich' failed check 2/3", now.Add(-12*time.Hour - 8*time.Minute - 60*time.Second)},
{"[12:24] Monitor 'Immich' failed check 1/3", now.Add(-12*time.Hour - 9*time.Minute)},
{"[06:14] Monitor 'Jellyfin' recovered (was down 3m)", now.Add(-18 * time.Hour)},
{"[06:11] Monitor 'Jellyfin' confirmed DOWN: connection reset", now.Add(-18*time.Hour - 3*time.Minute)},
{"[06:11] Monitor 'Jellyfin' failed check 2/2", now.Add(-18*time.Hour - 3*time.Minute - 30*time.Second)},
{"[06:10] Monitor 'Jellyfin' failed check 1/2", now.Add(-18*time.Hour - 4*time.Minute)},
{"[23:45] SSL certificate for 'Personal Blog' expires in 42 days", now.Add(-28 * time.Hour)},
{"[08:00] Loaded check history from database", now.Add(-32*time.Hour - 30*time.Minute)},
{"[08:00] Engine RESUMED (Active)", now.Add(-32*time.Hour - 30*time.Minute - 5*time.Second)},
{"Monitor 'Nextcloud' recovered (was down 8m)", ago(5, 0, 0)},
{"Monitor 'Nextcloud' confirmed DOWN: read timeout", ago(5, 8, 0)},
{"Monitor 'Nextcloud' failed check 2/2", ago(5, 8, 30)},
{"Monitor 'Nextcloud' failed check 1/2", ago(5, 9, 0)},
{"Monitor 'Auth Portal' confirmed DOWN: no such host", ago(8, 0, 0)},
{"Monitor 'Auth Portal' failed check 2/2", ago(8, 0, 30)},
{"Monitor 'Auth Portal' failed check 1/2", ago(8, 1, 0)},
{"Monitor 'Immich' recovered (was down 8m)", ago(12, 0, 0)},
{"Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", ago(12, 8, 0)},
{"Monitor 'Immich' failed check 3/3", ago(12, 8, 30)},
{"Monitor 'Immich' failed check 2/3", ago(12, 9, 0)},
{"Monitor 'Immich' failed check 1/3", ago(12, 9, 30)},
{"Monitor 'Jellyfin' recovered (was down 5m)", ago(18, 0, 0)},
{"Monitor 'Jellyfin' confirmed DOWN: connection reset", ago(18, 5, 0)},
{"Monitor 'Jellyfin' failed check 2/2", ago(18, 5, 30)},
{"Monitor 'Jellyfin' failed check 1/2", ago(18, 6, 0)},
{"SSL warning: certificate for 'Personal Blog' expires in 9 days", ago(20, 0, 0)},
{"Engine RESUMED (Active)", ago(22, 0, 0)},
{"Loaded check history from database", ago(22, 0, 5)},
}
tx, err := db.Begin()
@@ -225,7 +304,10 @@ func backfillLogs(db *sql.DB, now time.Time) error {
defer stmt.Close()
for _, l := range logs {
if _, err := stmt.Exec(l.msg, l.at.Format("2006-01-02 15:04:05")); err != nil {
// Bracket in local time to match the engine's live AddLog timestamps;
// created_at stays UTC to match the store's CURRENT_TIMESTAMP ordering.
msg := "[" + l.at.Local().Format("15:04") + "] " + l.text
if _, err := stmt.Exec(msg, l.at.Format("2006-01-02 15:04:05")); err != nil {
return err
}
}
@@ -233,11 +315,23 @@ func backfillLogs(db *sql.DB, now time.Time) error {
}
func backfillNodes(db *sql.DB, now time.Time) error {
_, err := db.Exec(
"INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)",
"node-1", "leader", "us-east", now.Format("2006-01-02 15:04:05"), "2026.05.1",
)
return err
// Multiple regions to show distributed probes. All seen "now" so they read ONLINE
// for the whole capture window (kept under the 60s freshness threshold by the tape).
nodes := []struct{ id, name, region string }{
{"node-use1", "leader", "us-east"},
{"node-euw1", "probe-eu", "eu-west"},
{"node-apse1", "probe-ap", "ap-southeast"},
}
ts := now.Format("2006-01-02 15:04:05")
for _, n := range nodes {
if _, err := db.Exec(
"INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)",
n.id, n.name, n.region, ts, "2026.05.1",
); err != nil {
return err
}
}
return nil
}
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
+123
View File
@@ -0,0 +1,123 @@
// Command crop trims the uniform background border around each VHS screenshot so the
// content fills the frame instead of floating in a large empty terminal. Sparse views
// (alerts, detail, nodes) would otherwise sit in a sea of dead space.
//
// Usage: crop [dir] (dir defaults to vhs/screenshots)
package main
import (
"fmt"
"image"
"image/color"
"image/png"
"os"
"path/filepath"
)
// pad is the margin (px) left around the detected content. tol is the per-channel
// colour distance (summed) above which a pixel counts as content rather than background.
const (
pad = 24
tol = 28
)
func main() {
dir := "vhs/screenshots"
if len(os.Args) > 1 {
dir = os.Args[1]
}
paths, err := filepath.Glob(filepath.Join(dir, "*.png"))
if err != nil {
fmt.Fprintf(os.Stderr, "glob: %v\n", err)
os.Exit(1)
}
if len(paths) == 0 {
fmt.Fprintf(os.Stderr, "no PNGs in %s\n", dir)
os.Exit(1)
}
for _, p := range paths {
w, h, err := cropFile(p)
if err != nil {
fmt.Fprintf(os.Stderr, "crop %s: %v\n", p, err)
os.Exit(1)
}
fmt.Printf("cropped %s -> %dx%d\n", filepath.Base(p), w, h)
}
}
func cropFile(path string) (int, int, error) {
f, err := os.Open(path) //nolint:gosec // dev tool: paths come from a trusted local glob
if err != nil {
return 0, 0, err
}
src, err := png.Decode(f)
_ = f.Close()
if err != nil {
return 0, 0, err
}
b := src.Bounds()
// Background colour sampled from a corner — always inside VHS's blank padding.
bgR, bgG, bgB := rgb(src.At(b.Min.X+2, b.Min.Y+2))
minX, minY := b.Max.X, b.Max.Y
maxX, maxY := b.Min.X, b.Min.Y
found := false
for y := b.Min.Y; y < b.Max.Y; y++ {
for x := b.Min.X; x < b.Max.X; x++ {
r, g, bl := rgb(src.At(x, y))
if abs(r-bgR)+abs(g-bgG)+abs(bl-bgB) > tol {
found = true
minX, minY = min(minX, x), min(minY, y)
maxX, maxY = max(maxX, x), max(maxY, y)
}
}
}
if !found {
return b.Dx(), b.Dy(), nil // blank frame — leave untouched
}
minX = clamp(minX-pad, b.Min.X, b.Max.X)
minY = clamp(minY-pad, b.Min.Y, b.Max.Y)
maxX = clamp(maxX+pad+1, b.Min.X, b.Max.X)
maxY = clamp(maxY+pad+1, b.Min.Y, b.Max.Y)
dst := image.NewRGBA(image.Rect(0, 0, maxX-minX, maxY-minY))
for y := minY; y < maxY; y++ {
for x := minX; x < maxX; x++ {
dst.Set(x-minX, y-minY, src.At(x, y))
}
}
out, err := os.Create(path) //nolint:gosec // dev tool: paths come from a trusted local glob
if err != nil {
return 0, 0, err
}
defer out.Close() //nolint:errcheck // best-effort close on write path
if err := png.Encode(out, dst); err != nil {
return 0, 0, err
}
return dst.Bounds().Dx(), dst.Bounds().Dy(), nil
}
func rgb(c color.Color) (int, int, int) {
r, g, b, _ := c.RGBA()
return int(r >> 8), int(g >> 8), int(b >> 8)
}
func abs(x int) int {
if x < 0 {
return -x
}
return x
}
func clamp(v, lo, hi int) int {
if v < lo {
return lo
}
if v > hi {
return hi
}
return v
}
+38 -20
View File
@@ -6,49 +6,67 @@ Set Padding 20
Set Framerate 15
Set TypingSpeed 50ms
# Seed demo data + start uptop (UPTOP_DEMO=1 → stable pulse dot for stills).
Hide
Type "bash vhs/setup.sh /tmp/uptop-vhs.db"
Enter
Sleep 45s
# Warm-up: push heartbeat lands (~10s) and initial checks settle. Kept short so every
# capture stays inside the 60s node-freshness window (consistent "3 probes" footer).
Sleep 18s
Show
Sleep 5s
Sleep 2s
# Sites tab — hero shot with mixed monitor states
# 1. Sites — hero shot: mixed states, history sparklines, SSL, retries.
Screenshot vhs/screenshots/monitors.png
Sleep 1s
# Navigate to Nextcloud (row 6: group + 3 children + Auth Portal)
# 2. Detail — drill into Nextcloud (6th row from the top).
Down
Sleep 200ms
Sleep 150ms
Down
Sleep 200ms
Sleep 150ms
Down
Sleep 200ms
Sleep 150ms
Down
Sleep 200ms
Sleep 150ms
Down
Sleep 200ms
Sleep 300ms
Type "i"
Sleep 3s
Sleep 2s
Screenshot vhs/screenshots/detail.png
Sleep 1s
# Close detail
Sleep 500ms
Escape
Sleep 1s
# Tab to Alerts
# 3. Alerts — channels with health dots + recent "last sent".
Tab
Sleep 2s
Sleep 1500ms
Screenshot vhs/screenshots/alerts.png
Sleep 1s
Sleep 500ms
# Tab to Logs
# 4. Logs — chronological, severity-coloured event stream.
Tab
Sleep 2s
Sleep 1500ms
Screenshot vhs/screenshots/logs.png
Sleep 1s
Sleep 500ms
# 5. Nodes — distributed probes across regions.
Tab
Sleep 1500ms
Screenshot vhs/screenshots/nodes.png
Sleep 500ms
# 6. Theme — cycle to the next theme, return to Sites for an alternate-palette hero.
Type "T"
Sleep 500ms
Tab
Sleep 200ms
Tab
Sleep 200ms
Tab
Sleep 1s
Screenshot vhs/screenshots/theme.png
Sleep 500ms
# Quit
Type "q"
Sleep 1s
Binary file not shown.

Before

Width:  |  Height:  |  Size: 84 KiB

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 80 KiB

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 160 KiB

After

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 219 KiB

After

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 253 KiB

+10 -10
View File
@@ -28,7 +28,7 @@ monitors:
# HTTP — homelab services
- name: Nextcloud
type: http
url: https://example.com
url: https://nextcloud.com
interval: 30
alert: Discord Homelab
check_ssl: true
@@ -37,21 +37,21 @@ monitors:
- name: Jellyfin
type: http
url: https://example.com
url: https://jellyfin.org
interval: 30
alert: Discord Homelab
max_retries: 2
- name: Home Assistant
type: http
url: https://example.com
url: https://www.home-assistant.io
interval: 30
alert: Discord Homelab
max_retries: 3
- name: Gitea
type: http
url: https://example.com
url: https://about.gitea.com
interval: 60
alert: Discord Homelab
check_ssl: true
@@ -60,14 +60,14 @@ monitors:
- name: Traefik Dashboard
type: http
url: https://example.com
url: https://traefik.io
interval: 60
alert: Discord Homelab
max_retries: 1
- name: Vaultwarden
type: http
url: https://example.com
url: https://bitwarden.com
interval: 30
alert: Discord Homelab
check_ssl: true
@@ -76,7 +76,7 @@ monitors:
- name: Personal Blog
type: http
url: https://example.com
url: https://jvns.ca
interval: 120
alert: Discord Homelab
check_ssl: true
@@ -85,17 +85,17 @@ monitors:
- name: Immich
type: http
url: https://example.com
url: https://immich.app
interval: 60
alert: Discord Homelab
check_ssl: true
expiry_threshold: 7
max_retries: 3
# HTTP — deliberate failure
# HTTP — deliberate failure (non-resolving homelab host → stays DOWN)
- name: Auth Portal
type: http
url: http://localhost:1
url: https://auth.home.arpa
interval: 30
alert: Discord Homelab
max_retries: 2
+7 -3
View File
@@ -9,13 +9,16 @@ echo "==> Seeding monitors and alerts..."
UPTOP_DB_DSN="$DB" ./uptop apply -f vhs/seed.yaml 2>&1
echo "==> Backfilling check history..."
BACKFILL_OUT=$(go run ./vhs/backfill/ "$DB")
# Build first so the backfill's `now` (node last_seen, heartbeat timing) isn't racing
# a cold compile — keeps the capture window deterministic.
go build -o /tmp/uptop-backfill ./vhs/backfill/
BACKFILL_OUT=$(/tmp/uptop-backfill "$DB")
echo "$BACKFILL_OUT"
PUSH_TOKEN=$(echo "$BACKFILL_OUT" | grep '^PUSH_TOKEN=' | cut -d= -f2)
if [ -n "$PUSH_TOKEN" ]; then
echo "==> Sending push heartbeat in 15s (background)..."
(sleep 15 && curl -s "http://localhost:18099/api/push" -H "Authorization: Bearer $PUSH_TOKEN" > /dev/null 2>&1) &
echo "==> Sending push heartbeat in 10s (background)..."
(sleep 10 && curl -s "http://localhost:18099/api/push" -H "Authorization: Bearer $PUSH_TOKEN" > /dev/null 2>&1) &
fi
echo "==> Starting uptop server..."
@@ -24,4 +27,5 @@ exec env \
UPTOP_PORT=23299 \
UPTOP_HTTP_PORT=18099 \
UPTOP_ALLOW_PRIVATE_TARGETS=true \
UPTOP_DEMO=1 \
./uptop serve 2>/dev/null