5 Commits

Author SHA1 Message Date
lerko c9fb006042 chore: ignore sqlite WAL sidecars and stray vhs build binaries
CI / test (pull_request) Successful in 2m38s
CI / lint (pull_request) Successful in 1m12s
CI / vulncheck (pull_request) Successful in 56s
uptop.db* now covers the -shm/-wal sidecars; /backfill and /crop catch
binaries left by `go build ./vhs/<tool>` without -o.
2026-05-29 11:45:47 -04:00
lerko 03cbe283df chore(tui): polish demo + regenerate screenshots
CI / test (pull_request) Successful in 2m45s
CI / lint (pull_request) Successful in 1m4s
CI / vulncheck (pull_request) Successful in 56s
Rework the VHS demo so the README screenshots actually entice a download.

Demo data / tooling:
- seed.yaml: real, reachable service URLs (detail now shows nextcloud.com,
  not example.com); Auth Portal -> non-resolving home.arpa host so it reads
  as a believable, reliably-DOWN monitor
- backfill: transient outages for Nextcloud/Jellyfin/Immich aligned with their
  state changes (uptime % now matches); log timestamps derived from now so the
  Logs view reads chronologically; real SSL warning; three probe nodes across
  regions; seeded alert send health
- demo.tape: shorter warm-up, added Nodes + theme captures, ordered so every
  shot stays inside the 60s node-freshness window (consistent probe count)
- vhs/crop: new tool to trim the empty terminal border around each screenshot
- setup.sh: build backfill up front for deterministic timing; UPTOP_DEMO=1

Supporting code:
- persist alert send health (new alert_health table, load on startup,
  best-effort save on send) so health/last-sent survive restarts
- latency Min/Avg/Max ignore failed checks (no more "Min 0ms")
- correct "probe"/"probes" pluralization
- stable status dot instead of an animated spinner under UPTOP_DEMO
2026-05-28 22:32:45 -04:00
lerko 9c7ed284b3 fix(lint): suppress gosec G404 for demo data RNG
CI / test (pull_request) Successful in 2m48s
CI / lint (pull_request) Successful in 1m7s
CI / vulncheck (pull_request) Successful in 56s
2026-05-28 19:00:45 -04:00
lerko ff85abb2c9 fix(lint): resolve errcheck and gosec warnings in vhs backfill tool
CI / test (pull_request) Successful in 2m52s
CI / lint (pull_request) Failing after 1m11s
CI / vulncheck (pull_request) Successful in 56s
2026-05-28 18:50:56 -04:00
lerko 10f249a2ae chore: add TUI screenshots via VHS
CI / test (pull_request) Successful in 2m50s
CI / lint (pull_request) Failing after 1m12s
CI / vulncheck (pull_request) Successful in 56s
Screenshots capture 4 views: monitors dashboard (hero), detail panel,
alerts tab, and logs tab. Includes VHS tape, demo seed config, and
setup script for reproducible captures.

Also fixes latencySparkline to color DOWN checks red instead of green
— previously failed checks with 0ms latency rendered as green bars.
2026-05-28 18:30:39 -04:00
26 changed files with 928 additions and 35 deletions
+5 -1
View File
@@ -27,7 +27,11 @@ go.work
# End of https://www.toptal.com/developers/gitignore/api/go
/uptop
uptop.db
# stray binaries from `go build ./vhs/<tool>` without -o
/backfill
/crop
# sqlite db + WAL sidecars (-shm/-wal)
uptop.db*
.ssh
+1
View File
@@ -385,6 +385,7 @@ func runServe(args []string) {
eng.InitHistory()
eng.InitLogs()
eng.InitAlertHealth()
eng.Start(ctx)
tlsCert := os.Getenv("UPTOP_TLS_CERT")
+4
View File
@@ -53,6 +53,10 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return models.Pr
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
func (m *mockStore) DeleteNode(string) error { return nil }
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
return nil, nil
}
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
func (m *mockStore) SaveLog(string) error { return nil }
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
+4
View File
@@ -51,6 +51,10 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return m
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
func (m *mockStore) DeleteNode(string) error { return nil }
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
return nil, nil
}
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
func (m *mockStore) SaveLog(string) error { return nil }
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
+11
View File
@@ -79,6 +79,17 @@ type ProbeNode struct {
Version string
}
// AlertHealthRecord is the persisted send health of an alert channel. It lets the
// "last sent" / health indicators survive restarts instead of resetting to "never".
// One record exists per alert, keyed by AlertID.
type AlertHealthRecord struct {
// AlertID identifies the alert channel this record belongs to.
AlertID int
// LastSendAt is the time of the most recent send; the zero value means "no send recorded".
LastSendAt time.Time
// LastSendOK reports the outcome of the most recent send.
LastSendOK bool
// LastError holds the error text of the most recent send, if any.
LastError string
// SendCount and FailCount are running counters carried across restarts.
SendCount int
FailCount int
}
type MaintenanceWindow struct {
ID int
MonitorID int
+32
View File
@@ -146,6 +146,26 @@ func (e *Engine) InitLogs() {
e.logStore = logs
}
// InitAlertHealth loads the persisted per-alert send health from the store and copies
// it into the engine's in-memory map, so "last sent" / health state is available right
// after startup rather than starting out as "never" for every channel.
//
// Loading is best-effort: if the store returns an error the in-memory state is left
// untouched and the error is dropped.
func (e *Engine) InitAlertHealth() {
	persisted, loadErr := e.db.LoadAlertHealth()
	if loadErr != nil {
		return
	}

	e.alertHealthMu.Lock()
	defer e.alertHealthMu.Unlock()

	for alertID, rec := range persisted {
		// Copy field by field: the persisted record and the in-memory type are
		// distinct structs.
		var restored AlertHealth
		restored.LastSendAt = rec.LastSendAt
		restored.LastSendOK = rec.LastSendOK
		restored.LastError = rec.LastError
		restored.SendCount = rec.SendCount
		restored.FailCount = rec.FailCount
		e.alertHealth[alertID] = restored
	}
}
func (e *Engine) GetLogs() []string {
e.logMu.RLock()
defer e.logMu.RUnlock()
@@ -612,6 +632,18 @@ func (e *Engine) recordAlertResult(alertID int, ok bool, errMsg string) {
h.FailCount++
}
e.alertHealth[alertID] = h
// Persist best-effort so health survives restarts; DB IO off the alert path.
go func(rec models.AlertHealthRecord) {
_ = e.db.SaveAlertHealth(rec)
}(models.AlertHealthRecord{
AlertID: alertID,
LastSendAt: h.LastSendAt,
LastSendOK: h.LastSendOK,
LastError: h.LastError,
SendCount: h.SendCount,
FailCount: h.FailCount,
})
}
func (e *Engine) GetAlertHealth(alertID int) AlertHealth {
+4
View File
@@ -63,6 +63,10 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return m
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
func (m *mockStore) DeleteNode(string) error { return nil }
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
return nil, nil
}
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
return nil, nil
}
+4
View File
@@ -65,6 +65,10 @@ func (m *mockStore) AddAlertReturningID(string, string, map[string]string) (int,
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
func (m *mockStore) DeleteNode(string) error { return nil }
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
return nil, nil
}
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
func (m *mockStore) SaveLog(string) error { return nil }
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
+1
View File
@@ -14,6 +14,7 @@ type Dialect interface {
ImportWipe(tx *sql.Tx)
ImportResetSequences(tx *sql.Tx)
UpsertNodeSQL() string
UpsertAlertHealthSQL() string
}
func rewritePlaceholders(query string, dollarStyle bool) string {
+12
View File
@@ -81,6 +81,14 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
changed_at TIMESTAMP DEFAULT NOW()
)`,
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
`CREATE TABLE IF NOT EXISTS alert_health (
alert_id INTEGER PRIMARY KEY,
last_send_at TIMESTAMP,
last_send_ok BOOLEAN DEFAULT FALSE,
last_error TEXT DEFAULT '',
send_count INTEGER DEFAULT 0,
fail_count INTEGER DEFAULT 0
)`,
}
}
@@ -106,6 +114,10 @@ func (d *PostgresDialect) UpsertNodeSQL() string {
return "INSERT INTO nodes (id, name, region, last_seen, version) VALUES ($1, $2, $3, NOW(), $4) ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, region = EXCLUDED.region, last_seen = NOW(), version = EXCLUDED.version"
}
// UpsertAlertHealthSQL returns the PostgreSQL statement that inserts an alert_health
// row or, when a row with the same alert_id already exists, overwrites its columns
// with the new values. Parameters are positional: alert_id, last_send_at,
// last_send_ok, last_error, send_count, fail_count.
func (d *PostgresDialect) UpsertAlertHealthSQL() string {
	const stmt = "INSERT INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) " +
		"VALUES ($1, $2, $3, $4, $5, $6) " +
		"ON CONFLICT (alert_id) DO UPDATE SET " +
		"last_send_at = EXCLUDED.last_send_at, " +
		"last_send_ok = EXCLUDED.last_send_ok, " +
		"last_error = EXCLUDED.last_error, " +
		"send_count = EXCLUDED.send_count, " +
		"fail_count = EXCLUDED.fail_count"
	return stmt
}
func (d *PostgresDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {}
func (d *PostgresDialect) ImportWipe(tx *sql.Tx) {
+12
View File
@@ -88,6 +88,14 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
changed_at DATETIME DEFAULT CURRENT_TIMESTAMP
)`,
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
`CREATE TABLE IF NOT EXISTS alert_health (
alert_id INTEGER PRIMARY KEY,
last_send_at DATETIME,
last_send_ok BOOLEAN DEFAULT 0,
last_error TEXT DEFAULT '',
send_count INTEGER DEFAULT 0,
fail_count INTEGER DEFAULT 0
)`,
}
}
@@ -113,6 +121,10 @@ func (d *SQLiteDialect) UpsertNodeSQL() string {
return "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?)"
}
// UpsertAlertHealthSQL returns the SQLite statement that writes an alert_health row,
// replacing any existing row with the same alert_id. Parameters are positional:
// alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count.
func (d *SQLiteDialect) UpsertAlertHealthSQL() string {
	const stmt = "INSERT OR REPLACE INTO alert_health " +
		"(alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) " +
		"VALUES (?, ?, ?, ?, ?, ?)"
	return stmt
}
func (d *SQLiteDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {
var count int
_ = db.QueryRow("SELECT COUNT(*) FROM " + table).Scan(&count) //nolint:errcheck
+31
View File
@@ -430,6 +430,37 @@ func (s *SQLStore) DeleteNode(id string) error {
return err
}
// LoadAlertHealth reads every row of the alert_health table and returns the records
// keyed by alert ID. A NULL last_send_at is returned as the zero time.
//
// On any query, scan, or iteration error the returned map is nil, so callers never
// see a partially loaded result alongside a non-nil error.
func (s *SQLStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
	rows, err := s.db.Query("SELECT alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count FROM alert_health")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	out := make(map[int]models.AlertHealthRecord)
	for rows.Next() {
		var r models.AlertHealthRecord
		// last_send_at is nullable, so scan through NullTime rather than time.Time.
		var lastSend sql.NullTime
		if err := rows.Scan(&r.AlertID, &lastSend, &r.LastSendOK, &r.LastError, &r.SendCount, &r.FailCount); err != nil {
			return nil, err
		}
		if lastSend.Valid {
			r.LastSendAt = lastSend.Time
		}
		out[r.AlertID] = r
	}
	// rows.Next returning false can also mean the iteration failed part-way.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return out, nil
}
// SaveAlertHealth upserts one alert's send health using the dialect-specific upsert
// statement. A zero LastSendAt is bound as a nil parameter (SQL NULL) instead of a
// zero timestamp.
func (s *SQLStore) SaveAlertHealth(h models.AlertHealthRecord) error {
	var sentAt interface{} // stays nil (NULL) when no send has been recorded
	if hasSent := !h.LastSendAt.IsZero(); hasSent {
		sentAt = h.LastSendAt
	}

	_, execErr := s.db.Exec(
		s.dialect.UpsertAlertHealthSQL(),
		h.AlertID,
		sentAt,
		h.LastSendOK,
		h.LastError,
		h.SendCount,
		h.FailCount,
	)
	return execErr
}
func (s *SQLStore) SaveLog(message string) error {
_, err := s.db.Exec(s.q("INSERT INTO logs (message) VALUES (?)"), message)
if err != nil {
+4
View File
@@ -49,6 +49,10 @@ type Store interface {
UpdateNodeLastSeen(id string) error
DeleteNode(id string) error
// Alert Health
LoadAlertHealth() (map[int]models.AlertHealthRecord, error)
SaveAlertHealth(h models.AlertHealthRecord) error
// Logs
SaveLog(message string) error
LoadLogs(limit int) ([]string, error)
+29 -13
View File
@@ -60,14 +60,18 @@ type siteFormData struct {
Regions string
}
func latencySparkline(latencies []time.Duration, width int) string {
func latencySparkline(latencies []time.Duration, statuses []bool, width int) string {
if len(latencies) == 0 {
return subtleStyle.Render(strings.Repeat("·", width))
}
samples := latencies
sampledStatuses := statuses
if len(samples) > width {
samples = samples[len(samples)-width:]
if len(sampledStatuses) > width {
sampledStatuses = sampledStatuses[len(sampledStatuses)-width:]
}
}
minL, maxL := samples[0], samples[0]
@@ -85,7 +89,7 @@ func latencySparkline(latencies []time.Duration, width int) string {
sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining)))
}
spread := maxL - minL
for _, l := range samples {
for i, l := range samples {
idx := 0
if spread > 0 {
idx = int(float64(l-minL) / float64(spread) * 7)
@@ -94,6 +98,10 @@ func latencySparkline(latencies []time.Duration, width int) string {
}
}
ch := string(sparkChars[idx])
isDown := i < len(sampledStatuses) && !sampledStatuses[i]
if isDown {
sb.WriteString(dangerStyle.Render(ch))
} else {
ms := l.Milliseconds()
if ms < 200 {
sb.WriteString(specialStyle.Render(ch))
@@ -103,6 +111,7 @@ func latencySparkline(latencies []time.Duration, width int) string {
sb.WriteString(dangerStyle.Render(ch))
}
}
}
return sb.String()
}
@@ -474,7 +483,7 @@ func (m Model) viewSitesTab() string {
if site.Type == "push" {
spark = heartbeatSparkline(hist.Statuses, sparkWidth)
} else {
spark = latencySparkline(hist.Latencies, sparkWidth)
spark = latencySparkline(hist.Latencies, hist.Statuses, sparkWidth)
}
rows = append(rows, []string{
@@ -949,20 +958,27 @@ func (m Model) viewDetailPanel() string {
up, len(hist.Statuses))
}
} else {
b.WriteString(" " + latencySparkline(hist.Latencies, sparkWidth))
if len(hist.Latencies) > 0 {
minL, maxL := hist.Latencies[0], hist.Latencies[0]
var total time.Duration
for _, l := range hist.Latencies {
total += l
if l < minL {
minL = l
b.WriteString(" " + latencySparkline(hist.Latencies, hist.Statuses, sparkWidth))
// Stats over successful checks only — a failed check is stored as 0ns latency
// and would otherwise drag Min to 0ms and skew the average.
var minL, maxL, total time.Duration
count := 0
for i, l := range hist.Latencies {
if i < len(hist.Statuses) && !hist.Statuses[i] {
continue
}
if l > maxL {
if count == 0 {
minL, maxL = l, l
} else if l < minL {
minL = l
} else if l > maxL {
maxL = l
}
total += l
count++
}
avg := total / time.Duration(len(hist.Latencies))
if count > 0 {
avg := total / time.Duration(count)
fmt.Fprintf(&b, "\n %s %dms %s %dms %s %dms",
subtleStyle.Render("Min"), minL.Milliseconds(),
subtleStyle.Render("Avg"), avg.Milliseconds(),
+24 -6
View File
@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"math"
"os"
"sort"
"strings"
"time"
@@ -122,6 +123,10 @@ type Model struct {
filterMode bool
filterText string
// demoMode renders a stable status dot instead of the animated pulse so
// screenshots/recordings don't capture the spinner mid-frame. Set via UPTOP_DEMO=1.
demoMode bool
}
func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
@@ -155,6 +160,7 @@ func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
collapsed: collapsed,
theme: theme,
themeIndex: themeIdx,
demoMode: os.Getenv("UPTOP_DEMO") == "1",
}
}
@@ -754,11 +760,6 @@ func (m *Model) submitForm() {
}
func (m Model) pulseIndicator() string {
frame := m.tickCount % len(pulseFrames)
brightness := int(m.pulsePos*155) + 100
if brightness > 255 {
brightness = 255
}
hasDown := false
for _, s := range m.sites {
if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == "DOWN" || s.Status == "SSL EXP") {
@@ -766,6 +767,19 @@ func (m Model) pulseIndicator() string {
break
}
}
// Stills can't show animation: render a stable status dot in demo mode.
if m.demoMode {
c := m.theme.Success
if hasDown {
c = m.theme.Danger
}
return lipgloss.NewStyle().Foreground(c).Render("●")
}
frame := m.tickCount % len(pulseFrames)
brightness := int(m.pulsePos*155) + 100
if brightness > 255 {
brightness = 255
}
var color string
if hasDown {
color = fmt.Sprintf("#%02x%02x%02x", brightness, brightness/4, brightness/4)
@@ -953,7 +967,11 @@ func (m Model) viewDashboard() string {
online++
}
}
statusParts = append(statusParts, fmt.Sprintf("%d probes", online))
probeLabel := "probes"
if online == 1 {
probeLabel = "probe"
}
statusParts = append(statusParts, fmt.Sprintf("%d %s", online, probeLabel))
}
statusLine := strings.Join(statusParts, subtleStyle.Render(" · "))
+368
View File
@@ -0,0 +1,368 @@
package main
import (
"database/sql"
"fmt"
"math/rand/v2"
"os"
"time"
_ "github.com/mattn/go-sqlite3"
)
// main validates the command line and delegates to run. All work that owns
// resources lives in run so that its deferred cleanup executes before the process
// exits; os.Exit skips deferred calls in the function that invokes it.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: backfill <db-path>")
		os.Exit(1)
	}
	if err := run(os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run opens the SQLite database at dbPath and seeds it with demo data: check
// history, state changes, logs, probe nodes, maintenance windows and alert health.
// It then prints a summary line and, when a 'Nightly Backup' site exists, a
// PUSH_TOKEN=<token> line on stdout.
//
// The first failing step aborts the run; its error is returned prefixed with the
// step name.
func run(dbPath string) error {
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return fmt.Errorf("open: %w", err)
	}
	defer db.Close()

	ids, err := loadSiteIDs(db)
	if err != nil {
		return fmt.Errorf("load site IDs: %w", err)
	}

	rng := rand.New(rand.NewPCG(42, 0)) //nolint:gosec // deterministic seed for reproducible demo data
	now := time.Now().UTC()

	if err := backfillHistory(db, rng, now, ids); err != nil {
		return fmt.Errorf("history: %w", err)
	}
	if err := backfillStateChanges(db, now, ids); err != nil {
		return fmt.Errorf("state changes: %w", err)
	}
	if err := backfillLogs(db, now); err != nil {
		return fmt.Errorf("logs: %w", err)
	}
	if err := backfillNodes(db, now); err != nil {
		return fmt.Errorf("nodes: %w", err)
	}
	if err := backfillMaintenance(db, now, ids); err != nil {
		return fmt.Errorf("maintenance: %w", err)
	}

	alertIDs, err := loadAlertIDs(db)
	if err != nil {
		return fmt.Errorf("load alert IDs: %w", err)
	}
	if err := backfillAlertHealth(db, now, alertIDs); err != nil {
		return fmt.Errorf("alert health: %w", err)
	}

	// The count is informational only, so a failure here is reported but does not
	// fail a backfill whose writes have already been committed.
	var count int
	if err := db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count); err != nil {
		fmt.Fprintf(os.Stderr, "count check_history: %v\n", err)
	}
	fmt.Printf("Backfill complete: %d check records\n", count)

	// The token line is optional: it is only printed when the lookup succeeds.
	var token string
	if err := db.QueryRow("SELECT token FROM sites WHERE name='Nightly Backup'").Scan(&token); err == nil {
		fmt.Printf("PUSH_TOKEN=%s\n", token)
	}
	return nil
}
// loadSiteIDs returns a name -> id map built from every row of the sites table.
func loadSiteIDs(db *sql.DB) (map[string]int, error) {
	const query = "SELECT id, name FROM sites"

	siteRows, queryErr := db.Query(query)
	if queryErr != nil {
		return nil, queryErr
	}
	// scanNameIDs takes ownership of the rows and closes them.
	return scanNameIDs(siteRows)
}
// loadAlertIDs returns a name -> id map built from every row of the alerts table.
func loadAlertIDs(db *sql.DB) (map[string]int, error) {
	const query = "SELECT id, name FROM alerts"

	alertRows, queryErr := db.Query(query)
	if queryErr != nil {
		return nil, queryErr
	}
	// scanNameIDs takes ownership of the rows and closes them.
	return scanNameIDs(alertRows)
}
// scanNameIDs drains a two-column (id, name) result set into a name -> id map and
// closes rows before returning. A scan failure returns a nil map; an iteration
// error is returned together with whatever was collected up to that point.
func scanNameIDs(rows *sql.Rows) (map[string]int, error) {
	defer rows.Close()

	byName := map[string]int{}
	for rows.Next() {
		var (
			rowID   int
			rowName string
		)
		scanErr := rows.Scan(&rowID, &rowName)
		if scanErr != nil {
			return nil, scanErr
		}
		byName[rowName] = rowID
	}
	return byName, rows.Err()
}
// backfillAlertHealth writes one alert_health row per known demo alert channel so
// the Alerts tab has recent "last sent" times and send counters to display instead
// of "never" everywhere. Channels whose name is not present in alertIDs are skipped.
// All rows are written in a single transaction.
func backfillAlertHealth(db *sql.DB, now time.Time, alertIDs map[string]int) error {
	const (
		upsert = "INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)"
		layout = "2006-01-02 15:04:05"
	)

	type channelSeed struct {
		channel string
		sentAgo time.Duration // how long before now the last send happened
		healthy bool
		sends   int
		fails   int
	}
	seeds := []channelSeed{
		{channel: "Discord Homelab", sentAgo: 4 * time.Minute, healthy: true, sends: 37, fails: 0},
		{channel: "Slack Ops", sentAgo: 9 * time.Minute, healthy: true, sends: 21, fails: 1},
		{channel: "Ntfy Alerts", sentAgo: 1 * time.Hour, healthy: true, sends: 12, fails: 0},
		{channel: "Email Oncall", sentAgo: 3 * time.Hour, healthy: true, sends: 5, fails: 0},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback is a no-op once Commit has succeeded.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare(upsert)
	if err != nil {
		return err
	}
	defer stmt.Close()

	for _, seed := range seeds {
		alertID, known := alertIDs[seed.channel]
		if !known {
			continue
		}
		lastSent := now.Add(-seed.sentAgo).Format(layout)
		_, execErr := stmt.Exec(alertID, lastSent, seed.healthy, "", seed.sends, seed.fails)
		if execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// monitorProfile describes the synthetic check history generated for one seeded
// monitor: a latency range for successful checks and an optional outage window
// expressed in check indices.
type monitorProfile struct {
name string // monitor name, looked up in the site-ID map
minMs int // lower bound (inclusive) of the generated latency, in milliseconds
maxMs int // upper bound (exclusive) of the generated latency, in milliseconds
downFrom int // first DOWN check index (-1 = always up)
downTo int // exclusive end of the DOWN window; use 60 (total) for a still-down monitor
}
// backfillHistory inserts 60 synthetic check_history rows for each profiled monitor
// that exists in ids, spaced 24 minutes apart and ending 24 minutes before now.
// Checks inside a profile's outage window are stored as DOWN with zero latency; all
// others get a random latency drawn from the profile's range. Everything is written
// in one transaction.
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
	const (
		total   = 60               // checks generated per monitor
		spacing = 24 * time.Minute // 60 checks * 24m spacing = a 24h window
		layout  = "2006-01-02 15:04:05"
		insert  = "INSERT INTO check_history (site_id, latency_ns, is_up, checked_at) VALUES (?, ?, ?, ?)"
	)

	// Latency ranges reflect monitoring public services over the internet, so the
	// detail histogram brackets the live latency the engine measures at capture time.
	// Outage indices place the dips within the 24h window.
	profiles := []monitorProfile{
		{name: "Nextcloud", minMs: 200, maxMs: 600, downFrom: 47, downTo: 48},       // brief blip ~5h ago, recovered
		{name: "Jellyfin", minMs: 40, maxMs: 180, downFrom: 15, downTo: 16},         // brief blip ~18h ago, recovered
		{name: "Home Assistant", minMs: 30, maxMs: 120, downFrom: -1, downTo: 0},    //
		{name: "Gitea", minMs: 50, maxMs: 200, downFrom: -1, downTo: 0},             //
		{name: "Traefik Dashboard", minMs: 60, maxMs: 200, downFrom: -1, downTo: 0}, //
		{name: "Vaultwarden", minMs: 80, maxMs: 250, downFrom: -1, downTo: 0},       //
		{name: "Personal Blog", minMs: 40, maxMs: 160, downFrom: -1, downTo: 0},     //
		{name: "Immich", minMs: 60, maxMs: 300, downFrom: 30, downTo: 31},           // brief blip ~12h ago; periodic spikes below
		{name: "Auth Portal", minMs: 30, maxMs: 90, downFrom: 40, downTo: 60},       // DOWN ~8h ago, still down
		{name: "Edge Router", minMs: 5, maxMs: 20, downFrom: -1, downTo: 0},         // ping
		{name: "Postgres", minMs: 1, maxMs: 6, downFrom: -1, downTo: 0},             // port
		{name: "DNS Primary", minMs: 8, maxMs: 30, downFrom: -1, downTo: 0},         // dns
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback is a no-op once Commit has succeeded.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare(insert)
	if err != nil {
		return err
	}
	defer stmt.Close()

	for _, p := range profiles {
		siteID, known := ids[p.name]
		if !known {
			continue
		}
		for i := 0; i < total; i++ {
			// Index 0 is the oldest check; the newest is one spacing before now.
			checkedAt := now.Add(-time.Duration(total-i) * spacing)

			inOutage := p.downFrom >= 0 && p.downFrom <= i && i < p.downTo
			up := !inOutage

			var latencyNs int64 // stays 0 for a failed check
			if up {
				ms := p.minMs + rng.IntN(p.maxMs-p.minMs)
				// Immich gets a periodic latency spike on every 17th check.
				if p.name == "Immich" && i%17 == 0 {
					ms = 250 + rng.IntN(100)
				}
				latencyNs = int64(ms) * 1_000_000
			}

			_, execErr := stmt.Exec(siteID, latencyNs, up, checkedAt.Format(layout))
			if execErr != nil {
				return execErr
			}
		}
	}
	return tx.Commit()
}
// backfillStateChanges inserts a fixed list of UP/DOWN transitions into
// state_changes, timestamped relative to now. Monitors missing from ids are
// skipped. All rows are written in one transaction.
func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
	const (
		insert = "INSERT INTO state_changes (site_id, from_status, to_status, error_reason, changed_at) VALUES (?, ?, ?, ?, ?)"
		layout = "2006-01-02 15:04:05"
	)

	type transition struct {
		monitor string
		from    string
		to      string
		reason  string
		ago     time.Duration // how long before now the transition happened
	}
	// Timed to line up with the history dips (Nextcloud ~5h, Immich ~12h, Jellyfin ~18h)
	// and the still-down Auth Portal (~8h), so detail panels read coherently.
	transitions := []transition{
		{monitor: "Nextcloud", from: "UP", to: "DOWN", reason: "read timeout", ago: 5*time.Hour + 8*time.Minute},
		{monitor: "Nextcloud", from: "DOWN", to: "UP", reason: "", ago: 5 * time.Hour},
		{monitor: "Auth Portal", from: "UP", to: "DOWN", reason: "no such host", ago: 8 * time.Hour},
		{monitor: "Immich", from: "UP", to: "DOWN", reason: "502 Bad Gateway", ago: 12*time.Hour + 8*time.Minute},
		{monitor: "Immich", from: "DOWN", to: "UP", reason: "", ago: 12 * time.Hour},
		{monitor: "Jellyfin", from: "UP", to: "DOWN", reason: "connection reset", ago: 18*time.Hour + 5*time.Minute},
		{monitor: "Jellyfin", from: "DOWN", to: "UP", reason: "", ago: 18 * time.Hour},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback is a no-op once Commit has succeeded.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare(insert)
	if err != nil {
		return err
	}
	defer stmt.Close()

	for _, tr := range transitions {
		siteID, known := ids[tr.monitor]
		if !known {
			continue
		}
		changedAt := now.Add(-tr.ago).Format(layout)
		_, execErr := stmt.Exec(siteID, tr.from, tr.to, tr.reason, changedAt)
		if execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// backfillLogs inserts a fixed list of demo log lines into the logs table, each
// timestamped relative to now. Every message is prefixed with a "[HH:MM] " bracket
// derived from the same timestamp. All rows are written in one transaction.
func backfillLogs(db *sql.DB, now time.Time) error {
	const (
		insert = "INSERT INTO logs (message, created_at) VALUES (?, ?)"
		layout = "2006-01-02 15:04:05"
	)

	// hms builds the age of an entry from hours, minutes and seconds.
	hms := func(h, m, s int) time.Duration {
		return time.Duration(h)*time.Hour + time.Duration(m)*time.Minute + time.Duration(s)*time.Second
	}

	type entry struct {
		text string
		age  time.Duration // how long before now the event happened
	}
	// Ordered newest-first. The bracket time is derived from the entry's age (not
	// hardcoded), so the Logs view — which renders the leading [HH:MM] — reads
	// chronologically. Outage times line up with the state changes and history dips.
	entries := []entry{
		{text: "Monitor 'Nextcloud' recovered (was down 8m)", age: hms(5, 0, 0)},
		{text: "Monitor 'Nextcloud' confirmed DOWN: read timeout", age: hms(5, 8, 0)},
		{text: "Monitor 'Nextcloud' failed check 2/2", age: hms(5, 8, 30)},
		{text: "Monitor 'Nextcloud' failed check 1/2", age: hms(5, 9, 0)},
		{text: "Monitor 'Auth Portal' confirmed DOWN: no such host", age: hms(8, 0, 0)},
		{text: "Monitor 'Auth Portal' failed check 2/2", age: hms(8, 0, 30)},
		{text: "Monitor 'Auth Portal' failed check 1/2", age: hms(8, 1, 0)},
		{text: "Monitor 'Immich' recovered (was down 8m)", age: hms(12, 0, 0)},
		{text: "Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", age: hms(12, 8, 0)},
		{text: "Monitor 'Immich' failed check 3/3", age: hms(12, 8, 30)},
		{text: "Monitor 'Immich' failed check 2/3", age: hms(12, 9, 0)},
		{text: "Monitor 'Immich' failed check 1/3", age: hms(12, 9, 30)},
		{text: "Monitor 'Jellyfin' recovered (was down 5m)", age: hms(18, 0, 0)},
		{text: "Monitor 'Jellyfin' confirmed DOWN: connection reset", age: hms(18, 5, 0)},
		{text: "Monitor 'Jellyfin' failed check 2/2", age: hms(18, 5, 30)},
		{text: "Monitor 'Jellyfin' failed check 1/2", age: hms(18, 6, 0)},
		{text: "SSL warning: certificate for 'Personal Blog' expires in 9 days", age: hms(20, 0, 0)},
		{text: "Engine RESUMED (Active)", age: hms(22, 0, 0)},
		{text: "Loaded check history from database", age: hms(22, 0, 5)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback is a no-op once Commit has succeeded.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare(insert)
	if err != nil {
		return err
	}
	defer stmt.Close()

	for _, e := range entries {
		at := now.Add(-e.age)
		// Bracket in local time to match the engine's live AddLog timestamps;
		// created_at stays UTC to match the store's CURRENT_TIMESTAMP ordering.
		message := fmt.Sprintf("[%s] %s", at.Local().Format("15:04"), e.text)
		_, execErr := stmt.Exec(message, at.Format(layout))
		if execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// backfillNodes upserts three demo probe nodes, one per region, each with last_seen
// set to now. Rows are written one statement at a time; the first failure is
// returned immediately.
func backfillNodes(db *sql.DB, now time.Time) error {
	const upsert = "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)"

	type probe struct {
		id     string
		name   string
		region string
	}
	// Multiple regions to show distributed probes. All seen "now" so they read ONLINE
	// for the whole capture window (kept under the 60s freshness threshold by the tape).
	probes := []probe{
		{id: "node-use1", name: "leader", region: "us-east"},
		{id: "node-euw1", name: "probe-eu", region: "eu-west"},
		{id: "node-apse1", name: "probe-ap", region: "ap-southeast"},
	}

	lastSeen := now.Format("2006-01-02 15:04:05")
	for _, p := range probes {
		_, execErr := db.Exec(upsert, p.id, p.name, p.region, lastSeen, "2026.05.1")
		if execErr != nil {
			return execErr
		}
	}
	return nil
}
// backfillMaintenance inserts two demo maintenance windows in one transaction:
//
//   - a past, 2-hour "Jellyfin upgrade" window that started 3 days before now,
//     attached to the Jellyfin monitor; and
//   - a future, 4-hour "Network switch replacement" window starting 2 days after
//     now, written with monitor_id 0.
//
// The Jellyfin window is skipped when ids has no "Jellyfin" entry. An unchecked map
// lookup would yield 0 and silently file the upgrade under the same monitor_id the
// second window uses on purpose (presumably "all monitors" — confirm against the
// engine's maintenance lookup).
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
	const layout = "2006-01-02 15:04:05"

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback is a no-op once Commit has succeeded.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer stmt.Close()

	if jellyfinID, ok := ids["Jellyfin"]; ok {
		past := now.Add(-3 * 24 * time.Hour)
		if _, err := stmt.Exec(jellyfinID, "Jellyfin upgrade", "Upgrade to v10.10 + plugin updates", "maintenance",
			past.Format(layout),
			past.Add(2*time.Hour).Format(layout),
			"admin"); err != nil {
			return err
		}
	}

	future := now.Add(2 * 24 * time.Hour)
	if _, err := stmt.Exec(0, "Network switch replacement", "Replacing core switch in rack 2", "maintenance",
		future.Format(layout),
		future.Add(4*time.Hour).Format(layout),
		"admin"); err != nil {
		return err
	}
	return tx.Commit()
}
+123
View File
@@ -0,0 +1,123 @@
// Command crop trims the uniform background border around each VHS screenshot so the
// content fills the frame instead of floating in a large empty terminal. Sparse views
// (alerts, detail, nodes) would otherwise sit in a sea of dead space.
//
// Usage: crop [dir] (dir defaults to vhs/screenshots)
package main
import (
"fmt"
"image"
"image/color"
"image/png"
"os"
"path/filepath"
)
// pad is the margin, in pixels, kept around the detected content. tol is the threshold
// on the summed absolute R, G and B differences between a pixel and the background
// colour: a pixel whose summed difference exceeds tol counts as content.
const (
pad = 24
tol = 28
)
// main crops every *.png in the target directory in place. The directory is the
// first command-line argument, defaulting to vhs/screenshots. It exits with status 1
// if the glob fails, if no PNGs are found, or on the first file that cannot be
// cropped.
func main() {
	// die reports a fatal error on stderr and terminates the process.
	die := func(format string, args ...any) {
		fmt.Fprintf(os.Stderr, format, args...)
		os.Exit(1)
	}

	dir := "vhs/screenshots"
	if args := os.Args[1:]; len(args) > 0 {
		dir = args[0]
	}

	matches, err := filepath.Glob(filepath.Join(dir, "*.png"))
	switch {
	case err != nil:
		die("glob: %v\n", err)
	case len(matches) == 0:
		die("no PNGs in %s\n", dir)
	}

	for _, file := range matches {
		width, height, cropErr := cropFile(file)
		if cropErr != nil {
			die("crop %s: %v\n", file, cropErr)
		}
		fmt.Printf("cropped %s -> %dx%d\n", filepath.Base(file), width, height)
	}
}
// cropFile trims the uniform background border from the PNG at path and overwrites
// the file with the cropped image, keeping a pad-pixel margin around the content.
// It returns the width and height of the resulting image.
//
// The background colour is sampled near the top-left corner; any pixel whose summed
// per-channel difference from it exceeds tol is treated as content. An image with no
// content pixels is left untouched and its original dimensions are returned.
//
// The error from closing the output file is returned rather than ignored: a failed
// flush on close would otherwise leave a truncated PNG reported as a successful crop.
func cropFile(path string) (int, int, error) {
	f, err := os.Open(path) //nolint:gosec // dev tool: paths come from a trusted local glob
	if err != nil {
		return 0, 0, err
	}
	src, err := png.Decode(f)
	_ = f.Close() // read-only handle: nothing to lose on a close error
	if err != nil {
		return 0, 0, err
	}

	b := src.Bounds()
	// Background colour sampled from a corner — always inside VHS's blank padding.
	bgR, bgG, bgB := rgb(src.At(b.Min.X+2, b.Min.Y+2))

	// Start with an inverted box so the first content pixel initialises it.
	minX, minY := b.Max.X, b.Max.Y
	maxX, maxY := b.Min.X, b.Min.Y
	found := false
	for y := b.Min.Y; y < b.Max.Y; y++ {
		for x := b.Min.X; x < b.Max.X; x++ {
			r, g, bl := rgb(src.At(x, y))
			if abs(r-bgR)+abs(g-bgG)+abs(bl-bgB) > tol {
				found = true
				minX, minY = min(minX, x), min(minY, y)
				maxX, maxY = max(maxX, x), max(maxY, y)
			}
		}
	}
	if !found {
		return b.Dx(), b.Dy(), nil // blank frame — leave untouched
	}

	// Grow the box by pad on every side without leaving the image. maxX/maxY become
	// exclusive bounds here, hence the +1.
	minX = clamp(minX-pad, b.Min.X, b.Max.X)
	minY = clamp(minY-pad, b.Min.Y, b.Max.Y)
	maxX = clamp(maxX+pad+1, b.Min.X, b.Max.X)
	maxY = clamp(maxY+pad+1, b.Min.Y, b.Max.Y)

	dst := image.NewRGBA(image.Rect(0, 0, maxX-minX, maxY-minY))
	for y := minY; y < maxY; y++ {
		for x := minX; x < maxX; x++ {
			dst.Set(x-minX, y-minY, src.At(x, y))
		}
	}

	out, err := os.Create(path) //nolint:gosec // dev tool: paths come from a trusted local glob
	if err != nil {
		return 0, 0, err
	}
	if err := png.Encode(out, dst); err != nil {
		_ = out.Close() // the encode error is the one worth reporting
		return 0, 0, err
	}
	if err := out.Close(); err != nil {
		return 0, 0, err
	}
	return dst.Bounds().Dx(), dst.Bounds().Dy(), nil
}
// rgb returns the red, green and blue components of c as 8-bit values (0-255).
// color.Color.RGBA yields 16-bit channels, so each is reduced by dropping its low
// byte. The alpha channel is discarded.
func rgb(c color.Color) (int, int, int) {
	const lowBits = 8

	red16, green16, blue16, _ := c.RGBA()
	red := int(red16 >> lowBits)
	green := int(green16 >> lowBits)
	blue := int(blue16 >> lowBits)
	return red, green, blue
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// clamp limits v to the range [lo, hi]: values below lo become lo, values above hi
// become hi, and anything else is returned unchanged. The lower bound is checked
// first.
func clamp(v, lo, hi int) int {
	switch {
	case v < lo:
		return lo
	case v > hi:
		return hi
	default:
		return v
	}
}
+72
View File
@@ -0,0 +1,72 @@
Set Shell "bash"
Set Width 1400
Set Height 800
Set FontSize 14
Set Padding 20
Set Framerate 15
Set TypingSpeed 50ms
# Seed demo data + start uptop (UPTOP_DEMO=1 → stable pulse dot for stills).
Hide
Type "bash vhs/setup.sh /tmp/uptop-vhs.db"
Enter
# Warm-up: push heartbeat lands (~10s) and initial checks settle. Kept short so every
# capture stays inside the 60s node-freshness window (consistent "3 probes" footer).
Sleep 18s
Show
Sleep 2s
# 1. Sites — hero shot: mixed states, history sparklines, SSL, retries.
Screenshot vhs/screenshots/monitors.png
Sleep 1s
# 2. Detail — drill into Nextcloud (6th row from the top).
Down
Sleep 150ms
Down
Sleep 150ms
Down
Sleep 150ms
Down
Sleep 150ms
Down
Sleep 300ms
Type "i"
Sleep 2s
Screenshot vhs/screenshots/detail.png
Sleep 500ms
Escape
Sleep 1s
# 3. Alerts — channels with health dots + recent "last sent".
Tab
Sleep 1500ms
Screenshot vhs/screenshots/alerts.png
Sleep 500ms
# 4. Logs — chronological, severity-coloured event stream.
Tab
Sleep 1500ms
Screenshot vhs/screenshots/logs.png
Sleep 500ms
# 5. Nodes — distributed probes across regions.
Tab
Sleep 1500ms
Screenshot vhs/screenshots/nodes.png
Sleep 500ms
# 6. Theme — cycle to the next theme, return to Sites for an alternate-palette hero.
Type "T"
Sleep 500ms
Tab
Sleep 200ms
Tab
Sleep 200ms
Tab
Sleep 1s
Screenshot vhs/screenshots/theme.png
Sleep 500ms
Type "q"
Sleep 1s
Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 78 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 206 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 57 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 253 KiB

+141
View File
@@ -0,0 +1,141 @@
alerts:
- name: Discord Homelab
type: discord
settings:
url: https://discord.com/api/webhooks/1234567890/demo-token
- name: Ntfy Alerts
type: webhook
settings:
url: https://ntfy.example.com/homelab-alerts
- name: Email Oncall
type: email
settings:
host: smtp.example.com
port: "587"
user: alerts@example.com
pass: "••••••••"
from: alerts@example.com
to: oncall@example.com
- name: Slack Ops
type: slack
settings:
url: https://hooks.slack.com/services/T00000/B00000/demo-token
monitors:
# HTTP — homelab services
- name: Nextcloud
type: http
url: https://nextcloud.com
interval: 30
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 2
- name: Jellyfin
type: http
url: https://jellyfin.org
interval: 30
alert: Discord Homelab
max_retries: 2
- name: Home Assistant
type: http
url: https://www.home-assistant.io
interval: 30
alert: Discord Homelab
max_retries: 3
- name: Gitea
type: http
url: https://about.gitea.com
interval: 60
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 2
- name: Traefik Dashboard
type: http
url: https://traefik.io
interval: 60
alert: Discord Homelab
max_retries: 1
- name: Vaultwarden
type: http
url: https://bitwarden.com
interval: 30
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 3
- name: Personal Blog
type: http
url: https://jvns.ca
interval: 120
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 2
- name: Immich
type: http
url: https://immich.app
interval: 60
alert: Discord Homelab
check_ssl: true
expiry_threshold: 7
max_retries: 3
# HTTP — deliberate failure (non-resolving homelab host → stays DOWN)
- name: Auth Portal
type: http
url: https://auth.home.arpa
interval: 30
alert: Discord Homelab
max_retries: 2
# Push — cron jobs
- name: Nightly Backup
type: push
interval: 300
alert: Discord Homelab
- name: Cert Renewal
type: push
interval: 300
alert: Discord Homelab
# Infrastructure group
- name: Infrastructure
type: group
alert: Discord Homelab
monitors:
- name: Edge Router
type: ping
hostname: 8.8.8.8
interval: 30
alert: Discord Homelab
timeout: 5
- name: Postgres
type: port
hostname: localhost
port: 18099
interval: 60
alert: Discord Homelab
timeout: 5
- name: DNS Primary
type: dns
hostname: google.com
dns_server: 8.8.8.8
dns_resolve_type: A
interval: 60
alert: Discord Homelab
timeout: 5
Executable
+31
View File
@@ -0,0 +1,31 @@
#!/bin/bash
# VHS screenshot setup: seed monitors, backfill history, start server.
# Usage: setup.sh <db-path>
# Must be run from the repository root: it invokes ./uptop and reads vhs/seed.yaml.
set -e
DB="${1:?usage: setup.sh <db-path>}"
# Start from a clean database, including the SQLite -shm/-wal sidecar files.
rm -f "$DB" "$DB-shm" "$DB-wal"
echo "==> Seeding monitors and alerts..."
UPTOP_DB_DSN="$DB" ./uptop apply -f vhs/seed.yaml 2>&1
echo "==> Backfilling check history..."
# Build first so the backfill's `now` (node last_seen, heartbeat timing) isn't racing
# a cold compile — keeps the capture window deterministic.
go build -o /tmp/uptop-backfill ./vhs/backfill/
BACKFILL_OUT=$(/tmp/uptop-backfill "$DB")
echo "$BACKFILL_OUT"
# The backfill tool prints PUSH_TOKEN=<token> when the push monitor exists.
# -f2- keeps everything after the first '=', so a token that itself contains '='
# is not truncated.
PUSH_TOKEN=$(echo "$BACKFILL_OUT" | grep '^PUSH_TOKEN=' | cut -d= -f2-)
if [ -n "$PUSH_TOKEN" ]; then
echo "==> Sending push heartbeat in 10s (background)..."
# Delayed so the server started below is already listening when the heartbeat arrives.
(sleep 10 && curl -s "http://localhost:18099/api/push" -H "Authorization: Bearer $PUSH_TOKEN" > /dev/null 2>&1) &
fi
echo "==> Starting uptop server..."
# exec replaces this shell with the server process; stderr is discarded.
exec env \
UPTOP_DB_DSN="$DB" \
UPTOP_PORT=23299 \
UPTOP_HTTP_PORT=18099 \
UPTOP_ALLOW_PRIVATE_TARGETS=true \
UPTOP_DEMO=1 \
./uptop serve 2>/dev/null