5 Commits

Author SHA1 Message Date
lerko c9fb006042 chore: ignore sqlite WAL sidecars and stray vhs build binaries
CI / test (pull_request) Successful in 2m38s
CI / lint (pull_request) Successful in 1m12s
CI / vulncheck (pull_request) Successful in 56s
uptop.db* now covers the -shm/-wal sidecars; /backfill and /crop catch
binaries left by `go build ./vhs/<tool>` without -o.
2026-05-29 11:45:47 -04:00
lerko 03cbe283df chore(tui): polish demo + regenerate screenshots
CI / test (pull_request) Successful in 2m45s
CI / lint (pull_request) Successful in 1m4s
CI / vulncheck (pull_request) Successful in 56s
Rework the VHS demo so the README screenshots actually entice a download.

Demo data / tooling:
- seed.yaml: real, reachable service URLs (detail now shows nextcloud.com,
  not example.com); Auth Portal -> non-resolving home.arpa host so it reads
  as a believable, reliably-DOWN monitor
- backfill: transient outages for Nextcloud/Jellyfin/Immich aligned with their
  state changes (uptime % now matches); log timestamps derived from now so the
  Logs view reads chronologically; real SSL warning; three probe nodes across
  regions; seeded alert send health
- demo.tape: shorter warm-up, added Nodes + theme captures, ordered so every
  shot stays inside the 60s node-freshness window (consistent probe count)
- vhs/crop: new tool to trim the empty terminal border around each screenshot
- setup.sh: build backfill up front for deterministic timing; UPTOP_DEMO=1

Supporting code:
- persist alert send health (new alert_health table, load on startup,
  best-effort save on send) so health/last-sent survive restarts
- latency Min/Avg/Max ignore failed checks (no more "Min 0ms")
- correct "probe"/"probes" pluralization
- stable status dot instead of an animated spinner under UPTOP_DEMO
2026-05-28 22:32:45 -04:00
lerko 9c7ed284b3 fix(lint): suppress gosec G404 for demo data RNG
CI / test (pull_request) Successful in 2m48s
CI / lint (pull_request) Successful in 1m7s
CI / vulncheck (pull_request) Successful in 56s
2026-05-28 19:00:45 -04:00
lerko ff85abb2c9 fix(lint): resolve errcheck and gosec warnings in vhs backfill tool
CI / test (pull_request) Successful in 2m52s
CI / lint (pull_request) Failing after 1m11s
CI / vulncheck (pull_request) Successful in 56s
2026-05-28 18:50:56 -04:00
lerko 10f249a2ae chore: add TUI screenshots via VHS
CI / test (pull_request) Successful in 2m50s
CI / lint (pull_request) Failing after 1m12s
CI / vulncheck (pull_request) Successful in 56s
Screenshots capture 4 views: monitors dashboard (hero), detail panel,
alerts tab, and logs tab. Includes VHS tape, demo seed config, and
setup script for reproducible captures.

Also fixes latencySparkline to color DOWN checks red instead of green
— previously failed checks with 0ms latency rendered as green bars.
2026-05-28 18:30:39 -04:00
12 changed files with 739 additions and 0 deletions
+4
View File
@@ -27,6 +27,10 @@ go.work
# End of https://www.toptal.com/developers/gitignore/api/go
/uptop
# stray binaries from `go build ./vhs/<tool>` without -o
/backfill
/crop
# sqlite db + WAL sidecars (-shm/-wal)
uptop.db*
.ssh
+368
View File
@@ -0,0 +1,368 @@
package main
import (
"database/sql"
"fmt"
"math/rand/v2"
"os"
"time"
_ "github.com/mattn/go-sqlite3"
)
// main validates the command line and delegates to run, exiting with status 1
// if the database path is missing or any backfill step fails.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: backfill <db-path>")
		os.Exit(1)
	}
	// All database work lives in run so that its deferred db.Close executes
	// before os.Exit (os.Exit does not run deferred calls).
	if err := run(os.Args[1]); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// run opens the SQLite database at dbPath and executes every backfill step in
// order: history, state changes, logs, nodes, maintenance windows, and alert
// health. It stops at the first failing step and returns that error prefixed
// with the step name. On success it prints a summary line and, when a
// 'Nightly Backup' site exists, a PUSH_TOKEN=<token> line on stdout.
func run(dbPath string) error {
	db, err := sql.Open("sqlite3", dbPath)
	if err != nil {
		return fmt.Errorf("open: %w", err)
	}
	defer db.Close()

	ids, err := loadSiteIDs(db)
	if err != nil {
		return fmt.Errorf("load site IDs: %w", err)
	}

	rng := rand.New(rand.NewPCG(42, 0)) //nolint:gosec // deterministic seed for reproducible demo data
	now := time.Now().UTC()

	if err := backfillHistory(db, rng, now, ids); err != nil {
		return fmt.Errorf("history: %w", err)
	}
	if err := backfillStateChanges(db, now, ids); err != nil {
		return fmt.Errorf("state changes: %w", err)
	}
	if err := backfillLogs(db, now); err != nil {
		return fmt.Errorf("logs: %w", err)
	}
	if err := backfillNodes(db, now); err != nil {
		return fmt.Errorf("nodes: %w", err)
	}
	if err := backfillMaintenance(db, now, ids); err != nil {
		return fmt.Errorf("maintenance: %w", err)
	}

	alertIDs, err := loadAlertIDs(db)
	if err != nil {
		return fmt.Errorf("load alert IDs: %w", err)
	}
	if err := backfillAlertHealth(db, now, alertIDs); err != nil {
		return fmt.Errorf("alert health: %w", err)
	}

	// The count is informational only: report a failure on stderr but still
	// print the summary so callers parsing stdout keep working.
	var count int
	if err := db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count); err != nil {
		fmt.Fprintf(os.Stderr, "warning: count check_history: %v\n", err)
	}
	fmt.Printf("Backfill complete: %d check records\n", count)

	// The token line is optional: any lookup error (including no matching
	// row) simply omits it.
	var token string
	if err := db.QueryRow("SELECT token FROM sites WHERE name='Nightly Backup'").Scan(&token); err == nil {
		fmt.Printf("PUSH_TOKEN=%s\n", token)
	}
	return nil
}
// loadSiteIDs reads every row of the sites table and returns a lookup from
// site name to site id. A query failure is returned unchanged.
func loadSiteIDs(db *sql.DB) (map[string]int, error) {
	siteRows, queryErr := db.Query("SELECT id, name FROM sites")
	if queryErr == nil {
		return scanNameIDs(siteRows)
	}
	return nil, queryErr
}
// loadAlertIDs reads every row of the alerts table and returns a lookup from
// alert name to alert id. A query failure is returned unchanged.
func loadAlertIDs(db *sql.DB) (map[string]int, error) {
	alertRows, queryErr := db.Query("SELECT id, name FROM alerts")
	if queryErr == nil {
		return scanNameIDs(alertRows)
	}
	return nil, queryErr
}
// scanNameIDs drains a result set whose columns are (id, name) into a
// name -> id map and closes rows before returning. A scan failure aborts with
// a nil map; an iteration error is returned alongside the rows read so far.
func scanNameIDs(rows *sql.Rows) (map[string]int, error) {
	defer rows.Close()

	byName := map[string]int{}
	for rows.Next() {
		var (
			rowID   int
			rowName string
		)
		if scanErr := rows.Scan(&rowID, &rowName); scanErr != nil {
			return nil, scanErr
		}
		byName[rowName] = rowID
	}
	return byName, rows.Err()
}
// backfillAlertHealth writes one alert_health row per known demo alert so the
// Alerts tab has a recent "last sent" time and send/fail counters to display
// rather than empty health. Alerts whose name is absent from alertIDs are
// skipped. All rows are written in a single transaction.
func backfillAlertHealth(db *sql.DB, now time.Time, alertIDs map[string]int) error {
	const tsLayout = "2006-01-02 15:04:05"

	type seed struct {
		alert     string
		age       time.Duration // how long before now the last send happened
		lastOK    bool
		sendCount int
		failCount int
	}
	seeds := []seed{
		{alert: "Discord Homelab", age: 4 * time.Minute, lastOK: true, sendCount: 37, failCount: 0},
		{alert: "Slack Ops", age: 9 * time.Minute, lastOK: true, sendCount: 21, failCount: 1},
		{alert: "Ntfy Alerts", age: 1 * time.Hour, lastOK: true, sendCount: 12, failCount: 0},
		{alert: "Email Oncall", age: 3 * time.Hour, lastOK: true, sendCount: 5, failCount: 0},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare("INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, s := range seeds {
		alertID, known := alertIDs[s.alert]
		if known {
			lastSend := now.Add(-s.age).Format(tsLayout)
			if _, execErr := insert.Exec(alertID, lastSend, s.lastOK, "", s.sendCount, s.failCount); execErr != nil {
				return execErr
			}
		}
	}
	return tx.Commit()
}
// monitorProfile describes the synthetic check history generated for one
// monitor by backfillHistory: a latency range for UP checks and an optional
// window of check indices during which the monitor is recorded as DOWN.
type monitorProfile struct {
// name is the monitor name used to look up its site id.
name string
// minMs is the lowest latency (milliseconds) generated for an UP check.
minMs int
// maxMs is the exclusive upper bound (milliseconds) of generated UP latency.
maxMs int
downFrom int // first DOWN check index (-1 = always up)
downTo int // exclusive end of the DOWN window; use 60 (total) for a still-down monitor
}
// backfillHistory inserts 60 synthetic check_history rows, spaced 24 minutes
// apart and ending 24 minutes before now, for each profiled monitor present in
// ids. Checks inside a profile's DOWN window are stored with zero latency and
// is_up=false; every other check draws a latency from rng within the profile's
// range. All rows are written in a single transaction.
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
	const (
		checks   = 60
		spacing  = 24 * time.Minute // 60 checks * 24m = a 24h window
		tsLayout = "2006-01-02 15:04:05"
	)

	// Latency ranges reflect monitoring public services over the internet, so the
	// detail histogram brackets the live latency the engine measures at capture time.
	// The DOWN indices place each outage inside the 24h window.
	profiles := []monitorProfile{
		{"Nextcloud", 200, 600, 47, 48},       // brief blip ~5h ago, recovered
		{"Jellyfin", 40, 180, 15, 16},         // brief blip ~18h ago, recovered
		{"Home Assistant", 30, 120, -1, 0},    //
		{"Gitea", 50, 200, -1, 0},             //
		{"Traefik Dashboard", 60, 200, -1, 0}, //
		{"Vaultwarden", 80, 250, -1, 0},       //
		{"Personal Blog", 40, 160, -1, 0},     //
		{"Immich", 60, 300, 30, 31},           // brief blip ~12h ago; periodic spikes below
		{"Auth Portal", 30, 90, 40, 60},       // DOWN ~8h ago, still down
		{"Edge Router", 5, 20, -1, 0},         // ping
		{"Postgres", 1, 6, -1, 0},             // port
		{"DNS Primary", 8, 30, -1, 0},         // dns
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare("INSERT INTO check_history (site_id, latency_ns, is_up, checked_at) VALUES (?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, prof := range profiles {
		siteID, known := ids[prof.name]
		if !known {
			continue
		}
		for idx := 0; idx < checks; idx++ {
			// Index 0 is the oldest check; the newest sits one spacing before now.
			stamp := now.Add(-time.Duration(checks-idx) * spacing).Format(tsLayout)

			inOutage := prof.downFrom >= 0 && idx >= prof.downFrom && idx < prof.downTo
			var latencyNs int64
			if !inOutage {
				// The order of rng draws is part of the reproducible output:
				// one base draw per UP check, plus one extra for an Immich spike.
				ms := prof.minMs + rng.IntN(prof.maxMs-prof.minMs)
				if prof.name == "Immich" && idx%17 == 0 {
					ms = 250 + rng.IntN(100)
				}
				latencyNs = int64(ms) * 1_000_000
			}

			if _, execErr := insert.Exec(siteID, latencyNs, !inOutage, stamp); execErr != nil {
				return execErr
			}
		}
	}
	return tx.Commit()
}
// backfillStateChanges inserts the UP/DOWN transitions that accompany the
// seeded outages into state_changes, timestamped relative to now. Monitors
// missing from ids are skipped. All rows are written in a single transaction.
func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
	const tsLayout = "2006-01-02 15:04:05"

	type transition struct {
		monitor string
		prev    string
		next    string
		cause   string
		when    time.Time
	}
	before := func(d time.Duration) time.Time { return now.Add(-d) }

	// Timed to line up with the history dips (Nextcloud ~5h, Immich ~12h, Jellyfin ~18h)
	// and the still-down Auth Portal (~8h), so detail panels read coherently.
	transitions := []transition{
		{"Nextcloud", "UP", "DOWN", "read timeout", before(5*time.Hour + 8*time.Minute)},
		{"Nextcloud", "DOWN", "UP", "", before(5 * time.Hour)},
		{"Auth Portal", "UP", "DOWN", "no such host", before(8 * time.Hour)},
		{"Immich", "UP", "DOWN", "502 Bad Gateway", before(12*time.Hour + 8*time.Minute)},
		{"Immich", "DOWN", "UP", "", before(12 * time.Hour)},
		{"Jellyfin", "UP", "DOWN", "connection reset", before(18*time.Hour + 5*time.Minute)},
		{"Jellyfin", "DOWN", "UP", "", before(18 * time.Hour)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare("INSERT INTO state_changes (site_id, from_status, to_status, error_reason, changed_at) VALUES (?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, tr := range transitions {
		siteID, known := ids[tr.monitor]
		if !known {
			continue
		}
		if _, execErr := insert.Exec(siteID, tr.prev, tr.next, tr.cause, tr.when.Format(tsLayout)); execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// backfillLogs inserts the demo event log into the logs table. Each message is
// prefixed with an "[HH:MM]" bracket rendered from the entry's own timestamp in
// local time, while created_at is stored from the same timestamp as passed in
// via now. All rows are written in a single transaction.
func backfillLogs(db *sql.DB, now time.Time) error {
	const tsLayout = "2006-01-02 15:04:05"

	type event struct {
		body string
		when time.Time
	}
	before := func(hours, mins, secs int) time.Time {
		offset := time.Duration(hours)*time.Hour +
			time.Duration(mins)*time.Minute +
			time.Duration(secs)*time.Second
		return now.Add(-offset)
	}

	// Ordered newest-first. The bracket time is derived from each entry's timestamp
	// (not hardcoded), so the Logs view — which renders the leading [HH:MM] — reads
	// chronologically. Outage times line up with the seeded state changes and
	// history dips.
	events := []event{
		{"Monitor 'Nextcloud' recovered (was down 8m)", before(5, 0, 0)},
		{"Monitor 'Nextcloud' confirmed DOWN: read timeout", before(5, 8, 0)},
		{"Monitor 'Nextcloud' failed check 2/2", before(5, 8, 30)},
		{"Monitor 'Nextcloud' failed check 1/2", before(5, 9, 0)},
		{"Monitor 'Auth Portal' confirmed DOWN: no such host", before(8, 0, 0)},
		{"Monitor 'Auth Portal' failed check 2/2", before(8, 0, 30)},
		{"Monitor 'Auth Portal' failed check 1/2", before(8, 1, 0)},
		{"Monitor 'Immich' recovered (was down 8m)", before(12, 0, 0)},
		{"Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", before(12, 8, 0)},
		{"Monitor 'Immich' failed check 3/3", before(12, 8, 30)},
		{"Monitor 'Immich' failed check 2/3", before(12, 9, 0)},
		{"Monitor 'Immich' failed check 1/3", before(12, 9, 30)},
		{"Monitor 'Jellyfin' recovered (was down 5m)", before(18, 0, 0)},
		{"Monitor 'Jellyfin' confirmed DOWN: connection reset", before(18, 5, 0)},
		{"Monitor 'Jellyfin' failed check 2/2", before(18, 5, 30)},
		{"Monitor 'Jellyfin' failed check 1/2", before(18, 6, 0)},
		{"SSL warning: certificate for 'Personal Blog' expires in 9 days", before(20, 0, 0)},
		{"Engine RESUMED (Active)", before(22, 0, 0)},
		{"Loaded check history from database", before(22, 0, 5)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare("INSERT INTO logs (message, created_at) VALUES (?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, ev := range events {
		// Bracket in local time to match the engine's live AddLog timestamps;
		// created_at stays UTC to match the store's CURRENT_TIMESTAMP ordering.
		line := fmt.Sprintf("[%s] %s", ev.when.Local().Format("15:04"), ev.body)
		if _, execErr := insert.Exec(line, ev.when.Format(tsLayout)); execErr != nil {
			return execErr
		}
	}
	return tx.Commit()
}
// backfillNodes upserts the three demo probe nodes into the nodes table with
// last_seen set to now, so they read ONLINE for the whole capture window (the
// tape keeps every shot under the 60s freshness threshold).
//
// The rows are written inside one transaction, matching the other backfill
// steps, so a failure part-way through cannot leave a partial node set behind.
func backfillNodes(db *sql.DB, now time.Time) error {
	// Multiple regions to show distributed probes.
	nodes := []struct{ id, name, region string }{
		{"node-use1", "leader", "us-east"},
		{"node-euw1", "probe-eu", "eu-west"},
		{"node-apse1", "probe-ap", "ap-southeast"},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare("INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer stmt.Close()

	ts := now.Format("2006-01-02 15:04:05")
	for _, n := range nodes {
		if _, err := stmt.Exec(n.id, n.name, n.region, ts, "2026.05.1"); err != nil {
			return err
		}
	}
	return tx.Commit()
}
// backfillMaintenance inserts two demo maintenance windows in one transaction:
// a finished "Jellyfin upgrade" three days ago (2h long) attached to the
// Jellyfin monitor, and an upcoming "Network switch replacement" two days from
// now (4h long) stored with monitor_id 0.
//
// The Jellyfin window is only written when ids contains "Jellyfin". Looking the
// id up without the ok-check would yield 0 for a missing monitor and attach the
// upgrade window to monitor_id 0 — the same value the second window uses
// (presumably "not tied to one monitor"; confirm against the store schema).
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
	const tsLayout = "2006-01-02 15:04:05"

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer stmt.Close()

	// Skip unknown monitors, as the other backfill steps do.
	if jellyfinID, ok := ids["Jellyfin"]; ok {
		past := now.Add(-3 * 24 * time.Hour)
		if _, err := stmt.Exec(jellyfinID, "Jellyfin upgrade", "Upgrade to v10.10 + plugin updates", "maintenance",
			past.Format(tsLayout),
			past.Add(2*time.Hour).Format(tsLayout),
			"admin"); err != nil {
			return err
		}
	}

	future := now.Add(2 * 24 * time.Hour)
	if _, err := stmt.Exec(0, "Network switch replacement", "Replacing core switch in rack 2", "maintenance",
		future.Format(tsLayout),
		future.Add(4*time.Hour).Format(tsLayout),
		"admin"); err != nil {
		return err
	}
	return tx.Commit()
}
+123
View File
@@ -0,0 +1,123 @@
// Command crop trims the uniform background border around each VHS screenshot so the
// content fills the frame instead of floating in a large empty terminal. Sparse views
// (alerts, detail, nodes) would otherwise sit in a sea of dead space.
//
// Usage: crop [dir] (dir defaults to vhs/screenshots)
package main
import (
"fmt"
"image"
"image/color"
"image/png"
"os"
"path/filepath"
)
// Cropping parameters used by cropFile.
//
// pad is the margin, in pixels, kept around the detected content on every side
// (limited by the image bounds). tol is the threshold on the summed absolute
// difference of the 8-bit R, G and B channels between a pixel and the sampled
// background; a pixel whose sum exceeds tol counts as content.
const (
pad = 24
tol = 28
)
// main crops every *.png in the target directory in place. The directory is
// the first command-line argument, or vhs/screenshots when none is given. It
// exits with status 1 if globbing fails, no PNGs are found, or any file fails
// to crop; otherwise it prints the resulting size of each file.
func main() {
	target := "vhs/screenshots"
	if len(os.Args) >= 2 {
		target = os.Args[1]
	}

	pattern := filepath.Join(target, "*.png")
	matches, globErr := filepath.Glob(pattern)
	switch {
	case globErr != nil:
		fmt.Fprintf(os.Stderr, "glob: %v\n", globErr)
		os.Exit(1)
	case len(matches) == 0:
		fmt.Fprintf(os.Stderr, "no PNGs in %s\n", target)
		os.Exit(1)
	}

	for _, file := range matches {
		width, height, cropErr := cropFile(file)
		if cropErr != nil {
			fmt.Fprintf(os.Stderr, "crop %s: %v\n", file, cropErr)
			os.Exit(1)
		}
		fmt.Printf("cropped %s -> %dx%d\n", filepath.Base(file), width, height)
	}
}
// cropFile trims the uniform background border from the PNG at path and
// rewrites the file in place.
//
// The background colour is sampled two pixels in from the top-left corner. The
// bounding box of every pixel that differs from it by more than tol is grown by
// pad pixels on each side (limited by the image bounds) and copied into a new
// image that replaces the file's contents.
//
// It returns the width and height of the resulting image. A frame with no
// content pixels is left untouched and its original dimensions are returned.
// Any error from opening, decoding, creating, encoding, or closing the file is
// returned; the close error is checked because a failed flush would otherwise
// report a truncated screenshot as success.
func cropFile(path string) (int, int, error) {
	f, err := os.Open(path) //nolint:gosec // dev tool: paths come from a trusted local glob
	if err != nil {
		return 0, 0, err
	}
	src, err := png.Decode(f)
	_ = f.Close() // read-only handle: nothing to lose on a failed close
	if err != nil {
		return 0, 0, err
	}

	b := src.Bounds()
	// Background colour sampled near a corner — expected to fall inside the
	// blank padding VHS leaves around the terminal.
	bgR, bgG, bgB := rgb(src.At(b.Min.X+2, b.Min.Y+2))

	// Start with an inverted box so the first content pixel initialises it.
	minX, minY := b.Max.X, b.Max.Y
	maxX, maxY := b.Min.X, b.Min.Y
	found := false
	for y := b.Min.Y; y < b.Max.Y; y++ {
		for x := b.Min.X; x < b.Max.X; x++ {
			r, g, bl := rgb(src.At(x, y))
			if abs(r-bgR)+abs(g-bgG)+abs(bl-bgB) > tol {
				found = true
				minX, minY = min(minX, x), min(minY, y)
				maxX, maxY = max(maxX, x), max(maxY, y)
			}
		}
	}
	if !found {
		return b.Dx(), b.Dy(), nil // blank frame — leave untouched
	}

	// Grow by the margin; max* become exclusive bounds (hence the +1).
	minX = clamp(minX-pad, b.Min.X, b.Max.X)
	minY = clamp(minY-pad, b.Min.Y, b.Max.Y)
	maxX = clamp(maxX+pad+1, b.Min.X, b.Max.X)
	maxY = clamp(maxY+pad+1, b.Min.Y, b.Max.Y)

	dst := image.NewRGBA(image.Rect(0, 0, maxX-minX, maxY-minY))
	for y := minY; y < maxY; y++ {
		for x := minX; x < maxX; x++ {
			dst.Set(x-minX, y-minY, src.At(x, y))
		}
	}

	out, err := os.Create(path) //nolint:gosec // dev tool: paths come from a trusted local glob
	if err != nil {
		return 0, 0, err
	}
	if err := png.Encode(out, dst); err != nil {
		_ = out.Close() // the encode error is the one worth reporting
		return 0, 0, err
	}
	if err := out.Close(); err != nil {
		return 0, 0, err
	}
	return dst.Bounds().Dx(), dst.Bounds().Dy(), nil
}
// rgb returns the red, green and blue channels of c as 8-bit values (0-255),
// taken from the high byte of each 16-bit channel reported by c.RGBA. The
// alpha channel is ignored.
func rgb(c color.Color) (int, int, int) {
	red16, green16, blue16, _ := c.RGBA()
	to8 := func(ch uint32) int { return int(ch >> 8) }
	return to8(red16), to8(green16), to8(blue16)
}
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
// clamp limits v to the range [lo, hi]: values below lo become lo, values
// above hi become hi, and anything else is returned unchanged. The lower bound
// is checked first.
func clamp(v, lo, hi int) int {
	switch {
	case v < lo:
		return lo
	case v > hi:
		return hi
	default:
		return v
	}
}
+72
View File
@@ -0,0 +1,72 @@
Set Shell "bash"
Set Width 1400
Set Height 800
Set FontSize 14
Set Padding 20
Set Framerate 15
Set TypingSpeed 50ms
# Seed demo data + start uptop (UPTOP_DEMO=1 → stable pulse dot for stills).
Hide
Type "bash vhs/setup.sh /tmp/uptop-vhs.db"
Enter
# Warm-up: push heartbeat lands (~10s) and initial checks settle. Kept short so every
# capture stays inside the 60s node-freshness window (consistent "3 probes" footer).
Sleep 18s
Show
Sleep 2s
# 1. Sites — hero shot: mixed states, history sparklines, SSL, retries.
Screenshot vhs/screenshots/monitors.png
Sleep 1s
# 2. Detail — drill into Nextcloud (6th row from the top).
Down
Sleep 150ms
Down
Sleep 150ms
Down
Sleep 150ms
Down
Sleep 150ms
Down
Sleep 300ms
Type "i"
Sleep 2s
Screenshot vhs/screenshots/detail.png
Sleep 500ms
Escape
Sleep 1s
# 3. Alerts — channels with health dots + recent "last sent".
Tab
Sleep 1500ms
Screenshot vhs/screenshots/alerts.png
Sleep 500ms
# 4. Logs — chronological, severity-coloured event stream.
Tab
Sleep 1500ms
Screenshot vhs/screenshots/logs.png
Sleep 500ms
# 5. Nodes — distributed probes across regions.
Tab
Sleep 1500ms
Screenshot vhs/screenshots/nodes.png
Sleep 500ms
# 6. Theme — cycle to the next theme, return to Sites for an alternate-palette hero.
Type "T"
Sleep 500ms
Tab
Sleep 200ms
Tab
Sleep 200ms
Tab
Sleep 1s
Screenshot vhs/screenshots/theme.png
Sleep 500ms
Type "q"
Sleep 1s

Before

Width:  |  Height:  |  Size: 84 KiB

After

Width:  |  Height:  |  Size: 84 KiB

Before

Width:  |  Height:  |  Size: 78 KiB

After

Width:  |  Height:  |  Size: 78 KiB

Before

Width:  |  Height:  |  Size: 206 KiB

After

Width:  |  Height:  |  Size: 206 KiB

Before

Width:  |  Height:  |  Size: 232 KiB

After

Width:  |  Height:  |  Size: 232 KiB

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 57 KiB

Before

Width:  |  Height:  |  Size: 253 KiB

After

Width:  |  Height:  |  Size: 253 KiB

+141
View File
@@ -0,0 +1,141 @@
alerts:
- name: Discord Homelab
type: discord
settings:
url: https://discord.com/api/webhooks/1234567890/demo-token
- name: Ntfy Alerts
type: webhook
settings:
url: https://ntfy.example.com/homelab-alerts
- name: Email Oncall
type: email
settings:
host: smtp.example.com
port: "587"
user: alerts@example.com
pass: "••••••••"
from: alerts@example.com
to: oncall@example.com
- name: Slack Ops
type: slack
settings:
url: https://hooks.slack.com/services/T00000/B00000/demo-token
monitors:
# HTTP — homelab services
- name: Nextcloud
type: http
url: https://nextcloud.com
interval: 30
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 2
- name: Jellyfin
type: http
url: https://jellyfin.org
interval: 30
alert: Discord Homelab
max_retries: 2
- name: Home Assistant
type: http
url: https://www.home-assistant.io
interval: 30
alert: Discord Homelab
max_retries: 3
- name: Gitea
type: http
url: https://about.gitea.com
interval: 60
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 2
- name: Traefik Dashboard
type: http
url: https://traefik.io
interval: 60
alert: Discord Homelab
max_retries: 1
- name: Vaultwarden
type: http
url: https://bitwarden.com
interval: 30
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 3
- name: Personal Blog
type: http
url: https://jvns.ca
interval: 120
alert: Discord Homelab
check_ssl: true
expiry_threshold: 14
max_retries: 2
- name: Immich
type: http
url: https://immich.app
interval: 60
alert: Discord Homelab
check_ssl: true
expiry_threshold: 7
max_retries: 3
# HTTP — deliberate failure (non-resolving homelab host → stays DOWN)
- name: Auth Portal
type: http
url: https://auth.home.arpa
interval: 30
alert: Discord Homelab
max_retries: 2
# Push — cron jobs
- name: Nightly Backup
type: push
interval: 300
alert: Discord Homelab
- name: Cert Renewal
type: push
interval: 300
alert: Discord Homelab
# Infrastructure group
- name: Infrastructure
type: group
alert: Discord Homelab
monitors:
- name: Edge Router
type: ping
hostname: 8.8.8.8
interval: 30
alert: Discord Homelab
timeout: 5
- name: Postgres
type: port
hostname: localhost
port: 18099
interval: 60
alert: Discord Homelab
timeout: 5
- name: DNS Primary
type: dns
hostname: google.com
dns_server: 8.8.8.8
dns_resolve_type: A
interval: 60
alert: Discord Homelab
timeout: 5
Executable
+31
View File
@@ -0,0 +1,31 @@
#!/bin/bash
# VHS screenshot setup: seed monitors, backfill history, start server.
#
# Usage: setup.sh <db-path>
#
# Steps:
#   1. Remove any previous database (including its -shm/-wal sidecars).
#   2. Apply vhs/seed.yaml with ./uptop to create monitors and alerts.
#   3. Build and run the backfill tool against the database.
#   4. If the backfill printed a PUSH_TOKEN, schedule one push heartbeat.
#   5. Replace this shell with the uptop server in demo mode.
set -e

DB="${1:?usage: setup.sh <db-path>}"

rm -f "$DB" "$DB-shm" "$DB-wal"

echo "==> Seeding monitors and alerts..."
UPTOP_DB_DSN="$DB" ./uptop apply -f vhs/seed.yaml 2>&1

echo "==> Backfilling check history..."
# Build first so the backfill's `now` (node last_seen, heartbeat timing) isn't racing
# a cold compile — keeps the capture window deterministic.
go build -o /tmp/uptop-backfill ./vhs/backfill/
BACKFILL_OUT=$(/tmp/uptop-backfill "$DB")
echo "$BACKFILL_OUT"

# -f2- keeps everything after the first '=', so a token that itself contains
# '=' (e.g. base64 padding) is not truncated.
PUSH_TOKEN=$(echo "$BACKFILL_OUT" | grep '^PUSH_TOKEN=' | cut -d= -f2-)

if [ -n "$PUSH_TOKEN" ]; then
    echo "==> Sending push heartbeat in 10s (background)..."
    # Delayed so the server started below is listening before the request is sent.
    (sleep 10 && curl -s "http://localhost:18099/api/push" -H "Authorization: Bearer $PUSH_TOKEN" > /dev/null 2>&1) &
fi

echo "==> Starting uptop server..."
exec env \
    UPTOP_DB_DSN="$DB" \
    UPTOP_PORT=23299 \
    UPTOP_HTTP_PORT=18099 \
    UPTOP_ALLOW_PRIVATE_TARGETS=true \
    UPTOP_DEMO=1 \
    ./uptop serve 2>/dev/null