Compare commits
5 Commits
2026.05.5
...
c9fb006042
| Author | SHA1 | Date | |
|---|---|---|---|
| c9fb006042 | |||
| 03cbe283df | |||
| 9c7ed284b3 | |||
| ff85abb2c9 | |||
| 10f249a2ae |
+5
-1
@@ -27,7 +27,11 @@ go.work
|
||||
# End of https://www.toptal.com/developers/gitignore/api/go
|
||||
|
||||
/uptop
|
||||
uptop.db
|
||||
# stray binaries from `go build ./vhs/<tool>` without -o
|
||||
/backfill
|
||||
/crop
|
||||
# sqlite db + WAL sidecars (-shm/-wal)
|
||||
uptop.db*
|
||||
|
||||
.ssh
|
||||
|
||||
|
||||
@@ -385,6 +385,7 @@ func runServe(args []string) {
|
||||
|
||||
eng.InitHistory()
|
||||
eng.InitLogs()
|
||||
eng.InitAlertHealth()
|
||||
eng.Start(ctx)
|
||||
|
||||
tlsCert := os.Getenv("UPTOP_TLS_CERT")
|
||||
|
||||
@@ -53,6 +53,10 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return models.Pr
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
|
||||
|
||||
@@ -51,6 +51,10 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return m
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
|
||||
|
||||
@@ -79,6 +79,17 @@ type ProbeNode struct {
|
||||
Version string
|
||||
}
|
||||
|
||||
// AlertHealthRecord is the stored form of one alert channel's send health.
// Persisting it keeps the "last sent" time and health indicator across restarts
// rather than having them fall back to "never".
type AlertHealthRecord struct {
	// AlertID identifies the alert channel the record belongs to.
	AlertID int
	// LastSendAt is the time of the most recent send; the zero time means none is recorded.
	LastSendAt time.Time
	// LastSendOK is the outcome of the most recent send.
	LastSendOK bool
	// LastError holds the error text associated with the channel's last failure.
	LastError string
	// SendCount and FailCount are the channel's running send / failure counters.
	SendCount, FailCount int
}
|
||||
|
||||
type MaintenanceWindow struct {
|
||||
ID int
|
||||
MonitorID int
|
||||
|
||||
@@ -146,6 +146,26 @@ func (e *Engine) InitLogs() {
|
||||
e.logStore = logs
|
||||
}
|
||||
|
||||
// InitAlertHealth restores persisted alert send health so the dashboard shows real
|
||||
// "last sent" / health state on startup instead of resetting every channel to "never".
|
||||
func (e *Engine) InitAlertHealth() {
|
||||
records, err := e.db.LoadAlertHealth()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
e.alertHealthMu.Lock()
|
||||
defer e.alertHealthMu.Unlock()
|
||||
for id, r := range records {
|
||||
e.alertHealth[id] = AlertHealth{
|
||||
LastSendAt: r.LastSendAt,
|
||||
LastSendOK: r.LastSendOK,
|
||||
LastError: r.LastError,
|
||||
SendCount: r.SendCount,
|
||||
FailCount: r.FailCount,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) GetLogs() []string {
|
||||
e.logMu.RLock()
|
||||
defer e.logMu.RUnlock()
|
||||
@@ -612,6 +632,18 @@ func (e *Engine) recordAlertResult(alertID int, ok bool, errMsg string) {
|
||||
h.FailCount++
|
||||
}
|
||||
e.alertHealth[alertID] = h
|
||||
|
||||
// Persist best-effort so health survives restarts; DB IO off the alert path.
|
||||
go func(rec models.AlertHealthRecord) {
|
||||
_ = e.db.SaveAlertHealth(rec)
|
||||
}(models.AlertHealthRecord{
|
||||
AlertID: alertID,
|
||||
LastSendAt: h.LastSendAt,
|
||||
LastSendOK: h.LastSendOK,
|
||||
LastError: h.LastError,
|
||||
SendCount: h.SendCount,
|
||||
FailCount: h.FailCount,
|
||||
})
|
||||
}
|
||||
|
||||
func (e *Engine) GetAlertHealth(alertID int) AlertHealth {
|
||||
|
||||
@@ -63,6 +63,10 @@ func (m *mockStore) GetNode(string) (models.ProbeNode, error) { return m
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
@@ -65,6 +65,10 @@ func (m *mockStore) AddAlertReturningID(string, string, map[string]string) (int,
|
||||
func (m *mockStore) GetAllNodes() ([]models.ProbeNode, error) { return nil, nil }
|
||||
func (m *mockStore) UpdateNodeLastSeen(string) error { return nil }
|
||||
func (m *mockStore) DeleteNode(string) error { return nil }
|
||||
func (m *mockStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
return nil, nil
|
||||
}
|
||||
func (m *mockStore) SaveAlertHealth(models.AlertHealthRecord) error { return nil }
|
||||
func (m *mockStore) SaveLog(string) error { return nil }
|
||||
func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil }
|
||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||
|
||||
@@ -14,6 +14,7 @@ type Dialect interface {
|
||||
ImportWipe(tx *sql.Tx)
|
||||
ImportResetSequences(tx *sql.Tx)
|
||||
UpsertNodeSQL() string
|
||||
UpsertAlertHealthSQL() string
|
||||
}
|
||||
|
||||
func rewritePlaceholders(query string, dollarStyle bool) string {
|
||||
|
||||
@@ -81,6 +81,14 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
|
||||
changed_at TIMESTAMP DEFAULT NOW()
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
`CREATE TABLE IF NOT EXISTS alert_health (
|
||||
alert_id INTEGER PRIMARY KEY,
|
||||
last_send_at TIMESTAMP,
|
||||
last_send_ok BOOLEAN DEFAULT FALSE,
|
||||
last_error TEXT DEFAULT '',
|
||||
send_count INTEGER DEFAULT 0,
|
||||
fail_count INTEGER DEFAULT 0
|
||||
)`,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,6 +114,10 @@ func (d *PostgresDialect) UpsertNodeSQL() string {
|
||||
return "INSERT INTO nodes (id, name, region, last_seen, version) VALUES ($1, $2, $3, NOW(), $4) ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name, region = EXCLUDED.region, last_seen = NOW(), version = EXCLUDED.version"
|
||||
}
|
||||
|
||||
func (d *PostgresDialect) UpsertAlertHealthSQL() string {
|
||||
return "INSERT INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES ($1, $2, $3, $4, $5, $6) ON CONFLICT (alert_id) DO UPDATE SET last_send_at = EXCLUDED.last_send_at, last_send_ok = EXCLUDED.last_send_ok, last_error = EXCLUDED.last_error, send_count = EXCLUDED.send_count, fail_count = EXCLUDED.fail_count"
|
||||
}
|
||||
|
||||
// ResetSequenceOnEmpty does nothing in the PostgreSQL dialect.
func (d *PostgresDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {
	// Intentionally empty.
}
|
||||
|
||||
func (d *PostgresDialect) ImportWipe(tx *sql.Tx) {
|
||||
|
||||
@@ -88,6 +88,14 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
|
||||
changed_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
)`,
|
||||
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||
`CREATE TABLE IF NOT EXISTS alert_health (
|
||||
alert_id INTEGER PRIMARY KEY,
|
||||
last_send_at DATETIME,
|
||||
last_send_ok BOOLEAN DEFAULT 0,
|
||||
last_error TEXT DEFAULT '',
|
||||
send_count INTEGER DEFAULT 0,
|
||||
fail_count INTEGER DEFAULT 0
|
||||
)`,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,6 +121,10 @@ func (d *SQLiteDialect) UpsertNodeSQL() string {
|
||||
return "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, CURRENT_TIMESTAMP, ?)"
|
||||
}
|
||||
|
||||
func (d *SQLiteDialect) UpsertAlertHealthSQL() string {
|
||||
return "INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)"
|
||||
}
|
||||
|
||||
func (d *SQLiteDialect) ResetSequenceOnEmpty(db *sql.DB, table string) {
|
||||
var count int
|
||||
_ = db.QueryRow("SELECT COUNT(*) FROM " + table).Scan(&count) //nolint:errcheck
|
||||
|
||||
@@ -430,6 +430,37 @@ func (s *SQLStore) DeleteNode(id string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) LoadAlertHealth() (map[int]models.AlertHealthRecord, error) {
|
||||
rows, err := s.db.Query("SELECT alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count FROM alert_health")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
out := make(map[int]models.AlertHealthRecord)
|
||||
for rows.Next() {
|
||||
var r models.AlertHealthRecord
|
||||
var lastSend sql.NullTime
|
||||
if err := rows.Scan(&r.AlertID, &lastSend, &r.LastSendOK, &r.LastError, &r.SendCount, &r.FailCount); err != nil {
|
||||
return out, err
|
||||
}
|
||||
if lastSend.Valid {
|
||||
r.LastSendAt = lastSend.Time
|
||||
}
|
||||
out[r.AlertID] = r
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveAlertHealth(h models.AlertHealthRecord) error {
|
||||
var lastSend interface{}
|
||||
if !h.LastSendAt.IsZero() {
|
||||
lastSend = h.LastSendAt
|
||||
}
|
||||
_, err := s.db.Exec(s.dialect.UpsertAlertHealthSQL(),
|
||||
h.AlertID, lastSend, h.LastSendOK, h.LastError, h.SendCount, h.FailCount)
|
||||
return err
|
||||
}
|
||||
|
||||
func (s *SQLStore) SaveLog(message string) error {
|
||||
_, err := s.db.Exec(s.q("INSERT INTO logs (message) VALUES (?)"), message)
|
||||
if err != nil {
|
||||
|
||||
@@ -49,6 +49,10 @@ type Store interface {
|
||||
UpdateNodeLastSeen(id string) error
|
||||
DeleteNode(id string) error
|
||||
|
||||
// Alert Health
|
||||
LoadAlertHealth() (map[int]models.AlertHealthRecord, error)
|
||||
SaveAlertHealth(h models.AlertHealthRecord) error
|
||||
|
||||
// Logs
|
||||
SaveLog(message string) error
|
||||
LoadLogs(limit int) ([]string, error)
|
||||
|
||||
+29
-13
@@ -60,14 +60,18 @@ type siteFormData struct {
|
||||
Regions string
|
||||
}
|
||||
|
||||
func latencySparkline(latencies []time.Duration, width int) string {
|
||||
func latencySparkline(latencies []time.Duration, statuses []bool, width int) string {
|
||||
if len(latencies) == 0 {
|
||||
return subtleStyle.Render(strings.Repeat("·", width))
|
||||
}
|
||||
|
||||
samples := latencies
|
||||
sampledStatuses := statuses
|
||||
if len(samples) > width {
|
||||
samples = samples[len(samples)-width:]
|
||||
if len(sampledStatuses) > width {
|
||||
sampledStatuses = sampledStatuses[len(sampledStatuses)-width:]
|
||||
}
|
||||
}
|
||||
|
||||
minL, maxL := samples[0], samples[0]
|
||||
@@ -85,7 +89,7 @@ func latencySparkline(latencies []time.Duration, width int) string {
|
||||
sb.WriteString(subtleStyle.Render(strings.Repeat("·", remaining)))
|
||||
}
|
||||
spread := maxL - minL
|
||||
for _, l := range samples {
|
||||
for i, l := range samples {
|
||||
idx := 0
|
||||
if spread > 0 {
|
||||
idx = int(float64(l-minL) / float64(spread) * 7)
|
||||
@@ -94,6 +98,10 @@ func latencySparkline(latencies []time.Duration, width int) string {
|
||||
}
|
||||
}
|
||||
ch := string(sparkChars[idx])
|
||||
isDown := i < len(sampledStatuses) && !sampledStatuses[i]
|
||||
if isDown {
|
||||
sb.WriteString(dangerStyle.Render(ch))
|
||||
} else {
|
||||
ms := l.Milliseconds()
|
||||
if ms < 200 {
|
||||
sb.WriteString(specialStyle.Render(ch))
|
||||
@@ -103,6 +111,7 @@ func latencySparkline(latencies []time.Duration, width int) string {
|
||||
sb.WriteString(dangerStyle.Render(ch))
|
||||
}
|
||||
}
|
||||
}
|
||||
return sb.String()
|
||||
}
|
||||
|
||||
@@ -474,7 +483,7 @@ func (m Model) viewSitesTab() string {
|
||||
if site.Type == "push" {
|
||||
spark = heartbeatSparkline(hist.Statuses, sparkWidth)
|
||||
} else {
|
||||
spark = latencySparkline(hist.Latencies, sparkWidth)
|
||||
spark = latencySparkline(hist.Latencies, hist.Statuses, sparkWidth)
|
||||
}
|
||||
|
||||
rows = append(rows, []string{
|
||||
@@ -949,20 +958,27 @@ func (m Model) viewDetailPanel() string {
|
||||
up, len(hist.Statuses))
|
||||
}
|
||||
} else {
|
||||
b.WriteString(" " + latencySparkline(hist.Latencies, sparkWidth))
|
||||
if len(hist.Latencies) > 0 {
|
||||
minL, maxL := hist.Latencies[0], hist.Latencies[0]
|
||||
var total time.Duration
|
||||
for _, l := range hist.Latencies {
|
||||
total += l
|
||||
if l < minL {
|
||||
minL = l
|
||||
b.WriteString(" " + latencySparkline(hist.Latencies, hist.Statuses, sparkWidth))
|
||||
// Stats over successful checks only — a failed check is stored as 0ns latency
|
||||
// and would otherwise drag Min to 0ms and skew the average.
|
||||
var minL, maxL, total time.Duration
|
||||
count := 0
|
||||
for i, l := range hist.Latencies {
|
||||
if i < len(hist.Statuses) && !hist.Statuses[i] {
|
||||
continue
|
||||
}
|
||||
if l > maxL {
|
||||
if count == 0 {
|
||||
minL, maxL = l, l
|
||||
} else if l < minL {
|
||||
minL = l
|
||||
} else if l > maxL {
|
||||
maxL = l
|
||||
}
|
||||
total += l
|
||||
count++
|
||||
}
|
||||
avg := total / time.Duration(len(hist.Latencies))
|
||||
if count > 0 {
|
||||
avg := total / time.Duration(count)
|
||||
fmt.Fprintf(&b, "\n %s %dms %s %dms %s %dms",
|
||||
subtleStyle.Render("Min"), minL.Milliseconds(),
|
||||
subtleStyle.Render("Avg"), avg.Milliseconds(),
|
||||
|
||||
+24
-6
@@ -4,6 +4,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -122,6 +123,10 @@ type Model struct {
|
||||
|
||||
filterMode bool
|
||||
filterText string
|
||||
|
||||
// demoMode renders a stable status dot instead of the animated pulse so
|
||||
// screenshots/recordings don't capture the spinner mid-frame. Set via UPTOP_DEMO=1.
|
||||
demoMode bool
|
||||
}
|
||||
|
||||
func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
|
||||
@@ -155,6 +160,7 @@ func InitialModel(isAdmin bool, s store.Store, eng *monitor.Engine) Model {
|
||||
collapsed: collapsed,
|
||||
theme: theme,
|
||||
themeIndex: themeIdx,
|
||||
demoMode: os.Getenv("UPTOP_DEMO") == "1",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -754,11 +760,6 @@ func (m *Model) submitForm() {
|
||||
}
|
||||
|
||||
func (m Model) pulseIndicator() string {
|
||||
frame := m.tickCount % len(pulseFrames)
|
||||
brightness := int(m.pulsePos*155) + 100
|
||||
if brightness > 255 {
|
||||
brightness = 255
|
||||
}
|
||||
hasDown := false
|
||||
for _, s := range m.sites {
|
||||
if !s.Paused && !m.isMonitorInMaintenance(s.ID) && (s.Status == "DOWN" || s.Status == "SSL EXP") {
|
||||
@@ -766,6 +767,19 @@ func (m Model) pulseIndicator() string {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Stills can't show animation: render a stable status dot in demo mode.
|
||||
if m.demoMode {
|
||||
c := m.theme.Success
|
||||
if hasDown {
|
||||
c = m.theme.Danger
|
||||
}
|
||||
return lipgloss.NewStyle().Foreground(c).Render("●")
|
||||
}
|
||||
frame := m.tickCount % len(pulseFrames)
|
||||
brightness := int(m.pulsePos*155) + 100
|
||||
if brightness > 255 {
|
||||
brightness = 255
|
||||
}
|
||||
var color string
|
||||
if hasDown {
|
||||
color = fmt.Sprintf("#%02x%02x%02x", brightness, brightness/4, brightness/4)
|
||||
@@ -953,7 +967,11 @@ func (m Model) viewDashboard() string {
|
||||
online++
|
||||
}
|
||||
}
|
||||
statusParts = append(statusParts, fmt.Sprintf("%d probes", online))
|
||||
probeLabel := "probes"
|
||||
if online == 1 {
|
||||
probeLabel = "probe"
|
||||
}
|
||||
statusParts = append(statusParts, fmt.Sprintf("%d %s", online, probeLabel))
|
||||
}
|
||||
statusLine := strings.Join(statusParts, subtleStyle.Render(" · "))
|
||||
|
||||
|
||||
@@ -0,0 +1,368 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math/rand/v2"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
)
|
||||
|
||||
func main() {
|
||||
if len(os.Args) < 2 {
|
||||
fmt.Fprintln(os.Stderr, "usage: backfill <db-path>")
|
||||
os.Exit(1)
|
||||
}
|
||||
db, err := sql.Open("sqlite3", os.Args[1])
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "open: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
ids, err := loadSiteIDs(db)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "load site IDs: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
rng := rand.New(rand.NewPCG(42, 0)) //nolint:gosec // deterministic seed for reproducible demo data
|
||||
now := time.Now().UTC()
|
||||
|
||||
if err := backfillHistory(db, rng, now, ids); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "history: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillStateChanges(db, now, ids); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "state changes: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillLogs(db, now); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "logs: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillNodes(db, now); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "nodes: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillMaintenance(db, now, ids); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "maintenance: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
alertIDs, err := loadAlertIDs(db)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "load alert IDs: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := backfillAlertHealth(db, now, alertIDs); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "alert health: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
var count int
|
||||
_ = db.QueryRow("SELECT COUNT(*) FROM check_history").Scan(&count)
|
||||
fmt.Printf("Backfill complete: %d check records\n", count)
|
||||
|
||||
var token string
|
||||
if err := db.QueryRow("SELECT token FROM sites WHERE name='Nightly Backup'").Scan(&token); err == nil {
|
||||
fmt.Printf("PUSH_TOKEN=%s\n", token)
|
||||
}
|
||||
}
|
||||
|
||||
func loadSiteIDs(db *sql.DB) (map[string]int, error) {
|
||||
rows, err := db.Query("SELECT id, name FROM sites")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return scanNameIDs(rows)
|
||||
}
|
||||
|
||||
func loadAlertIDs(db *sql.DB) (map[string]int, error) {
|
||||
rows, err := db.Query("SELECT id, name FROM alerts")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return scanNameIDs(rows)
|
||||
}
|
||||
|
||||
// scanNameIDs drains a result set of (id, name) rows into a name -> id map and
// closes the rows before returning. A scan failure yields a nil map; an
// iteration error is returned alongside whatever was collected.
func scanNameIDs(rows *sql.Rows) (map[string]int, error) {
	defer rows.Close()

	byName := map[string]int{}
	for rows.Next() {
		var (
			id   int
			name string
		)
		if scanErr := rows.Scan(&id, &name); scanErr != nil {
			return nil, scanErr
		}
		byName[name] = id
	}
	return byName, rows.Err()
}
|
||||
|
||||
// backfillAlertHealth writes demo send-health rows for the known alert channels
// so the Alerts tab shows recent "last sent" times and healthy indicators
// instead of "never" everywhere. Channels whose name is absent from alertIDs
// are skipped. All rows are written in a single transaction.
func backfillAlertHealth(db *sql.DB, now time.Time, alertIDs map[string]int) error {
	type channelHealth struct {
		name         string
		sentAgo      time.Duration // how long before now the last send happened
		ok           bool
		sends, fails int
	}
	seeds := []channelHealth{
		{name: "Discord Homelab", sentAgo: 4 * time.Minute, ok: true, sends: 37, fails: 0},
		{name: "Slack Ops", sentAgo: 9 * time.Minute, ok: true, sends: 21, fails: 1},
		{name: "Ntfy Alerts", sentAgo: 1 * time.Hour, ok: true, sends: 12, fails: 0},
		{name: "Email Oncall", sentAgo: 3 * time.Hour, ok: true, sends: 5, fails: 0},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rolling back after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare("INSERT OR REPLACE INTO alert_health (alert_id, last_send_at, last_send_ok, last_error, send_count, fail_count) VALUES (?, ?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, seed := range seeds {
		alertID, known := alertIDs[seed.name]
		if !known {
			continue
		}
		lastSent := now.Add(-seed.sentAgo).Format(time.DateTime)
		if _, err := insert.Exec(alertID, lastSent, seed.ok, "", seed.sends, seed.fails); err != nil {
			return err
		}
	}
	return tx.Commit()
}
|
||||
|
||||
// monitorProfile describes the synthetic check history generated for one
// monitor: its latency range and an optional window of DOWN checks.
type monitorProfile struct {
	name         string
	minMs, maxMs int
	// downFrom is the first DOWN check index (-1 = always up); downTo is the
	// exclusive end of the DOWN window (use 60, the total, for a still-down monitor).
	downFrom, downTo int
}
|
||||
|
||||
func backfillHistory(db *sql.DB, rng *rand.Rand, now time.Time, ids map[string]int) error {
|
||||
// Latency ranges reflect monitoring public services over the internet, so the
|
||||
// detail histogram brackets the live latency the engine measures at capture time.
|
||||
// 60 checks * 24m spacing = a 24h window; dip indices place outages within it.
|
||||
profiles := []monitorProfile{
|
||||
{"Nextcloud", 200, 600, 47, 48}, // brief blip ~5h ago, recovered
|
||||
{"Jellyfin", 40, 180, 15, 16}, // brief blip ~18h ago, recovered
|
||||
{"Home Assistant", 30, 120, -1, 0}, //
|
||||
{"Gitea", 50, 200, -1, 0}, //
|
||||
{"Traefik Dashboard", 60, 200, -1, 0}, //
|
||||
{"Vaultwarden", 80, 250, -1, 0}, //
|
||||
{"Personal Blog", 40, 160, -1, 0}, //
|
||||
{"Immich", 60, 300, 30, 31}, // brief blip ~12h ago; periodic spikes below
|
||||
{"Auth Portal", 30, 90, 40, 60}, // DOWN ~8h ago, still down
|
||||
{"Edge Router", 5, 20, -1, 0}, // ping
|
||||
{"Postgres", 1, 6, -1, 0}, // port
|
||||
{"DNS Primary", 8, 30, -1, 0}, // dns
|
||||
}
|
||||
|
||||
tx, err := db.Begin()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() { _ = tx.Rollback() }()
|
||||
|
||||
stmt, err := tx.Prepare("INSERT INTO check_history (site_id, latency_ns, is_up, checked_at) VALUES (?, ?, ?, ?)")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer stmt.Close()
|
||||
|
||||
const total = 60
|
||||
for _, p := range profiles {
|
||||
siteID, ok := ids[p.name]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for i := 0; i < total; i++ {
|
||||
minutesAgo := (total - i) * 24
|
||||
checkedAt := now.Add(-time.Duration(minutesAgo) * time.Minute)
|
||||
|
||||
var latencyNs int64
|
||||
isUp := true
|
||||
|
||||
if p.downFrom >= 0 && i >= p.downFrom && i < p.downTo {
|
||||
latencyNs = 0
|
||||
isUp = false
|
||||
} else {
|
||||
ms := p.minMs + rng.IntN(p.maxMs-p.minMs)
|
||||
if p.name == "Immich" && i%17 == 0 {
|
||||
ms = 250 + rng.IntN(100)
|
||||
}
|
||||
latencyNs = int64(ms) * 1_000_000
|
||||
}
|
||||
|
||||
if _, err := stmt.Exec(siteID, latencyNs, isUp, checkedAt.Format("2006-01-02 15:04:05")); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// backfillStateChanges inserts the UP/DOWN transitions that accompany the
// seeded history into state_changes. Sites missing from ids are skipped, and
// all rows are written in a single transaction.
func backfillStateChanges(db *sql.DB, now time.Time, ids map[string]int) error {
	type transition struct {
		site, from, to, reason string
		at                     time.Time
	}
	ago := func(d time.Duration) time.Time { return now.Add(-d) }

	// Timed to line up with the history dips (Nextcloud ~5h, Immich ~12h, Jellyfin ~18h)
	// and the still-down Auth Portal (~8h), so detail panels read coherently.
	transitions := []transition{
		{"Nextcloud", "UP", "DOWN", "read timeout", ago(5*time.Hour + 8*time.Minute)},
		{"Nextcloud", "DOWN", "UP", "", ago(5 * time.Hour)},
		{"Auth Portal", "UP", "DOWN", "no such host", ago(8 * time.Hour)},
		{"Immich", "UP", "DOWN", "502 Bad Gateway", ago(12*time.Hour + 8*time.Minute)},
		{"Immich", "DOWN", "UP", "", ago(12 * time.Hour)},
		{"Jellyfin", "UP", "DOWN", "connection reset", ago(18*time.Hour + 5*time.Minute)},
		{"Jellyfin", "DOWN", "UP", "", ago(18 * time.Hour)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rolling back after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare("INSERT INTO state_changes (site_id, from_status, to_status, error_reason, changed_at) VALUES (?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, tr := range transitions {
		siteID, known := ids[tr.site]
		if !known {
			continue
		}
		if _, err := insert.Exec(siteID, tr.from, tr.to, tr.reason, tr.at.Format(time.DateTime)); err != nil {
			return err
		}
	}
	return tx.Commit()
}
|
||||
|
||||
// backfillLogs inserts a fixed set of demo log lines into the logs table in a
// single transaction. Each message is prefixed with a "[HH:MM]" bracket derived
// from the entry's own timestamp.
func backfillLogs(db *sql.DB, now time.Time) error {
	type entry struct {
		text string
		age  time.Duration // how long before now the event happened
	}
	hms := func(h, m, s int) time.Duration {
		return time.Duration(h)*time.Hour + time.Duration(m)*time.Minute + time.Duration(s)*time.Second
	}

	// Ordered newest-first. The bracket time is derived from the entry's age (not
	// hardcoded), so the Logs view, which renders the leading [HH:MM], reads
	// chronologically. Outage times line up with the seeded state changes and
	// history dips.
	entries := []entry{
		{"Monitor 'Nextcloud' recovered (was down 8m)", hms(5, 0, 0)},
		{"Monitor 'Nextcloud' confirmed DOWN: read timeout", hms(5, 8, 0)},
		{"Monitor 'Nextcloud' failed check 2/2", hms(5, 8, 30)},
		{"Monitor 'Nextcloud' failed check 1/2", hms(5, 9, 0)},
		{"Monitor 'Auth Portal' confirmed DOWN: no such host", hms(8, 0, 0)},
		{"Monitor 'Auth Portal' failed check 2/2", hms(8, 0, 30)},
		{"Monitor 'Auth Portal' failed check 1/2", hms(8, 1, 0)},
		{"Monitor 'Immich' recovered (was down 8m)", hms(12, 0, 0)},
		{"Monitor 'Immich' confirmed DOWN: 502 Bad Gateway", hms(12, 8, 0)},
		{"Monitor 'Immich' failed check 3/3", hms(12, 8, 30)},
		{"Monitor 'Immich' failed check 2/3", hms(12, 9, 0)},
		{"Monitor 'Immich' failed check 1/3", hms(12, 9, 30)},
		{"Monitor 'Jellyfin' recovered (was down 5m)", hms(18, 0, 0)},
		{"Monitor 'Jellyfin' confirmed DOWN: connection reset", hms(18, 5, 0)},
		{"Monitor 'Jellyfin' failed check 2/2", hms(18, 5, 30)},
		{"Monitor 'Jellyfin' failed check 1/2", hms(18, 6, 0)},
		{"SSL warning: certificate for 'Personal Blog' expires in 9 days", hms(20, 0, 0)},
		{"Engine RESUMED (Active)", hms(22, 0, 0)},
		{"Loaded check history from database", hms(22, 0, 5)},
	}

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rolling back after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	insert, err := tx.Prepare("INSERT INTO logs (message, created_at) VALUES (?, ?)")
	if err != nil {
		return err
	}
	defer insert.Close()

	for _, e := range entries {
		at := now.Add(-e.age)
		// Bracket in local time to match the engine's live AddLog timestamps;
		// created_at stays UTC to match the store's CURRENT_TIMESTAMP ordering.
		msg := "[" + at.Local().Format("15:04") + "] " + e.text
		if _, err := insert.Exec(msg, at.Format(time.DateTime)); err != nil {
			return err
		}
	}
	return tx.Commit()
}
|
||||
|
||||
// backfillNodes upserts three demo probe nodes, one per region, each with
// last_seen set to now. The statements run directly on db, not in a transaction.
func backfillNodes(db *sql.DB, now time.Time) error {
	type probeNode struct{ id, name, region string }

	const (
		upsert  = "INSERT OR REPLACE INTO nodes (id, name, region, last_seen, version) VALUES (?, ?, ?, ?, ?)"
		version = "2026.05.1"
	)

	// Multiple regions to show distributed probes. All seen "now" so they read ONLINE
	// for the whole capture window (kept under the 60s freshness threshold by the tape).
	lastSeen := now.Format(time.DateTime)
	for _, n := range []probeNode{
		{"node-use1", "leader", "us-east"},
		{"node-euw1", "probe-eu", "eu-west"},
		{"node-apse1", "probe-ap", "ap-southeast"},
	} {
		if _, err := db.Exec(upsert, n.id, n.name, n.region, lastSeen, version); err != nil {
			return err
		}
	}
	return nil
}
|
||||
|
||||
// backfillMaintenance seeds two demo maintenance windows in one transaction:
// a past two-hour "Jellyfin upgrade" three days ago, attached to the Jellyfin
// monitor, and a future four-hour "Network switch replacement" two days from
// now, stored with monitor_id 0.
//
// The Jellyfin window is skipped when that site is not in ids, matching how the
// other backfill steps treat unknown names. Without the check the missing
// lookup yields 0 and the window would be stored under monitor_id 0 instead of
// against Jellyfin.
func backfillMaintenance(db *sql.DB, now time.Time, ids map[string]int) error {
	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rolling back after a successful Commit is a harmless no-op.
	defer func() { _ = tx.Rollback() }()

	stmt, err := tx.Prepare("INSERT INTO maintenance_windows (monitor_id, title, description, type, start_time, end_time, created_by) VALUES (?, ?, ?, ?, ?, ?, ?)")
	if err != nil {
		return err
	}
	defer stmt.Close()

	// insert writes one "maintenance" window created by "admin" that starts at
	// start and lasts for length.
	insert := func(monitorID int, title, description string, start time.Time, length time.Duration) error {
		_, err := stmt.Exec(monitorID, title, description, "maintenance",
			start.Format(time.DateTime),
			start.Add(length).Format(time.DateTime),
			"admin")
		return err
	}

	if jellyfinID, ok := ids["Jellyfin"]; ok {
		past := now.Add(-3 * 24 * time.Hour)
		if err := insert(jellyfinID, "Jellyfin upgrade", "Upgrade to v10.10 + plugin updates", past, 2*time.Hour); err != nil {
			return err
		}
	}

	future := now.Add(2 * 24 * time.Hour)
	if err := insert(0, "Network switch replacement", "Replacing core switch in rack 2", future, 4*time.Hour); err != nil {
		return err
	}

	return tx.Commit()
}
|
||||
@@ -0,0 +1,123 @@
|
||||
// Command crop trims the uniform background border around each VHS screenshot so the
|
||||
// content fills the frame instead of floating in a large empty terminal. Sparse views
|
||||
// (alerts, detail, nodes) would otherwise sit in a sea of dead space.
|
||||
//
|
||||
// Usage: crop [dir] (dir defaults to vhs/screenshots)
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"image/png"
|
||||
"os"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// pad is the margin, in pixels, kept around the detected content.
const pad = 24

// tol is the threshold on the summed per-channel colour distance from the
// background: a pixel above it counts as content rather than background.
const tol = 28
|
||||
|
||||
func main() {
|
||||
dir := "vhs/screenshots"
|
||||
if len(os.Args) > 1 {
|
||||
dir = os.Args[1]
|
||||
}
|
||||
paths, err := filepath.Glob(filepath.Join(dir, "*.png"))
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "glob: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if len(paths) == 0 {
|
||||
fmt.Fprintf(os.Stderr, "no PNGs in %s\n", dir)
|
||||
os.Exit(1)
|
||||
}
|
||||
for _, p := range paths {
|
||||
w, h, err := cropFile(p)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "crop %s: %v\n", p, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
fmt.Printf("cropped %s -> %dx%d\n", filepath.Base(p), w, h)
|
||||
}
|
||||
}
|
||||
|
||||
func cropFile(path string) (int, int, error) {
|
||||
f, err := os.Open(path) //nolint:gosec // dev tool: paths come from a trusted local glob
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
src, err := png.Decode(f)
|
||||
_ = f.Close()
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
|
||||
b := src.Bounds()
|
||||
// Background colour sampled from a corner — always inside VHS's blank padding.
|
||||
bgR, bgG, bgB := rgb(src.At(b.Min.X+2, b.Min.Y+2))
|
||||
|
||||
minX, minY := b.Max.X, b.Max.Y
|
||||
maxX, maxY := b.Min.X, b.Min.Y
|
||||
found := false
|
||||
for y := b.Min.Y; y < b.Max.Y; y++ {
|
||||
for x := b.Min.X; x < b.Max.X; x++ {
|
||||
r, g, bl := rgb(src.At(x, y))
|
||||
if abs(r-bgR)+abs(g-bgG)+abs(bl-bgB) > tol {
|
||||
found = true
|
||||
minX, minY = min(minX, x), min(minY, y)
|
||||
maxX, maxY = max(maxX, x), max(maxY, y)
|
||||
}
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
return b.Dx(), b.Dy(), nil // blank frame — leave untouched
|
||||
}
|
||||
|
||||
minX = clamp(minX-pad, b.Min.X, b.Max.X)
|
||||
minY = clamp(minY-pad, b.Min.Y, b.Max.Y)
|
||||
maxX = clamp(maxX+pad+1, b.Min.X, b.Max.X)
|
||||
maxY = clamp(maxY+pad+1, b.Min.Y, b.Max.Y)
|
||||
|
||||
dst := image.NewRGBA(image.Rect(0, 0, maxX-minX, maxY-minY))
|
||||
for y := minY; y < maxY; y++ {
|
||||
for x := minX; x < maxX; x++ {
|
||||
dst.Set(x-minX, y-minY, src.At(x, y))
|
||||
}
|
||||
}
|
||||
|
||||
out, err := os.Create(path) //nolint:gosec // dev tool: paths come from a trusted local glob
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer out.Close() //nolint:errcheck // best-effort close on write path
|
||||
if err := png.Encode(out, dst); err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return dst.Bounds().Dx(), dst.Bounds().Dy(), nil
|
||||
}
|
||||
|
||||
// rgb returns the red, green and blue channels of c as 8-bit values (0-255).
// color.Color.RGBA yields 16-bit channels, so each is reduced to its high
// byte; the alpha channel is discarded.
func rgb(c color.Color) (int, int, int) {
	const shift = 8 // 16-bit channel -> 8-bit channel
	red, green, blue, _ := c.RGBA()
	return int(red >> shift), int(green >> shift), int(blue >> shift)
}
|
||||
|
||||
// abs returns the absolute value of x.
func abs(x int) int {
	if x >= 0 {
		return x
	}
	return -x
}
|
||||
|
||||
// clamp limits v to the range [lo, hi]: values below lo become lo, values
// above hi become hi, and anything else is returned unchanged. The lower
// bound is checked first.
func clamp(v, lo, hi int) int {
	switch {
	case v < lo:
		return lo
	case v > hi:
		return hi
	default:
		return v
	}
}
|
||||
@@ -0,0 +1,72 @@
|
||||
Set Shell "bash"
|
||||
Set Width 1400
|
||||
Set Height 800
|
||||
Set FontSize 14
|
||||
Set Padding 20
|
||||
Set Framerate 15
|
||||
Set TypingSpeed 50ms
|
||||
|
||||
# Seed demo data + start uptop (UPTOP_DEMO=1 → stable pulse dot for stills).
|
||||
Hide
|
||||
Type "bash vhs/setup.sh /tmp/uptop-vhs.db"
|
||||
Enter
|
||||
# Warm-up: push heartbeat lands (~10s) and initial checks settle. Kept short so every
|
||||
# capture stays inside the 60s node-freshness window (consistent "3 probes" footer).
|
||||
Sleep 18s
|
||||
Show
|
||||
Sleep 2s
|
||||
|
||||
# 1. Sites — hero shot: mixed states, history sparklines, SSL, retries.
|
||||
Screenshot vhs/screenshots/monitors.png
|
||||
Sleep 1s
|
||||
|
||||
# 2. Detail — drill into Nextcloud (6th row from the top).
|
||||
Down
|
||||
Sleep 150ms
|
||||
Down
|
||||
Sleep 150ms
|
||||
Down
|
||||
Sleep 150ms
|
||||
Down
|
||||
Sleep 150ms
|
||||
Down
|
||||
Sleep 300ms
|
||||
Type "i"
|
||||
Sleep 2s
|
||||
Screenshot vhs/screenshots/detail.png
|
||||
Sleep 500ms
|
||||
Escape
|
||||
Sleep 1s
|
||||
|
||||
# 3. Alerts — channels with health dots + recent "last sent".
|
||||
Tab
|
||||
Sleep 1500ms
|
||||
Screenshot vhs/screenshots/alerts.png
|
||||
Sleep 500ms
|
||||
|
||||
# 4. Logs — chronological, severity-coloured event stream.
|
||||
Tab
|
||||
Sleep 1500ms
|
||||
Screenshot vhs/screenshots/logs.png
|
||||
Sleep 500ms
|
||||
|
||||
# 5. Nodes — distributed probes across regions.
|
||||
Tab
|
||||
Sleep 1500ms
|
||||
Screenshot vhs/screenshots/nodes.png
|
||||
Sleep 500ms
|
||||
|
||||
# 6. Theme — cycle to the next theme, return to Sites for an alternate-palette hero.
|
||||
Type "T"
|
||||
Sleep 500ms
|
||||
Tab
|
||||
Sleep 200ms
|
||||
Tab
|
||||
Sleep 200ms
|
||||
Tab
|
||||
Sleep 1s
|
||||
Screenshot vhs/screenshots/theme.png
|
||||
Sleep 500ms
|
||||
|
||||
Type "q"
|
||||
Sleep 1s
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 84 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 78 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 206 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 232 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 57 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 253 KiB |
+141
@@ -0,0 +1,141 @@
|
||||
alerts:
|
||||
- name: Discord Homelab
|
||||
type: discord
|
||||
settings:
|
||||
url: https://discord.com/api/webhooks/1234567890/demo-token
|
||||
|
||||
- name: Ntfy Alerts
|
||||
type: webhook
|
||||
settings:
|
||||
url: https://ntfy.example.com/homelab-alerts
|
||||
|
||||
- name: Email Oncall
|
||||
type: email
|
||||
settings:
|
||||
host: smtp.example.com
|
||||
port: "587"
|
||||
user: alerts@example.com
|
||||
pass: "••••••••"
|
||||
from: alerts@example.com
|
||||
to: oncall@example.com
|
||||
|
||||
- name: Slack Ops
|
||||
type: slack
|
||||
settings:
|
||||
url: https://hooks.slack.com/services/T00000/B00000/demo-token
|
||||
|
||||
monitors:
|
||||
# HTTP — homelab services
|
||||
- name: Nextcloud
|
||||
type: http
|
||||
url: https://nextcloud.com
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 2
|
||||
|
||||
- name: Jellyfin
|
||||
type: http
|
||||
url: https://jellyfin.org
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
max_retries: 2
|
||||
|
||||
- name: Home Assistant
|
||||
type: http
|
||||
url: https://www.home-assistant.io
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
max_retries: 3
|
||||
|
||||
- name: Gitea
|
||||
type: http
|
||||
url: https://about.gitea.com
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 2
|
||||
|
||||
- name: Traefik Dashboard
|
||||
type: http
|
||||
url: https://traefik.io
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
max_retries: 1
|
||||
|
||||
- name: Vaultwarden
|
||||
type: http
|
||||
url: https://bitwarden.com
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 3
|
||||
|
||||
- name: Personal Blog
|
||||
type: http
|
||||
url: https://jvns.ca
|
||||
interval: 120
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 14
|
||||
max_retries: 2
|
||||
|
||||
- name: Immich
|
||||
type: http
|
||||
url: https://immich.app
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
check_ssl: true
|
||||
expiry_threshold: 7
|
||||
max_retries: 3
|
||||
|
||||
# HTTP — deliberate failure (non-resolving homelab host → stays DOWN)
|
||||
- name: Auth Portal
|
||||
type: http
|
||||
url: https://auth.home.arpa
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
max_retries: 2
|
||||
|
||||
# Push — cron jobs
|
||||
- name: Nightly Backup
|
||||
type: push
|
||||
interval: 300
|
||||
alert: Discord Homelab
|
||||
|
||||
- name: Cert Renewal
|
||||
type: push
|
||||
interval: 300
|
||||
alert: Discord Homelab
|
||||
|
||||
# Infrastructure group
|
||||
- name: Infrastructure
|
||||
type: group
|
||||
alert: Discord Homelab
|
||||
monitors:
|
||||
- name: Edge Router
|
||||
type: ping
|
||||
hostname: 8.8.8.8
|
||||
interval: 30
|
||||
alert: Discord Homelab
|
||||
timeout: 5
|
||||
|
||||
- name: Postgres
|
||||
type: port
|
||||
hostname: localhost
|
||||
port: 18099
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
timeout: 5
|
||||
|
||||
- name: DNS Primary
|
||||
type: dns
|
||||
hostname: google.com
|
||||
dns_server: 8.8.8.8
|
||||
dns_resolve_type: A
|
||||
interval: 60
|
||||
alert: Discord Homelab
|
||||
timeout: 5
|
||||
Executable
+31
@@ -0,0 +1,31 @@
|
||||
#!/bin/bash
# VHS screenshot setup: seed monitors, backfill history, start server.
#
# Usage: setup.sh <db-path>
#
# Expects to run from the repository root: it invokes ./uptop and reads
# vhs/seed.yaml and ./vhs/backfill/ by relative path.
set -e
DB="${1:?usage: setup.sh <db-path>}"

# Start from a clean database, including the SQLite -shm/-wal sidecar files.
rm -f "$DB" "$DB-shm" "$DB-wal"

echo "==> Seeding monitors and alerts..."
UPTOP_DB_DSN="$DB" ./uptop apply -f vhs/seed.yaml 2>&1

echo "==> Backfilling check history..."
# Build first so the backfill's `now` (node last_seen, heartbeat timing) isn't racing
# a cold compile — keeps the capture window deterministic.
go build -o /tmp/uptop-backfill ./vhs/backfill/
BACKFILL_OUT=$(/tmp/uptop-backfill "$DB")
echo "$BACKFILL_OUT"

# Take everything after the first '=' (-f2-) so a token that itself contains
# '=' (e.g. base64 padding) is not truncated.
PUSH_TOKEN=$(echo "$BACKFILL_OUT" | grep '^PUSH_TOKEN=' | cut -d= -f2-)
if [ -n "$PUSH_TOKEN" ]; then
  echo "==> Sending push heartbeat in 10s (background)..."
  # Delayed and backgrounded because the server is only started by the exec below.
  (sleep 10 && curl -s "http://localhost:18099/api/push" -H "Authorization: Bearer $PUSH_TOKEN" > /dev/null 2>&1) &
fi

echo "==> Starting uptop server..."
# exec replaces this shell with the server process; its stderr is discarded.
exec env \
  UPTOP_DB_DSN="$DB" \
  UPTOP_PORT=23299 \
  UPTOP_HTTP_PORT=18099 \
  UPTOP_ALLOW_PRIVATE_TARGETS=true \
  UPTOP_DEMO=1 \
  ./uptop serve 2>/dev/null
|
||||
Reference in New Issue
Block a user