diff --git a/cmd/uptop/keycache_test.go b/cmd/uptop/keycache_test.go new file mode 100644 index 0000000..23df5ac --- /dev/null +++ b/cmd/uptop/keycache_test.go @@ -0,0 +1,115 @@ +package main + +import ( + "crypto/ed25519" + "crypto/rand" + "errors" + "testing" + "time" + + "gitea.lerkolabs.com/lerkolabs/uptop/internal/models" + "gitea.lerkolabs.com/lerkolabs/uptop/internal/store" + + "github.com/charmbracelet/ssh" + gossh "golang.org/x/crypto/ssh" +) + +// kcMockStore implements only what keyCache and userInvalidatingStore touch; +// any other Store method panics via the embedded nil interface. +type kcMockStore struct { + store.Store + users []models.User + err error +} + +func (m *kcMockStore) GetAllUsers() ([]models.User, error) { return m.users, m.err } +func (m *kcMockStore) DeleteUser(int) error { return nil } + +func testKey(t *testing.T) (string, ssh.PublicKey) { + t.Helper() + pub, _, err := ed25519.GenerateKey(rand.Reader) + if err != nil { + t.Fatal(err) + } + sk, err := gossh.NewPublicKey(pub) + if err != nil { + t.Fatal(err) + } + return string(gossh.MarshalAuthorizedKey(sk)), sk +} + +func TestKeyCache_AllowsKnownDeniesUnknown(t *testing.T) { + authorized, known := testKey(t) + _, unknown := testKey(t) + kc := newKeyCache(&kcMockStore{users: []models.User{{PublicKey: authorized}}}) + + if !kc.IsAllowed(known) { + t.Error("known key denied") + } + if kc.IsAllowed(unknown) { + t.Error("unknown key allowed") + } +} + +func TestKeyCache_RetainsKeysOnRefreshError(t *testing.T) { + authorized, known := testKey(t) + ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}} + kc := newKeyCache(ms) + + if !kc.IsAllowed(known) { + t.Fatal("known key denied on first refresh") + } + + // DB goes down and the cache goes stale: a transient error must not lock + // every admin out — the previous key set stays in effect. + ms.err = errors.New("db down") + kc.mu.Lock() + kc.updated = time.Now().Add(-time.Hour) + kc.mu.Unlock() + + if !kc.IsAllowed(known) { + t.Error("transient refresh error locked out a previously valid key") + } +} + +func TestKeyCache_FailsClosedAfterInvalidate(t *testing.T) { + authorized, known := testKey(t) + ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}} + kc := newKeyCache(ms) + + if !kc.IsAllowed(known) { + t.Fatal("known key denied on first refresh") + } + + // Revocation happened (Invalidate) and the DB is unreachable for the + // re-read: the revoked key must NOT keep working off the stale cache. + ms.err = errors.New("db down") + kc.Invalidate() + + if kc.IsAllowed(known) { + t.Error("revoked key still allowed while DB is down — fails open") + } +} + +func TestUserInvalidatingStore_DeleteDropsKeyCache(t *testing.T) { + authorized, known := testKey(t) + ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}} + kc := newKeyCache(ms) + s := &userInvalidatingStore{Store: ms, kc: kc} + + if !kc.IsAllowed(known) { + t.Fatal("known key denied on first refresh") + } + + // Revoke the user; DB unreachable immediately after. The cached key must + // be gone the moment the delete returns. + if err := s.DeleteUser(1); err != nil { + t.Fatal(err) + } + ms.users = nil + ms.err = errors.New("db down") + + if kc.IsAllowed(known) { + t.Error("deleted user's key still allowed from stale cache") + } +} diff --git a/cmd/uptop/main.go b/cmd/uptop/main.go index 0a00dfe..1d5410b 100644 --- a/cmd/uptop/main.go +++ b/cmd/uptop/main.go @@ -376,7 +376,8 @@ func runServe(args []string) { fmt.Println("WARNING: No UPTOP_ENCRYPTION_KEY set. Alert credentials stored unencrypted.") } - var s store.Store = ss + kc := newKeyCache(ss) + var s store.Store = &userInvalidatingStore{Store: ss, kc: kc} if err := s.Init(); err != nil { fmt.Fprintf(os.Stderr, "database init error: %v\n", err) os.Exit(1) @@ -430,6 +431,10 @@ func runServe(args []string) { tlsCert := os.Getenv("UPTOP_TLS_CERT") tlsKey := os.Getenv("UPTOP_TLS_KEY") + // When the local TUI owns the terminal, per-request HTTP logs to stderr + // would scribble over the alt screen. + localTUI := isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd()) + httpSrv := server.Start(server.ServerConfig{ Port: httpPort, EnableStatus: enableStatus, @@ -441,6 +446,7 @@ func runServe(args []string) { MetricsPublic: os.Getenv("UPTOP_METRICS_PUBLIC") == "true", CORSOrigin: os.Getenv("UPTOP_CORS_ORIGIN"), TrustedProxies: parseTrustedProxies(os.Getenv("UPTOP_TRUSTED_PROXIES")), + QuietHTTPLog: localTUI, }, s, eng) cluster.Start(ctx, cluster.Config{ @@ -449,10 +455,9 @@ func runServe(args []string) { SharedKey: clusterKey, }, eng) - kc := newKeyCache(s) sshSrv := startSSHServer(*port, s, eng, kc) - if isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd()) { + if localTUI { p := tea.NewProgram(tui.InitialModel(true, s, eng, version), tea.WithAltScreen(), tea.WithMouseCellMotion()) if _, err := p.Run(); err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) @@ -573,6 +578,10 @@ func newKeyCache(db store.Store) *keyCache { func (c *keyCache) refresh() { users, err := c.db.GetAllUsers() if err != nil { + // Keep the previous key set: a transient DB error must not lock every + // admin out. Revocation still fails closed because Invalidate clears + // the set immediately. + log.Printf("SSH key cache refresh failed: %v", err) return } keys := make([]ssh.PublicKey, 0, len(users)) @@ -589,8 +598,13 @@ func (c *keyCache) refresh() { c.mu.Unlock() } +// Invalidate clears the cached key set, not just the timestamp. If the +// refresh that follows a user revocation fails, auth fails closed (everyone +// re-authenticates after the next successful refresh) instead of the revoked +// key silently continuing to work off the stale cache. func (c *keyCache) Invalidate() { c.mu.Lock() + c.keys = nil c.updated = time.Time{} c.mu.Unlock() } @@ -614,6 +628,39 @@ func (c *keyCache) IsAllowed(incomingKey ssh.PublicKey) bool { return false } +// userInvalidatingStore drops the SSH key cache whenever the user table +// changes, so a revocation takes effect on the next connection attempt +// instead of after the cache TTL — and fails closed if the DB is unreachable +// when that next attempt re-reads the table. +type userInvalidatingStore struct { + store.Store + kc *keyCache +} + +func (s *userInvalidatingStore) AddUser(username, publicKey, role string) error { + err := s.Store.AddUser(username, publicKey, role) + s.kc.Invalidate() + return err +} + +func (s *userInvalidatingStore) UpdateUser(id int, username, publicKey, role string) error { + err := s.Store.UpdateUser(id, username, publicKey, role) + s.kc.Invalidate() + return err +} + +func (s *userInvalidatingStore) DeleteUser(id int) error { + err := s.Store.DeleteUser(id) + s.kc.Invalidate() + return err +} + +func (s *userInvalidatingStore) ImportData(data models.Backup) error { + err := s.Store.ImportData(data) + s.kc.Invalidate() + return err +} + func seedKeysFromEnv(s store.Store) { var keys []string diff --git a/go.mod b/go.mod index 96104a2..a8a895a 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/mattn/go-sqlite3 v1.14.33 github.com/miekg/dns v1.1.72 github.com/prometheus-community/pro-bing v0.8.0 + golang.org/x/crypto v0.52.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -50,7 +51,6 @@ require ( github.com/muesli/termenv v0.16.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect - golang.org/x/crypto v0.52.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/mod v0.35.0 // indirect golang.org/x/net v0.55.0 // indirect diff --git a/internal/models/redact.go b/internal/models/redact.go new file mode 100644 index 0000000..5133647 --- /dev/null +++ b/internal/models/redact.go @@ -0,0 +1,36 @@ +package models + +// safeAlertSettingKeys lists, per provider type, the alert settings that are +// NOT secret and may be shown or exported in the clear. Everything else is +// redacted. Providers absent from this map (discord, slack, webhook, pushover) +// carry their secret in a field a denylist would miss — the webhook URL, the +// pushover token/user — so all of their settings are redacted. +var safeAlertSettingKeys = map[string]map[string]bool{ + "email": {"host": true, "port": true, "to": true, "from": true}, + "ntfy": {"topic": true, "priority": true}, + "telegram": {"chat_id": true}, + "pagerduty": {"severity": true}, + "gotify": {"priority": true}, + "opsgenie": {"priority": true, "eu": true}, +} + +// RedactAlertSettings keeps only the known-safe keys for the alert type and +// redacts everything else. An allowlist fails safe: an unknown or newly added +// setting is redacted by default instead of leaking. Shared by the backup +// export path and the TUI alert detail panel so both render through the same +// policy. +func RedactAlertSettings(alertType string, settings map[string]string) map[string]string { + safe := safeAlertSettingKeys[alertType] + redacted := make(map[string]string, len(settings)) + for k, v := range settings { + switch { + case v == "": + redacted[k] = "" + case safe[k]: + redacted[k] = v + default: + redacted[k] = "***REDACTED***" + } + } + return redacted +} diff --git a/internal/server/server.go b/internal/server/server.go index 8439207..0430a26 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -33,40 +33,8 @@ func extractBearerToken(r *http.Request) string { return "" } -// safeSettingKeys lists, per provider type, the settings that are NOT secret -// and may be exported in the clear. Everything else is redacted. Providers -// absent from this map (discord, slack, webhook, pushover) carry their secret -// in a field a denylist would miss — the webhook URL, the pushover token/user — -// so all of their settings are redacted. -var safeSettingKeys = map[string]map[string]bool{ - "email": {"host": true, "port": true, "to": true, "from": true}, - "ntfy": {"topic": true, "priority": true}, - "telegram": {"chat_id": true}, - "pagerduty": {"severity": true}, - "gotify": {"priority": true}, - "opsgenie": {"priority": true, "eu": true}, -} - -// redactByProvider keeps only the known-safe keys for the alert type and -// redacts everything else. An allowlist fails safe: an unknown or newly added -// setting is redacted by default instead of leaking. This closes the denylist -// gap where url (discord/slack/webhook/ntfy/gotify) and api_key (opsgenie) — -// the actual credentials — were exported in the clear. -func redactByProvider(alertType string, settings map[string]string) map[string]string { - safe := safeSettingKeys[alertType] - redacted := make(map[string]string, len(settings)) - for k, v := range settings { - switch { - case v == "": - redacted[k] = "" - case safe[k]: - redacted[k] = v - default: - redacted[k] = "***REDACTED***" - } - } - return redacted -} +// Alert-settings redaction policy lives in models.RedactAlertSettings so the +// TUI detail panel and this export path share one allowlist. var statusTpl = template.Must(template.New("status").Parse(` @@ -211,6 +179,23 @@ type ServerConfig struct { MetricsPublic bool CORSOrigin string TrustedProxies []*net.IPNet + // QuietHTTPLog disables per-request stderr logging. Set when the local + // TUI owns the terminal — request logs would scribble over the alt screen. + QuietHTTPLog bool +} + +// statusSite is the public DTO for /status/json. models.Site must never be +// serialized raw here: it carries internal fields (LastError, Hostname, Port, +// DNSServer, AlertID, Token, ...) and every field added to it would become +// public by default. Field names match what the status page JS reads. +type statusSite struct { + Name string + Type string + URL string + Status string + Paused bool + LastCheck time.Time + Latency time.Duration } func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server { @@ -278,7 +263,7 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server { } if r.URL.Query().Get("redact_secrets") != "false" { for i := range data.Alerts { - data.Alerts[i].Settings = redactByProvider(data.Alerts[i].Type, data.Alerts[i].Settings) + data.Alerts[i].Settings = models.RedactAlertSettings(data.Alerts[i].Type, data.Alerts[i].Settings) } } _ = json.NewEncoder(w).Encode(data) //nolint:errcheck @@ -483,18 +468,27 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server { maintSet[mw.MonitorID] = true } } + public := make(map[int]statusSite, len(state)) for id, site := range state { - site.Token = "" + status := site.Status if allInMaint || maintSet[site.ID] || (site.ParentID > 0 && maintSet[site.ParentID]) { - site.Status = "MAINT" + status = "MAINT" + } + public[id] = statusSite{ + Name: site.Name, + Type: site.Type, + URL: site.URL, + Status: status, + Paused: site.Paused, + LastCheck: site.LastCheck, + Latency: site.Latency, } - state[id] = site } if cfg.CORSOrigin != "" { w.Header().Set("Access-Control-Allow-Origin", cfg.CORSOrigin) } w.Header().Set("Content-Type", "application/json") - _ = json.NewEncoder(w).Encode(state) //nolint:errcheck + _ = json.NewEncoder(w).Encode(public) //nolint:errcheck })) } @@ -502,7 +496,10 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server { fmt.Println("WARNING: Cluster mode active without TLS. Secrets transmitted in cleartext.") } - handler := loggingMiddleware(cfg.TrustedProxies, securityHeadersMiddleware(mux)) + handler := securityHeadersMiddleware(mux) + if !cfg.QuietHTTPLog { + handler = loggingMiddleware(cfg.TrustedProxies, handler) + } if cfg.TLSCert != "" { handler = hstsMiddleware(handler) } diff --git a/internal/server/server_test.go b/internal/server/server_test.go index 377293e..4609096 100644 --- a/internal/server/server_test.go +++ b/internal/server/server_test.go @@ -2,6 +2,7 @@ package server import ( "bytes" + "context" "encoding/json" "fmt" "net" @@ -480,15 +481,32 @@ func TestStatusPage_Enabled(t *testing.T) { } } -func TestStatusJSON_TokensStripped(t *testing.T) { +func TestStatusJSON_PublicDTOOnly(t *testing.T) { ts := newTestServer(t, "secret", true) - // Inject a site with a token into engine state - ts.engine.UpdateSiteConfig(models.Site{ID: 1, Name: "test", Type: "push", Token: "secret-token", Status: "UP"}) - // Need to inject directly since UpdateSiteConfig only updates existing - func() { - ts.engine.RecordHeartbeat("unused") // just to exercise, won't match - }() + // Seed a push monitor (no network IO) through the store and start the + // engine so its poll loop loads it into live state — the path real sites + // take. The old version of this test injected via UpdateSiteConfig, which + // no-ops for unknown IDs, so it asserted over zero sites and passed + // against a server that leaked tokens. + ts.store.sites = []models.Site{{ + ID: 1, Name: "test", Type: "push", Token: "secret-token", + Hostname: "internal-host", LastError: "internal failure detail", AlertID: 3, + }} + ctx, cancel := context.WithCancel(context.Background()) + ts.engine.Start(ctx) + t.Cleanup(func() { + cancel() + ts.engine.Stop() + }) + + deadline := time.Now().Add(2 * time.Second) + for time.Now().Before(deadline) && len(ts.engine.GetLiveState()) == 0 { + time.Sleep(10 * time.Millisecond) + } + if len(ts.engine.GetLiveState()) == 0 { + t.Fatal("engine never loaded the seeded site") + } resp, err := http.Get(ts.baseURL + "/status/json") if err != nil { @@ -498,11 +516,23 @@ func TestStatusJSON_TokensStripped(t *testing.T) { if resp.StatusCode != 200 { t.Errorf("expected 200, got %d", resp.StatusCode) } - var state map[string]models.Site - json.NewDecoder(resp.Body).Decode(&state) + + // Decode raw so absent struct fields can't mask leaked JSON keys. + var state map[string]map[string]any + if err := json.NewDecoder(resp.Body).Decode(&state); err != nil { + t.Fatal(err) + } + if len(state) != 1 { + t.Fatalf("expected 1 site in status JSON, got %d", len(state)) + } for _, site := range state { - if site.Token != "" { - t.Error("expected token stripped from status JSON response") + if site["Name"] != "test" { + t.Errorf("expected Name to be public, got %v", site["Name"]) + } + for _, leaked := range []string{"Token", "LastError", "Hostname", "Port", "DNSServer", "AlertID", "AcceptedCodes", "Interval"} { + if _, ok := site[leaked]; ok { + t.Errorf("status JSON leaks internal field %q", leaked) + } } } } @@ -656,7 +686,7 @@ func TestRedactByProvider(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - out := redactByProvider(tt.typ, tt.in) + out := models.RedactAlertSettings(tt.typ, tt.in) for _, k := range tt.redacted { if out[k] != "***REDACTED***" { t.Errorf("key %q: expected redacted, got %q", k, out[k]) diff --git a/internal/tui/tab_alerts.go b/internal/tui/tab_alerts.go index 2f050dc..f7c7e1a 100644 --- a/internal/tui/tab_alerts.go +++ b/internal/tui/tab_alerts.go @@ -2,8 +2,11 @@ package tui import ( "fmt" + neturl "net/url" + "sort" "strings" + "gitea.lerkolabs.com/lerkolabs/uptop/internal/models" "gitea.lerkolabs.com/lerkolabs/uptop/internal/monitor" tea "github.com/charmbracelet/bubbletea" "github.com/charmbracelet/huh" @@ -100,15 +103,17 @@ func (m Model) fmtAlertConfig(alert struct { return m.st.subtleStyle.Render("—") case "pagerduty": if key := alert.Settings["routing_key"]; key != "" { - return limitStr(key, 34) + return limitStr(maskSecret(key), 34) } return m.st.subtleStyle.Render("—") case "pushover": if user := alert.Settings["user"]; user != "" { - return limitStr(fmt.Sprintf("user:%s", user), 34) + return limitStr(fmt.Sprintf("user:%s", maskSecret(user)), 34) } return m.st.subtleStyle.Render("—") case "gotify": + // The gotify server URL identifies the target; the token is the + // secret and is never shown here. if url := alert.Settings["url"]; url != "" { return limitStr(url, 34) } @@ -116,10 +121,7 @@ func (m Model) fmtAlertConfig(alert struct { case "opsgenie": key := alert.Settings["api_key"] if key != "" { - masked := key - if len(masked) > 8 { - masked = masked[:4] + "…" + masked[len(masked)-4:] - } + masked := maskSecret(key) if alert.Settings["eu"] == "true" { return limitStr(fmt.Sprintf("EU %s", masked), 34) } @@ -127,13 +129,33 @@ func (m Model) fmtAlertConfig(alert struct { } return m.st.subtleStyle.Render("—") default: - if val, ok := alert.Settings["url"]; ok { - return limitStr(val, 34) + // discord/slack/webhook: the URL path IS the credential — show only + // enough to identify the target. + if val, ok := alert.Settings["url"]; ok && val != "" { + return limitStr(maskWebhookURL(val), 34) } return m.st.subtleStyle.Render("—") } } +// maskSecret keeps just enough of a credential to identify it. +func maskSecret(s string) string { + if len(s) > 8 { + return s[:4] + "…" + s[len(s)-4:] + } + return "●●●●●●●●" +} + +// maskWebhookURL shows scheme and host only. For discord, slack, and generic +// webhooks the URL path carries the token, so the path is never rendered. +func maskWebhookURL(raw string) string { + u, err := neturl.Parse(raw) + if err != nil || u.Host == "" { + return "●●●●●●●●" + } + return u.Scheme + "://" + u.Host + "/…" +} + func (m Model) fmtAlertHealth(h monitor.AlertHealth) string { if h.LastSendAt.IsZero() { return m.st.subtleStyle.Render("●") @@ -229,7 +251,21 @@ func (m Model) viewAlertDetailPanel() string { b.WriteString(m.divider() + "\n") b.WriteString(m.st.subtleStyle.Render(" CONFIGURATION") + "\n") - for k, v := range a.Settings { + // Render through the same allowlist the backup export uses — this panel + // ends up in screen shares and asciinema recordings. Keys are sorted so + // rows don't reshuffle every render. + redacted := models.RedactAlertSettings(a.Type, a.Settings) + keys := make([]string, 0, len(redacted)) + for k := range redacted { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + v := redacted[k] + if v == "***REDACTED***" { + row(k, m.st.subtleStyle.Render("●●●●●●●●")) + continue + } row(k, v) } diff --git a/internal/tui/tab_alerts_test.go b/internal/tui/tab_alerts_test.go new file mode 100644 index 0000000..71e967d --- /dev/null +++ b/internal/tui/tab_alerts_test.go @@ -0,0 +1,68 @@ +package tui + +import ( + "strings" + "testing" + + "gitea.lerkolabs.com/lerkolabs/uptop/internal/models" +) + +func TestAlertDetailPanel_MasksSecretsStableOrder(t *testing.T) { + m := newTestModel(&tuiMockStore{}) + m.termWidth, m.termHeight = 120, 40 + m.alerts = []models.AlertConfig{{ + ID: 1, Name: "ops", Type: "email", + Settings: map[string]string{ + "host": "smtp.example.com", + "port": "587", + "user": "oncall@example.com", + "pass": "hunter2-secret", + "to": "team@example.com", + }, + }} + m.cursor = 0 + + out := m.viewAlertDetailPanel() + if strings.Contains(out, "hunter2-secret") { + t.Error("SMTP password rendered in alert detail panel") + } + if strings.Contains(out, "oncall@example.com") { + t.Error("SMTP user (not on the allowlist) rendered in alert detail panel") + } + if !strings.Contains(out, "smtp.example.com") { + t.Error("allowlisted setting (host) missing from panel") + } + + // Map iteration must not reshuffle rows between renders. + for i := 0; i < 5; i++ { + if m.viewAlertDetailPanel() != out { + t.Fatal("panel output unstable across renders — settings keys not sorted") + } + } +} + +func TestFmtAlertConfig_MasksSecrets(t *testing.T) { + m := newTestModel(&tuiMockStore{}) + + webhook := m.fmtAlertConfig(struct { + Type string + Settings map[string]string + }{"discord", map[string]string{"url": "https://discord.com/api/webhooks/123456/SeCrEtToKeN"}}) + if strings.Contains(webhook, "SeCrEtToKeN") || strings.Contains(webhook, "123456") { + t.Errorf("webhook URL path (the credential) rendered in table: %q", webhook) + } + if !strings.Contains(webhook, "discord.com") { + t.Errorf("webhook host missing from table config: %q", webhook) + } + + pd := m.fmtAlertConfig(struct { + Type string + Settings map[string]string + }{"pagerduty", map[string]string{"routing_key": "R0123456789ABCDEFGHIJ"}}) + if strings.Contains(pd, "R0123456789ABCDEFGHIJ") { + t.Errorf("pagerduty routing key rendered raw in table: %q", pd) + } + if !strings.Contains(pd, "R012") || !strings.Contains(pd, "GHIJ") { + t.Errorf("masked routing key should keep identifying ends: %q", pd) + } +}