fix(security): make SSH key revocation fail closed
CI / test (pull_request) Successful in 2m37s
CI / lint (pull_request) Successful in 56s
CI / vulncheck (pull_request) Successful in 51s

keyCache.Invalidate existed but had zero callers, and refresh silently
swallowed store errors — a revoked key kept working off the stale
cache for as long as the DB stayed down.

Invalidate now clears the key set (not just the timestamp) and is
wired through userInvalidatingStore, a decorator at the composition
root that drops the cache on AddUser/UpdateUser/DeleteUser/ImportData.
Transient refresh errors still retain the previous key set so a DB
blip can't lock every admin out, but a post-revocation refresh failure
denies. Refresh errors are logged. First tests for the SSH auth gate.

Also suppresses per-request HTTP logging when the local TUI owns the
terminal — request logs scribbled over the alt screen.
This commit was merged in pull request #103.
This commit is contained in:
2026-06-11 12:26:40 -04:00
parent 0b64d13bb6
commit 92efb8e270
3 changed files with 166 additions and 4 deletions
+115
View File
@@ -0,0 +1,115 @@
package main
import (
"crypto/ed25519"
"crypto/rand"
"errors"
"testing"
"time"
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
"gitea.lerkolabs.com/lerkolabs/uptop/internal/store"
"github.com/charmbracelet/ssh"
gossh "golang.org/x/crypto/ssh"
)
// kcMockStore implements only what keyCache and userInvalidatingStore touch;
// any other Store method panics via the embedded nil interface.
type kcMockStore struct {
store.Store
users []models.User
err error
}
func (m *kcMockStore) GetAllUsers() ([]models.User, error) { return m.users, m.err }
func (m *kcMockStore) DeleteUser(int) error { return nil }
func testKey(t *testing.T) (string, ssh.PublicKey) {
t.Helper()
pub, _, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
t.Fatal(err)
}
sk, err := gossh.NewPublicKey(pub)
if err != nil {
t.Fatal(err)
}
return string(gossh.MarshalAuthorizedKey(sk)), sk
}
func TestKeyCache_AllowsKnownDeniesUnknown(t *testing.T) {
authorized, known := testKey(t)
_, unknown := testKey(t)
kc := newKeyCache(&kcMockStore{users: []models.User{{PublicKey: authorized}}})
if !kc.IsAllowed(known) {
t.Error("known key denied")
}
if kc.IsAllowed(unknown) {
t.Error("unknown key allowed")
}
}
func TestKeyCache_RetainsKeysOnRefreshError(t *testing.T) {
authorized, known := testKey(t)
ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}}
kc := newKeyCache(ms)
if !kc.IsAllowed(known) {
t.Fatal("known key denied on first refresh")
}
// DB goes down and the cache goes stale: a transient error must not lock
// every admin out — the previous key set stays in effect.
ms.err = errors.New("db down")
kc.mu.Lock()
kc.updated = time.Now().Add(-time.Hour)
kc.mu.Unlock()
if !kc.IsAllowed(known) {
t.Error("transient refresh error locked out a previously valid key")
}
}
func TestKeyCache_FailsClosedAfterInvalidate(t *testing.T) {
authorized, known := testKey(t)
ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}}
kc := newKeyCache(ms)
if !kc.IsAllowed(known) {
t.Fatal("known key denied on first refresh")
}
// Revocation happened (Invalidate) and the DB is unreachable for the
// re-read: the revoked key must NOT keep working off the stale cache.
ms.err = errors.New("db down")
kc.Invalidate()
if kc.IsAllowed(known) {
t.Error("revoked key still allowed while DB is down — fails open")
}
}
func TestUserInvalidatingStore_DeleteDropsKeyCache(t *testing.T) {
authorized, known := testKey(t)
ms := &kcMockStore{users: []models.User{{PublicKey: authorized}}}
kc := newKeyCache(ms)
s := &userInvalidatingStore{Store: ms, kc: kc}
if !kc.IsAllowed(known) {
t.Fatal("known key denied on first refresh")
}
// Revoke the user; DB unreachable immediately after. The cached key must
// be gone the moment the delete returns.
if err := s.DeleteUser(1); err != nil {
t.Fatal(err)
}
ms.users = nil
ms.err = errors.New("db down")
if kc.IsAllowed(known) {
t.Error("deleted user's key still allowed from stale cache")
}
}
+50 -3
View File
@@ -376,7 +376,8 @@ func runServe(args []string) {
fmt.Println("WARNING: No UPTOP_ENCRYPTION_KEY set. Alert credentials stored unencrypted.")
}
var s store.Store = ss
kc := newKeyCache(ss)
var s store.Store = &userInvalidatingStore{Store: ss, kc: kc}
if err := s.Init(); err != nil {
fmt.Fprintf(os.Stderr, "database init error: %v\n", err)
os.Exit(1)
@@ -430,6 +431,10 @@ func runServe(args []string) {
tlsCert := os.Getenv("UPTOP_TLS_CERT")
tlsKey := os.Getenv("UPTOP_TLS_KEY")
// When the local TUI owns the terminal, per-request HTTP logs to stderr
// would scribble over the alt screen.
localTUI := isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd())
httpSrv := server.Start(server.ServerConfig{
Port: httpPort,
EnableStatus: enableStatus,
@@ -441,6 +446,7 @@ func runServe(args []string) {
MetricsPublic: os.Getenv("UPTOP_METRICS_PUBLIC") == "true",
CORSOrigin: os.Getenv("UPTOP_CORS_ORIGIN"),
TrustedProxies: parseTrustedProxies(os.Getenv("UPTOP_TRUSTED_PROXIES")),
QuietHTTPLog: localTUI,
}, s, eng)
cluster.Start(ctx, cluster.Config{
@@ -449,10 +455,9 @@ func runServe(args []string) {
SharedKey: clusterKey,
}, eng)
kc := newKeyCache(s)
sshSrv := startSSHServer(*port, s, eng, kc)
if isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd()) {
if localTUI {
p := tea.NewProgram(tui.InitialModel(true, s, eng, version), tea.WithAltScreen(), tea.WithMouseCellMotion())
if _, err := p.Run(); err != nil {
fmt.Fprintf(os.Stderr, "error: %v\n", err)
@@ -573,6 +578,10 @@ func newKeyCache(db store.Store) *keyCache {
func (c *keyCache) refresh() {
users, err := c.db.GetAllUsers()
if err != nil {
// Keep the previous key set: a transient DB error must not lock every
// admin out. Revocation still fails closed because Invalidate clears
// the set immediately.
log.Printf("SSH key cache refresh failed: %v", err)
return
}
keys := make([]ssh.PublicKey, 0, len(users))
@@ -589,8 +598,13 @@ func (c *keyCache) refresh() {
c.mu.Unlock()
}
// Invalidate clears the cached key set, not just the timestamp. If the
// refresh that follows a user revocation fails, auth fails closed (everyone
// re-authenticates after the next successful refresh) instead of the revoked
// key silently continuing to work off the stale cache.
func (c *keyCache) Invalidate() {
c.mu.Lock()
c.keys = nil
c.updated = time.Time{}
c.mu.Unlock()
}
@@ -614,6 +628,39 @@ func (c *keyCache) IsAllowed(incomingKey ssh.PublicKey) bool {
return false
}
// userInvalidatingStore drops the SSH key cache whenever the user table
// changes, so a revocation takes effect on the next connection attempt
// instead of after the cache TTL — and fails closed if the DB is unreachable
// when that next attempt re-reads the table.
type userInvalidatingStore struct {
store.Store
kc *keyCache
}
func (s *userInvalidatingStore) AddUser(username, publicKey, role string) error {
err := s.Store.AddUser(username, publicKey, role)
s.kc.Invalidate()
return err
}
func (s *userInvalidatingStore) UpdateUser(id int, username, publicKey, role string) error {
err := s.Store.UpdateUser(id, username, publicKey, role)
s.kc.Invalidate()
return err
}
func (s *userInvalidatingStore) DeleteUser(id int) error {
err := s.Store.DeleteUser(id)
s.kc.Invalidate()
return err
}
func (s *userInvalidatingStore) ImportData(data models.Backup) error {
err := s.Store.ImportData(data)
s.kc.Invalidate()
return err
}
func seedKeysFromEnv(s store.Store) {
var keys []string
+1 -1
View File
@@ -16,6 +16,7 @@ require (
github.com/mattn/go-sqlite3 v1.14.33
github.com/miekg/dns v1.1.72
github.com/prometheus-community/pro-bing v0.8.0
golang.org/x/crypto v0.52.0
gopkg.in/yaml.v3 v3.0.1
)
@@ -50,7 +51,6 @@ require (
github.com/muesli/termenv v0.16.0 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
golang.org/x/crypto v0.52.0 // indirect
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
golang.org/x/mod v0.35.0 // indirect
golang.org/x/net v0.55.0 // indirect