Files
uptop/internal/store/store.go
T
lerko 8b39d4c1a1
CI / test (pull_request) Successful in 2m36s
CI / lint (pull_request) Successful in 56s
CI / vulncheck (pull_request) Successful in 51s
fix(monitor): serialize DB writes through a single drained writer
Every check spawned `go e.db.Save*(...)` with the error discarded: a
fire-and-forget goroutine per log line, check, state change, and alert
health update. SaveLog ran a full-table prune DELETE on every insert and
SaveCheck a COUNT + conditional prune on every check, so the hot path
amplified each write into several statements. Nothing tracked these
goroutines, so at shutdown they raced the store's Close() — writes to a
closing DB, silently swallowed.

Introduce a single writer goroutine that drains a buffered channel of
typed dbWrite values (log/check/state-change/alert-health). Writes are
enqueued non-blocking; a saturated queue drops and notes it in the
in-memory log rather than blocking the check loop. Write errors are now
logged instead of discarded. Retention moves off the hot path: SaveLog
and SaveCheck become plain INSERTs, and PruneLogs/PruneCheckHistory/
PruneStateChanges run on a 10-minute timer inside the writer (single
keep-newest-N-per-site pass via a window function). state_changes was
previously never pruned — now bounded.

Add Engine.Stop(): cancels the engine's context, then waits for the
writer to drain every buffered write before returning. main wires it in
before the deferred store Close() so no write races a closed DB.

SQLite gains busy_timeout=5000 and synchronous=NORMAL, applied via the
DSN so every pooled connection inherits them (a post-open PRAGMA only
touches one connection); WAL moves to the DSN too. :memory: test DBs are
left as-is.

Tests: writer drains on Stop, Stop is idempotent, and the prune queries
keep newest-N per site / N logs on real SQLite. Full suite green under
-race.
2026-06-10 18:14:28 -04:00

86 lines
2.6 KiB
Go

package store
import (
"time"
"gitea.lerkolabs.com/lerkolabs/uptop/internal/models"
)
type Store interface {
Init() error
// Sites
GetSites() ([]models.Site, error)
AddSite(site models.Site) error
UpdateSite(site models.Site) error
UpdateSitePaused(id int, paused bool) error
DeleteSite(id int) error
// Alerts
GetAllAlerts() ([]models.AlertConfig, error)
GetAlert(id int) (models.AlertConfig, error)
AddAlert(name, aType string, settings map[string]string) error
UpdateAlert(id int, name, aType string, settings map[string]string) error
DeleteAlert(id int) error
// Declarative config support
GetSiteByName(name string) (models.Site, error)
GetAlertByName(name string) (models.AlertConfig, error)
AddSiteReturningID(site models.Site) (int, error)
AddAlertReturningID(name, aType string, settings map[string]string) (int, error)
// Users
GetAllUsers() ([]models.User, error)
AddUser(username, publicKey, role string) error
UpdateUser(id int, username, publicKey, role string) error
DeleteUser(id int) error
// History
SaveCheck(siteID int, latencyNs int64, isUp bool) error
SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error
LoadAllHistory(limit int) (map[int][]models.CheckRecord, error)
PruneCheckHistory() error
// State Changes
SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error
GetStateChanges(siteID int, limit int) ([]models.StateChange, error)
GetStateChangesSince(siteID int, since time.Time) ([]models.StateChange, error)
PruneStateChanges() error
// Nodes
RegisterNode(node models.ProbeNode) error
GetNode(id string) (models.ProbeNode, error)
GetAllNodes() ([]models.ProbeNode, error)
UpdateNodeLastSeen(id string) error
DeleteNode(id string) error
// Alert Health
LoadAlertHealth() (map[int]models.AlertHealthRecord, error)
SaveAlertHealth(h models.AlertHealthRecord) error
// Logs
SaveLog(message string) error
LoadLogs(limit int) ([]string, error)
PruneLogs() error
// Maintenance Windows
GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, error)
GetAllMaintenanceWindows(limit int) ([]models.MaintenanceWindow, error)
AddMaintenanceWindow(mw models.MaintenanceWindow) error
EndMaintenanceWindow(id int) error
DeleteMaintenanceWindow(id int) error
PruneExpiredMaintenanceWindows(retention time.Duration) (int64, error)
IsMonitorInMaintenance(monitorID int) (bool, error)
// Preferences
GetPreference(key string) (string, error)
SetPreference(key, value string) error
// Backup & Restore
ExportData() (models.Backup, error)
ImportData(data models.Backup) error
// Lifecycle
Close() error
}