feat(monitor): auto-prune expired maintenance windows
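A background goroutine prunes once at startup and then every 15 minutes, deleting maintenance windows whose end time is older than the retention period (default 7 days). The retention period is configurable via the UPTOP_MAINT_RETENTION environment variable in Go duration format (e.g. 168h — Go durations have no day unit, so "7d" is not accepted); unparsable or non-positive values are ignored and the default is kept. Closes #72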
This commit was merged in pull request #96.
This commit is contained in:
@@ -379,6 +379,11 @@ func runServe(args []string) {
|
|||||||
if aggStrategy != "" {
|
if aggStrategy != "" {
|
||||||
eng.SetAggStrategy(monitor.AggregationStrategy(aggStrategy))
|
eng.SetAggStrategy(monitor.AggregationStrategy(aggStrategy))
|
||||||
}
|
}
|
||||||
|
if v := os.Getenv("UPTOP_MAINT_RETENTION"); v != "" {
|
||||||
|
if d, err := time.ParseDuration(v); err == nil && d > 0 {
|
||||||
|
eng.SetMaintRetention(d)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
|||||||
@@ -65,14 +65,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
// PruneExpiredMaintenanceWindows is a no-op stub: it ignores the retention argument and reports zero pruned rows with a nil error.
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -63,14 +63,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
// PruneExpiredMaintenanceWindows is a no-op stub: it ignores the retention argument and reports zero pruned rows with a nil error.
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,10 +17,12 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
maxLogEntries = 100
|
maxLogEntries = 100
|
||||||
pollInterval = 5 * time.Second
|
pollInterval = 5 * time.Second
|
||||||
minCheckInterval = 5
|
minCheckInterval = 5
|
||||||
minPushGrace = 60 * time.Second
|
minPushGrace = 60 * time.Second
|
||||||
|
// maintPruneInterval is the period of the ticker that drives the background maintenance-window pruner.
maintPruneInterval = 15 * time.Minute
|
||||||
|
// defaultMaintRetention (7 days) is the retention a new Engine starts with; it is the value handed to the store's prune call unless SetMaintRetention replaces it.
defaultMaintRetention = 7 * 24 * time.Hour
|
||||||
)
|
)
|
||||||
|
|
||||||
type AlertHealth struct {
|
type AlertHealth struct {
|
||||||
@@ -59,6 +61,7 @@ type Engine struct {
|
|||||||
db store.Store
|
db store.Store
|
||||||
insecureSkipVerify bool
|
insecureSkipVerify bool
|
||||||
allowPrivateTargets bool
|
allowPrivateTargets bool
|
||||||
|
maintRetention time.Duration
|
||||||
strictClient *http.Client
|
strictClient *http.Client
|
||||||
insecureClient *http.Client
|
insecureClient *http.Client
|
||||||
}
|
}
|
||||||
@@ -83,6 +86,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine {
|
|||||||
aggStrategy: AggAnyDown,
|
aggStrategy: AggAnyDown,
|
||||||
isActive: true,
|
isActive: true,
|
||||||
allowPrivateTargets: allowPrivateTargets,
|
allowPrivateTargets: allowPrivateTargets,
|
||||||
|
maintRetention: defaultMaintRetention,
|
||||||
db: s,
|
db: s,
|
||||||
strictClient: &http.Client{
|
strictClient: &http.Client{
|
||||||
Transport: &http.Transport{
|
Transport: &http.Transport{
|
||||||
@@ -103,6 +107,10 @@ func (e *Engine) SetInsecureSkipVerify(skip bool) {
|
|||||||
e.insecureSkipVerify = skip
|
e.insecureSkipVerify = skip
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *Engine) SetMaintRetention(d time.Duration) {
|
||||||
|
e.maintRetention = d
|
||||||
|
}
|
||||||
|
|
||||||
var ansiRe = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`)
|
var ansiRe = regexp.MustCompile(`\x1b\[[0-9;]*[a-zA-Z]`)
|
||||||
|
|
||||||
func sanitizeLog(s string) string {
|
func sanitizeLog(s string) string {
|
||||||
@@ -337,6 +345,35 @@ func (e *Engine) Start(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
go e.maintenancePruner(ctx)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Engine) maintenancePruner(ctx context.Context) {
|
||||||
|
ticker := time.NewTicker(maintPruneInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
e.pruneMaintenanceWindows()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
e.pruneMaintenanceWindows()
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *Engine) pruneMaintenanceWindows() {
|
||||||
|
pruned, err := e.db.PruneExpiredMaintenanceWindows(e.maintRetention)
|
||||||
|
if err != nil {
|
||||||
|
e.AddLog(fmt.Sprintf("Maintenance prune error: %v", err))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if pruned > 0 {
|
||||||
|
e.AddLog(fmt.Sprintf("Pruned %d expired maintenance window(s)", pruned))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) UpdateSiteConfig(site models.Site) {
|
func (e *Engine) UpdateSiteConfig(site models.Site) {
|
||||||
|
|||||||
@@ -73,13 +73,14 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
// PruneExpiredMaintenanceWindows is a no-op stub: it ignores the retention argument and reports zero pruned rows with a nil error.
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -74,14 +74,15 @@ func (m *mockStore) LoadLogs(int) ([]string, error) { return nil
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
// PruneExpiredMaintenanceWindows is a no-op stub: it ignores the retention argument and reports zero pruned rows with a nil error.
func (m *mockStore) PruneExpiredMaintenanceWindows(time.Duration) (int64, error) { return 0, nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
func (m *mockStore) GetStateChangesSince(int, time.Time) ([]models.StateChange, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -619,6 +619,18 @@ func (s *SQLStore) DeleteMaintenanceWindow(id int) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *SQLStore) PruneExpiredMaintenanceWindows(retention time.Duration) (int64, error) {
|
||||||
|
cutoff := time.Now().Add(-retention)
|
||||||
|
result, err := s.db.Exec(
|
||||||
|
s.q("DELETE FROM maintenance_windows WHERE end_time IS NOT NULL AND end_time < ?"),
|
||||||
|
cutoff,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
return result.RowsAffected()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *SQLStore) IsMonitorInMaintenance(monitorID int) (bool, error) {
|
func (s *SQLStore) IsMonitorInMaintenance(monitorID int) (bool, error) {
|
||||||
var count int
|
var count int
|
||||||
err := s.db.QueryRow(s.q(`SELECT COUNT(*) FROM maintenance_windows
|
err := s.db.QueryRow(s.q(`SELECT COUNT(*) FROM maintenance_windows
|
||||||
|
|||||||
@@ -315,3 +315,65 @@ func TestDeleteSiteCascade(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestPruneExpiredMaintenanceWindows(t *testing.T) {
|
||||||
|
s := newTestStore(t)
|
||||||
|
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
// Expired 10 days ago — should be pruned with 7d retention.
|
||||||
|
old := models.MaintenanceWindow{
|
||||||
|
MonitorID: 0,
|
||||||
|
Title: "Old Window",
|
||||||
|
Type: "maintenance",
|
||||||
|
StartTime: now.Add(-11 * 24 * time.Hour),
|
||||||
|
EndTime: now.Add(-10 * 24 * time.Hour),
|
||||||
|
}
|
||||||
|
if err := s.AddMaintenanceWindow(old); err != nil {
|
||||||
|
t.Fatalf("AddMaintenanceWindow (old): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expired 1 day ago — within 7d retention, should survive.
|
||||||
|
recent := models.MaintenanceWindow{
|
||||||
|
MonitorID: 0,
|
||||||
|
Title: "Recent Window",
|
||||||
|
Type: "maintenance",
|
||||||
|
StartTime: now.Add(-2 * 24 * time.Hour),
|
||||||
|
EndTime: now.Add(-1 * 24 * time.Hour),
|
||||||
|
}
|
||||||
|
if err := s.AddMaintenanceWindow(recent); err != nil {
|
||||||
|
t.Fatalf("AddMaintenanceWindow (recent): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ongoing — no end time, should survive.
|
||||||
|
ongoing := models.MaintenanceWindow{
|
||||||
|
MonitorID: 0,
|
||||||
|
Title: "Ongoing Window",
|
||||||
|
Type: "maintenance",
|
||||||
|
StartTime: now.Add(-1 * time.Hour),
|
||||||
|
}
|
||||||
|
if err := s.AddMaintenanceWindow(ongoing); err != nil {
|
||||||
|
t.Fatalf("AddMaintenanceWindow (ongoing): %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
pruned, err := s.PruneExpiredMaintenanceWindows(7 * 24 * time.Hour)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("PruneExpiredMaintenanceWindows: %v", err)
|
||||||
|
}
|
||||||
|
if pruned != 1 {
|
||||||
|
t.Errorf("expected 1 pruned, got %d", pruned)
|
||||||
|
}
|
||||||
|
|
||||||
|
all, err := s.GetAllMaintenanceWindows(100)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("GetAllMaintenanceWindows: %v", err)
|
||||||
|
}
|
||||||
|
if len(all) != 2 {
|
||||||
|
t.Fatalf("expected 2 remaining windows, got %d", len(all))
|
||||||
|
}
|
||||||
|
for _, w := range all {
|
||||||
|
if w.Title == "Old Window" {
|
||||||
|
t.Error("old window should have been pruned")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ type Store interface {
|
|||||||
AddMaintenanceWindow(mw models.MaintenanceWindow) error
|
AddMaintenanceWindow(mw models.MaintenanceWindow) error
|
||||||
EndMaintenanceWindow(id int) error
|
EndMaintenanceWindow(id int) error
|
||||||
DeleteMaintenanceWindow(id int) error
|
DeleteMaintenanceWindow(id int) error
|
||||||
|
PruneExpiredMaintenanceWindows(retention time.Duration) (int64, error)
|
||||||
IsMonitorInMaintenance(monitorID int) (bool, error)
|
IsMonitorInMaintenance(monitorID int) (bool, error)
|
||||||
|
|
||||||
// Preferences
|
// Preferences
|
||||||
|
|||||||
Reference in New Issue
Block a user