refactor(models): typed Status constants with IsBroken() predicate

Replace ~150 bare status string comparisons with typed models.Status constants (StatusUp, StatusDown, StatusPending, StatusLate, StatusStale, StatusSSLExp). A single IsBroken() method replaces the duplicated isBroken lambda in monitor.go and the isDown function in sla.go. Adding a new status value (e.g. DEGRADED) now requires one constant definition instead of a grep-and-pray search across 16 files. CheckResult.Status stays a string — the checker is the boundary between raw protocol results and typed status — and the cast to models.Status happens at the edge, in handleStatusChange.
2026-06-11 15:56:51 -04:00
parent c3ae0bd80a
commit f00acbc280
16 changed files with 152 additions and 137 deletions
@@ -334,7 +334,7 @@ func (e *Engine) RecordHeartbeat(token string) bool {
 	}

 	var (
-		prevStatus string
+		prevStatus models.Status
 		name       string
 		alertID    int
 		downSince  time.Time
@@ -346,12 +346,12 @@ func (e *Engine) RecordHeartbeat(token string) bool {
 		downSince = s.StatusChangedAt // captured before mutation = when it went down

 		s.LastCheck = time.Now()
-		s.Status = "UP"
+		s.Status = models.StatusUp
 		s.FailureCount = 0
 		s.Latency = 0
 		s.LastError = ""
 		s.LastSuccessAt = time.Now()
-		if prevStatus != "UP" {
+		if prevStatus != models.StatusUp {
 			s.StatusChangedAt = time.Now()
 		}
 	})
@@ -360,13 +360,13 @@ func (e *Engine) RecordHeartbeat(token string) bool {
 	}

 	switch prevStatus {
-	case "PENDING":
+	case models.StatusPending:
 		e.AddLog(fmt.Sprintf("Push Monitor '%s' received first heartbeat", name))
-	case "LATE":
+	case models.StatusLate:
 		e.AddLog(fmt.Sprintf("Push Monitor '%s' heartbeat arrived (was late)", name))
-	case "STALE":
+	case models.StatusStale:
 		e.AddLog(fmt.Sprintf("Push Monitor '%s' heartbeat arrived (was stale)", name))
-	case "DOWN":
+	case models.StatusDown:
 		downDur := ""
 		if !downSince.IsZero() {
 			downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(downSince)))
@@ -375,8 +375,8 @@ func (e *Engine) RecordHeartbeat(token string) bool {
 		go e.triggerAlert(alertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.%s", name, downDur))
 	}

-	if prevStatus != "UP" && prevStatus != "PENDING" {
-		e.enqueueWrite(writeStateChange{siteID: targetID, fromStatus: prevStatus, toStatus: "UP"})
+	if prevStatus != models.StatusUp && prevStatus != models.StatusPending {
+		e.enqueueWrite(writeStateChange{siteID: targetID, fromStatus: string(prevStatus), toStatus: string(models.StatusUp)})
 	}

 	return true
@@ -434,12 +434,12 @@ func (e *Engine) Start(ctx context.Context) {
 				e.mu.RUnlock()
 				if !exists {
 					e.mu.Lock()
-					s.Status = "PENDING"
+					s.Status = models.StatusPending
 					if h, ok := e.GetHistory(s.ID); ok && len(h.Statuses) > 0 {
 						if h.Statuses[len(h.Statuses)-1] {
-							s.Status = "UP"
+							s.Status = models.StatusUp
 						} else {
-							s.Status = "DOWN"
+							s.Status = models.StatusDown
 						}
 						if len(h.Latencies) > 0 {
 							s.Latency = h.Latencies[len(h.Latencies)-1]
@@ -686,7 +686,7 @@ func (e *Engine) checkByID(ctx context.Context, id int) {
 }

 func (e *Engine) checkPush(_ context.Context, site models.Site) {
-	if site.Status == "PENDING" {
+	if site.Status == models.StatusPending {
 		return
 	}

@@ -702,16 +702,16 @@ func (e *Engine) checkPush(_ context.Context, site models.Site) {
 	now := time.Now()

 	if now.After(graceEnd) {
-		if site.Status != "DOWN" {
-			e.handleStatusChange(site, "DOWN", 0, 0, "heartbeat missed")
+		if site.Status != models.StatusDown {
+			e.handleStatusChange(site, string(models.StatusDown), 0, 0, "heartbeat missed")
 		}
 	} else if now.After(staleMark) {
-		if site.Status != "STALE" {
-			e.handleStatusChange(site, "STALE", 0, 0, "heartbeat stale")
+		if site.Status != models.StatusStale {
+			e.handleStatusChange(site, string(models.StatusStale), 0, 0, "heartbeat stale")
 		}
 	} else if now.After(overdue) {
-		if site.Status != "LATE" {
-			e.handleStatusChange(site, "LATE", 0, 0, "heartbeat overdue")
+		if site.Status != models.StatusLate {
+			e.handleStatusChange(site, string(models.StatusLate), 0, 0, "heartbeat overdue")
 		}
 	}
 }
@@ -727,9 +727,10 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 	}

 	inMaint := e.isInMaintenance(snap.ID)
+	status := models.Status(rawStatus)

 	var (
-		prev, next            string
+		prev, next            models.Status
 		name, typ             string
 		alertID               int
 		failCount, maxRetries int
@@ -745,7 +746,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 	_, exists := e.applyState(snap.ID, func(s *models.Site) {
 		// A non-UP result computed from a stale snapshot must not override a
 		// heartbeat (or newer check) that landed while we were evaluating.
-		if rawStatus != "UP" && s.LastCheck.After(snap.LastCheck) {
+		if status != models.StatusUp && s.LastCheck.After(snap.LastCheck) {
 			skipped = true
 			return
 		}
@@ -764,24 +765,24 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 		s.HasSSL = snap.HasSSL
 		s.CertExpiry = snap.CertExpiry
 		s.LastError = errorReason
-		if rawStatus == "UP" {
+		if status == models.StatusUp {
 			s.LastSuccessAt = time.Now()
 			s.LastError = ""
 		}

 		// Status + failure-count transition, based on the CURRENT live status.
-		if rawStatus == "UP" {
+		if status == models.StatusUp {
 			s.FailureCount = 0
-			s.Status = "UP"
+			s.Status = models.StatusUp
 		} else {
 			if s.FailureCount <= s.MaxRetries {
 				s.FailureCount++
 			}
 			if s.FailureCount > s.MaxRetries {
-				if s.Status != rawStatus {
+				if s.Status != status {
 					confirmedDown = true
 				}
-				s.Status = rawStatus
+				s.Status = status
 				s.FailureCount = s.MaxRetries + 1
 			} else {
 				failedCheck = true
@@ -789,16 +790,16 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 		}
 		failCount = s.FailureCount

-		if s.Status != prev && prev != "PENDING" {
+		if s.Status != prev && prev != models.StatusPending {
 			s.StatusChangedAt = time.Now()
-		} else if s.StatusChangedAt.IsZero() && s.Status != "PENDING" {
+		} else if s.StatusChangedAt.IsZero() && s.Status != models.StatusPending {
 			s.StatusChangedAt = time.Now()
 		}

 		// SSL expiry warning (fresh HasSSL/CertExpiry + config threshold).
 		if typ == "http" && s.CheckSSL && s.HasSSL {
 			days := int(time.Until(s.CertExpiry).Hours() / 24)
-			if days <= s.ExpiryThreshold && !s.SentSSLWarning && rawStatus != "SSL EXP" {
+			if days <= s.ExpiryThreshold && !s.SentSSLWarning && status != models.StatusSSLExp {
 				sslWarnFire = true
 				sslDays = days
 				s.SentSSLWarning = true
@@ -815,7 +816,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 		return
 	}

-	e.recordCheck(snap.ID, latency, rawStatus == "UP")
+	e.recordCheck(snap.ID, latency, status == models.StatusUp)

 	if confirmedDown {
 		if errorReason != "" {
@@ -827,8 +828,8 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 		e.AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", name, failCount, maxRetries))
 	}

-	if changed && prev != "PENDING" {
-		e.enqueueWrite(writeStateChange{siteID: snap.ID, fromStatus: prev, toStatus: next, reason: errorReason})
+	if changed && prev != models.StatusPending {
+		e.enqueueWrite(writeStateChange{siteID: snap.ID, fromStatus: string(prev), toStatus: string(next), reason: errorReason})
 	}

 	if sslWarnFire {
@@ -839,13 +840,11 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 		}
 	}

-	isBroken := func(s string) bool { return s == "DOWN" || s == "SSL EXP" }
-
-	if prev == "UP" && next == "LATE" {
+	if prev == models.StatusUp && next == models.StatusLate {
 		e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat overdue", name))
 	}

-	if !isBroken(prev) && isBroken(next) && next != "PENDING" {
+	if !prev.IsBroken() && next.IsBroken() && next != models.StatusPending {
 		if inMaint {
 			e.AddLog(fmt.Sprintf("Monitor '%s' is DOWN (alerts suppressed — maintenance)", name))
 		} else {
@@ -859,7 +858,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 			e.triggerAlert(alertID, "🚨 ALERT", msg)
 		}
 	}
-	if isBroken(prev) && next == "UP" {
+	if prev.IsBroken() && next == models.StatusUp {
 		downDur := ""
 		if !downSince.IsZero() {
 			downDur = fmt.Sprintf(" (was down %s)", fmtDurationShort(time.Since(downSince)))
@@ -869,7 +868,7 @@ func (e *Engine) handleStatusChange(snap models.Site, rawStatus string, code int
 			e.triggerAlert(alertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP%s", name, downDur))
 		}
 	}
-	if prev == "LATE" && next == "UP" && !isBroken(prev) {
+	if prev == models.StatusLate && next == models.StatusUp && !prev.IsBroken() {
 		e.AddLog(fmt.Sprintf("Monitor '%s' heartbeat arrived (was late)", name))
 	}
 }
@@ -991,12 +990,12 @@ func (e *Engine) GetDisplayStatus(site models.Site) string {
 	if e.isInMaintenance(site.ID) {
 		return "MAINT"
 	}
-	return site.Status
+	return string(site.Status)
 }

 func (e *Engine) checkGroup(_ context.Context, site models.Site) {
 	e.mu.RLock()
-	status := "UP"
+	status := models.StatusUp
 	hasChildren := false
 	for _, child := range e.liveState {
 		if child.ParentID != site.ID || child.Type == "group" {
@@ -1006,20 +1005,20 @@ func (e *Engine) checkGroup(_ context.Context, site models.Site) {
 		if child.Paused || e.isInMaintenance(child.ID) {
 			continue
 		}
-		if child.Status == "DOWN" || child.Status == "SSL EXP" {
-			status = "DOWN"
-		} else if child.Status == "STALE" && status != "DOWN" {
-			status = "STALE"
-		} else if child.Status == "LATE" && status != "DOWN" && status != "STALE" {
-			status = "LATE"
-		} else if child.Status == "PENDING" && status != "DOWN" && status != "STALE" && status != "LATE" {
-			status = "PENDING"
+		if child.Status == models.StatusDown || child.Status == models.StatusSSLExp {
+			status = models.StatusDown
+		} else if child.Status == models.StatusStale && status != models.StatusDown {
+			status = models.StatusStale
+		} else if child.Status == models.StatusLate && status != models.StatusDown && status != models.StatusStale {
+			status = models.StatusLate
+		} else if child.Status == models.StatusPending && status != models.StatusDown && status != models.StatusStale && status != models.StatusLate {
+			status = models.StatusPending
 		}
 	}
 	e.mu.RUnlock()

 	if !hasChildren {
-		status = "PENDING"
+		status = models.StatusPending
 	}

 	e.applyState(site.ID, func(s *models.Site) {
@@ -1072,15 +1071,15 @@ func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, i

 	aggUp, avgLatency := AggregateStatus(results, e.aggStrategy)

-	rawStatus := "UP"
+	probeStatus := models.StatusUp
 	if !aggUp {
-		rawStatus = "DOWN"
+		probeStatus = models.StatusDown
 	}

 	updatedSite := site
 	updatedSite.Latency = time.Duration(avgLatency)
 	updatedSite.LastCheck = time.Now()
-	e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency), errorReason)
+	e.handleStatusChange(updatedSite, string(probeStatus), 0, time.Duration(avgLatency), errorReason)
 }

 func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {