feat: show error reason when monitors go DOWN #33
@@ -61,13 +61,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) Close() error { return nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
|
func (m *mockStore) Close() error { return nil }
|
||||||
|
|
||||||
// --- Cluster Start Tests ---
|
// --- Cluster Start Tests ---
|
||||||
|
|
||||||
|
|||||||
@@ -127,9 +127,10 @@ func probeFetchAssignments(ctx context.Context, client *http.Client, cfg ProbeCo
|
|||||||
}
|
}
|
||||||
|
|
||||||
type probeResultItem struct {
|
type probeResultItem struct {
|
||||||
SiteID int `json:"site_id"`
|
SiteID int `json:"site_id"`
|
||||||
LatencyNs int64 `json:"latency_ns"`
|
LatencyNs int64 `json:"latency_ns"`
|
||||||
IsUp bool `json:"is_up"`
|
IsUp bool `json:"is_up"`
|
||||||
|
ErrorReason string `json:"error_reason,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func probeExecuteChecks(ctx context.Context, sites []models.Site, strict, insecure *http.Client, allowPrivate bool) []probeResultItem {
|
func probeExecuteChecks(ctx context.Context, sites []models.Site, strict, insecure *http.Client, allowPrivate bool) []probeResultItem {
|
||||||
@@ -154,9 +155,10 @@ loop:
|
|||||||
cr := monitor.RunCheck(s, strict, insecure, false, allowPrivate)
|
cr := monitor.RunCheck(s, strict, insecure, false, allowPrivate)
|
||||||
mu.Lock()
|
mu.Lock()
|
||||||
results = append(results, probeResultItem{
|
results = append(results, probeResultItem{
|
||||||
SiteID: s.ID,
|
SiteID: s.ID,
|
||||||
LatencyNs: cr.LatencyNs,
|
LatencyNs: cr.LatencyNs,
|
||||||
IsUp: cr.Status == "UP",
|
IsUp: cr.Status == "UP",
|
||||||
|
ErrorReason: cr.ErrorReason,
|
||||||
})
|
})
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
}(site)
|
}(site)
|
||||||
|
|||||||
@@ -2,13 +2,14 @@ package metrics
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
|
||||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||||
|
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||||
)
|
)
|
||||||
|
|
||||||
type mockStore struct {
|
type mockStore struct {
|
||||||
@@ -58,13 +59,15 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) Close() error { return nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
|
func (m *mockStore) Close() error { return nil }
|
||||||
|
|
||||||
func TestMetricsHandler(t *testing.T) {
|
func TestMetricsHandler(t *testing.T) {
|
||||||
ms := &mockStore{
|
ms := &mockStore{
|
||||||
|
|||||||
@@ -27,14 +27,26 @@ type Site struct {
|
|||||||
Paused bool
|
Paused bool
|
||||||
Regions string
|
Regions string
|
||||||
|
|
||||||
FailureCount int
|
FailureCount int
|
||||||
Status string
|
Status string
|
||||||
StatusCode int
|
StatusCode int
|
||||||
Latency time.Duration
|
Latency time.Duration
|
||||||
CertExpiry time.Time
|
CertExpiry time.Time
|
||||||
HasSSL bool
|
HasSSL bool
|
||||||
LastCheck time.Time
|
LastCheck time.Time
|
||||||
SentSSLWarning bool
|
SentSSLWarning bool
|
||||||
|
LastError string
|
||||||
|
StatusChangedAt time.Time
|
||||||
|
LastSuccessAt time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
type StateChange struct {
|
||||||
|
ID int
|
||||||
|
SiteID int
|
||||||
|
FromStatus string
|
||||||
|
ToStatus string
|
||||||
|
ErrorReason string
|
||||||
|
ChangedAt time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
type AlertConfig struct {
|
type AlertConfig struct {
|
||||||
|
|||||||
@@ -11,10 +11,11 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type NodeResult struct {
|
type NodeResult struct {
|
||||||
NodeID string
|
NodeID string
|
||||||
IsUp bool
|
IsUp bool
|
||||||
LatencyNs int64
|
LatencyNs int64
|
||||||
CheckedAt time.Time
|
CheckedAt time.Time
|
||||||
|
ErrorReason string
|
||||||
}
|
}
|
||||||
|
|
||||||
func AggregateStatus(results []NodeResult, strategy AggregationStrategy) (isUp bool, avgLatencyNs int64) {
|
func AggregateStatus(results []NodeResult, strategy AggregationStrategy) (isUp bool, avgLatencyNs int64) {
|
||||||
|
|||||||
+34
-15
@@ -2,6 +2,7 @@ package monitor
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strconv"
|
"strconv"
|
||||||
@@ -15,12 +16,13 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
type CheckResult struct {
|
type CheckResult struct {
|
||||||
SiteID int
|
SiteID int
|
||||||
Status string // "UP", "DOWN", "SSL EXP"
|
Status string // "UP", "DOWN", "SSL EXP"
|
||||||
StatusCode int
|
StatusCode int
|
||||||
LatencyNs int64
|
LatencyNs int64
|
||||||
HasSSL bool
|
HasSSL bool
|
||||||
CertExpiry time.Time
|
CertExpiry time.Time
|
||||||
|
ErrorReason string
|
||||||
}
|
}
|
||||||
|
|
||||||
func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bool, allowPrivate ...bool) CheckResult {
|
func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bool, allowPrivate ...bool) CheckResult {
|
||||||
@@ -35,7 +37,7 @@ func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bo
|
|||||||
if ips, err := net.LookupIP(host); err == nil {
|
if ips, err := net.LookupIP(host); err == nil {
|
||||||
for _, ip := range ips {
|
for _, ip := range ips {
|
||||||
if isPrivateIP(ip) {
|
if isPrivateIP(ip) {
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "target resolves to private IP"}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -52,7 +54,7 @@ func RunCheck(site models.Site, strict, insecure *http.Client, globalInsecure bo
|
|||||||
case "dns":
|
case "dns":
|
||||||
return runDNSCheck(site)
|
return runDNSCheck(site)
|
||||||
default:
|
default:
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "unsupported monitor type: " + site.Type}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -68,7 +70,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
|||||||
|
|
||||||
req, err := http.NewRequestWithContext(ctx, method, site.URL, nil)
|
req, err := http.NewRequestWithContext(ctx, method, site.URL, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "invalid request: " + err.Error()}
|
||||||
}
|
}
|
||||||
|
|
||||||
client := strict
|
client := strict
|
||||||
@@ -88,6 +90,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
|||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
result.Status = "DOWN"
|
result.Status = "DOWN"
|
||||||
|
result.ErrorReason = truncateError(err.Error(), 256)
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
@@ -95,6 +98,11 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
|||||||
result.StatusCode = resp.StatusCode
|
result.StatusCode = resp.StatusCode
|
||||||
if !isCodeAccepted(resp.StatusCode, site.AcceptedCodes) {
|
if !isCodeAccepted(resp.StatusCode, site.AcceptedCodes) {
|
||||||
result.Status = "DOWN"
|
result.Status = "DOWN"
|
||||||
|
expected := site.AcceptedCodes
|
||||||
|
if expected == "" {
|
||||||
|
expected = "200-299"
|
||||||
|
}
|
||||||
|
result.ErrorReason = fmt.Sprintf("HTTP %d (expected %s)", resp.StatusCode, expected)
|
||||||
}
|
}
|
||||||
|
|
||||||
if site.CheckSSL && resp.TLS != nil && len(resp.TLS.PeerCertificates) > 0 {
|
if site.CheckSSL && resp.TLS != nil && len(resp.TLS.PeerCertificates) > 0 {
|
||||||
@@ -103,6 +111,7 @@ func runHTTPCheck(site models.Site, strict, insecure *http.Client, globalInsecur
|
|||||||
result.CertExpiry = cert.NotAfter
|
result.CertExpiry = cert.NotAfter
|
||||||
if time.Now().After(cert.NotAfter) {
|
if time.Now().After(cert.NotAfter) {
|
||||||
result.Status = "SSL EXP"
|
result.Status = "SSL EXP"
|
||||||
|
result.ErrorReason = "SSL certificate expired"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -117,7 +126,7 @@ func runPingCheck(site models.Site) CheckResult {
|
|||||||
|
|
||||||
pinger, err := probing.NewPinger(host)
|
pinger, err := probing.NewPinger(host)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN"}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", ErrorReason: "ping setup: " + err.Error()}
|
||||||
}
|
}
|
||||||
pinger.Count = 1
|
pinger.Count = 1
|
||||||
pinger.Timeout = siteTimeout(site)
|
pinger.Timeout = siteTimeout(site)
|
||||||
@@ -127,8 +136,11 @@ func runPingCheck(site models.Site) CheckResult {
|
|||||||
err = pinger.Run()
|
err = pinger.Run()
|
||||||
latency := time.Since(start)
|
latency := time.Since(start)
|
||||||
|
|
||||||
if err != nil || pinger.Statistics().PacketsRecv == 0 {
|
if err != nil {
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "ping failed: " + err.Error()}
|
||||||
|
}
|
||||||
|
if pinger.Statistics().PacketsRecv == 0 {
|
||||||
|
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "no ICMP response"}
|
||||||
}
|
}
|
||||||
|
|
||||||
stats := pinger.Statistics()
|
stats := pinger.Statistics()
|
||||||
@@ -148,7 +160,7 @@ func runPortCheck(site models.Site) CheckResult {
|
|||||||
latency := time.Since(start)
|
latency := time.Since(start)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: truncateError(err.Error(), 256)}
|
||||||
}
|
}
|
||||||
_ = conn.Close()
|
_ = conn.Close()
|
||||||
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
||||||
@@ -199,10 +211,10 @@ func runDNSCheck(site models.Site) CheckResult {
|
|||||||
latency := time.Since(start)
|
latency := time.Since(start)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds()}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS query failed: " + err.Error()}
|
||||||
}
|
}
|
||||||
if r.Rcode != dns.RcodeSuccess {
|
if r.Rcode != dns.RcodeSuccess {
|
||||||
return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds()}
|
return CheckResult{SiteID: site.ID, Status: "DOWN", StatusCode: r.Rcode, LatencyNs: latency.Nanoseconds(), ErrorReason: "DNS RCODE: " + dns.RcodeToString[r.Rcode]}
|
||||||
}
|
}
|
||||||
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
return CheckResult{SiteID: site.ID, Status: "UP", LatencyNs: latency.Nanoseconds()}
|
||||||
}
|
}
|
||||||
@@ -235,3 +247,10 @@ func isCodeAccepted(code int, accepted string) bool {
|
|||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// truncateError returns s unchanged when it is at most max bytes long;
// otherwise it returns a shortened copy ending in "..." whose total length
// never exceeds max bytes.
//
// Edge cases:
//   - max <= 0 with an over-long s yields "" (previously a slice-bounds panic).
//   - max in 1..3 yields the first max bytes of "..." (max 1 and 2 previously
//     panicked; max 3 is unchanged).
//   - The cut point is moved back to the start of a UTF-8 sequence so a
//     multi-byte rune is never split, keeping the result valid UTF-8 when s is.
func truncateError(s string, max int) string {
	const ellipsis = "..."

	if len(s) <= max {
		return s
	}
	if max <= 0 {
		return ""
	}
	if max <= len(ellipsis) {
		// Not enough room for any payload; emit as much of the marker as fits.
		return ellipsis[:max]
	}

	// len(s) > max > cut, so indexing s[cut] below is always in range.
	cut := max - len(ellipsis)

	// 0b10xxxxxx marks a UTF-8 continuation byte: step back until the cut
	// lands on the first byte of a rune (or the start of the string).
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + ellipsis
}
|
||||||
|
|||||||
+50
-11
@@ -283,6 +283,9 @@ func (e *Engine) UpdateSiteConfig(site models.Site) {
|
|||||||
site.LastCheck = existing.LastCheck
|
site.LastCheck = existing.LastCheck
|
||||||
site.SentSSLWarning = existing.SentSSLWarning
|
site.SentSSLWarning = existing.SentSSLWarning
|
||||||
site.FailureCount = existing.FailureCount
|
site.FailureCount = existing.FailureCount
|
||||||
|
site.LastError = existing.LastError
|
||||||
|
site.StatusChangedAt = existing.StatusChangedAt
|
||||||
|
site.LastSuccessAt = existing.LastSuccessAt
|
||||||
e.liveState[site.ID] = site
|
e.liveState[site.ID] = site
|
||||||
e.addToTokenIndex(site)
|
e.addToTokenIndex(site)
|
||||||
}
|
}
|
||||||
@@ -393,33 +396,45 @@ func (e *Engine) checkByID(id int) {
|
|||||||
updatedSite.CertExpiry = result.CertExpiry
|
updatedSite.CertExpiry = result.CertExpiry
|
||||||
updatedSite.Latency = time.Duration(result.LatencyNs)
|
updatedSite.Latency = time.Duration(result.LatencyNs)
|
||||||
updatedSite.LastCheck = time.Now()
|
updatedSite.LastCheck = time.Now()
|
||||||
e.handleStatusChange(updatedSite, result.Status, result.StatusCode, time.Duration(result.LatencyNs))
|
e.handleStatusChange(updatedSite, result.Status, result.StatusCode, time.Duration(result.LatencyNs), result.ErrorReason)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) checkPush(site models.Site) {
|
func (e *Engine) checkPush(site models.Site) {
|
||||||
deadline := site.LastCheck.Add(time.Duration(site.Interval) * time.Second).Add(pushGracePeriod)
|
deadline := site.LastCheck.Add(time.Duration(site.Interval) * time.Second).Add(pushGracePeriod)
|
||||||
if time.Now().After(deadline) {
|
if time.Now().After(deadline) {
|
||||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
e.handleStatusChange(site, "DOWN", 0, 0, "heartbeat missed")
|
||||||
} else if site.Status != "UP" {
|
} else if site.Status != "UP" {
|
||||||
e.handleStatusChange(site, "UP", 200, 0)
|
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration) {
|
func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration, errorReason string) {
|
||||||
if !e.IsActive() {
|
if !e.IsActive() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
newState := site
|
newState := site
|
||||||
newState.StatusCode = code
|
newState.StatusCode = code
|
||||||
|
newState.LastError = errorReason
|
||||||
|
|
||||||
|
if rawStatus == "UP" {
|
||||||
|
newState.LastSuccessAt = time.Now()
|
||||||
|
newState.LastError = ""
|
||||||
|
} else {
|
||||||
|
newState.LastSuccessAt = site.LastSuccessAt
|
||||||
|
}
|
||||||
|
|
||||||
if site.Status == "UP" && rawStatus != "UP" {
|
if site.Status == "UP" && rawStatus != "UP" {
|
||||||
newState.FailureCount++
|
newState.FailureCount++
|
||||||
if newState.FailureCount > site.MaxRetries {
|
if newState.FailureCount > site.MaxRetries {
|
||||||
newState.Status = rawStatus
|
newState.Status = rawStatus
|
||||||
newState.FailureCount = site.MaxRetries + 1
|
newState.FailureCount = site.MaxRetries + 1
|
||||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
if errorReason != "" {
|
||||||
|
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN: %s", site.Name, errorReason))
|
||||||
|
} else {
|
||||||
|
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
e.AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", site.Name, newState.FailureCount, site.MaxRetries))
|
e.AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", site.Name, newState.FailureCount, site.MaxRetries))
|
||||||
}
|
}
|
||||||
@@ -431,6 +446,14 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
|||||||
newState.FailureCount = site.MaxRetries + 1
|
newState.FailureCount = site.MaxRetries + 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if newState.Status != site.Status && site.Status != "PENDING" {
|
||||||
|
newState.StatusChangedAt = time.Now()
|
||||||
|
} else if site.StatusChangedAt.IsZero() && newState.Status != "PENDING" {
|
||||||
|
newState.StatusChangedAt = time.Now()
|
||||||
|
} else {
|
||||||
|
newState.StatusChangedAt = site.StatusChangedAt
|
||||||
|
}
|
||||||
|
|
||||||
inMaint := e.isInMaintenance(site.ID)
|
inMaint := e.isInMaintenance(site.ID)
|
||||||
|
|
||||||
if site.Type == "http" && site.CheckSSL && site.HasSSL {
|
if site.Type == "http" && site.CheckSSL && site.HasSSL {
|
||||||
@@ -455,12 +478,19 @@ func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int
|
|||||||
|
|
||||||
e.recordCheck(site.ID, latency, rawStatus == "UP")
|
e.recordCheck(site.ID, latency, rawStatus == "UP")
|
||||||
|
|
||||||
|
if newState.Status != site.Status && site.Status != "PENDING" {
|
||||||
|
go func() { _ = e.db.SaveStateChange(site.ID, site.Status, newState.Status, errorReason) }()
|
||||||
|
}
|
||||||
|
|
||||||
isBroken := func(s string) bool { return s == "DOWN" || s == "SSL EXP" }
|
isBroken := func(s string) bool { return s == "DOWN" || s == "SSL EXP" }
|
||||||
if !isBroken(site.Status) && isBroken(newState.Status) && newState.Status != "PENDING" {
|
if !isBroken(site.Status) && isBroken(newState.Status) && newState.Status != "PENDING" {
|
||||||
if inMaint {
|
if inMaint {
|
||||||
e.AddLog(fmt.Sprintf("Monitor '%s' is DOWN (alerts suppressed — maintenance)", site.Name))
|
e.AddLog(fmt.Sprintf("Monitor '%s' is DOWN (alerts suppressed — maintenance)", site.Name))
|
||||||
} else {
|
} else {
|
||||||
msg := fmt.Sprintf("Monitor '%s' is DOWN (%s)", site.Name, rawStatus)
|
msg := fmt.Sprintf("Monitor '%s' is DOWN (%s)", site.Name, rawStatus)
|
||||||
|
if errorReason != "" {
|
||||||
|
msg = fmt.Sprintf("Monitor '%s' is DOWN: %s", site.Name, errorReason)
|
||||||
|
}
|
||||||
if site.Type == "push" {
|
if site.Type == "push" {
|
||||||
msg = fmt.Sprintf("Push Monitor '%s' missed heartbeat.", site.Name)
|
msg = fmt.Sprintf("Push Monitor '%s' missed heartbeat.", site.Name)
|
||||||
}
|
}
|
||||||
@@ -554,16 +584,17 @@ func (e *Engine) SetAggStrategy(strategy AggregationStrategy) {
|
|||||||
e.aggStrategy = strategy
|
e.aggStrategy = strategy
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, isUp bool) {
|
func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, isUp bool, errorReason string) {
|
||||||
e.probeResultsMu.Lock()
|
e.probeResultsMu.Lock()
|
||||||
if e.probeResults[siteID] == nil {
|
if e.probeResults[siteID] == nil {
|
||||||
e.probeResults[siteID] = make(map[string]NodeResult)
|
e.probeResults[siteID] = make(map[string]NodeResult)
|
||||||
}
|
}
|
||||||
e.probeResults[siteID][nodeID] = NodeResult{
|
e.probeResults[siteID][nodeID] = NodeResult{
|
||||||
NodeID: nodeID,
|
NodeID: nodeID,
|
||||||
IsUp: isUp,
|
IsUp: isUp,
|
||||||
LatencyNs: latencyNs,
|
LatencyNs: latencyNs,
|
||||||
CheckedAt: time.Now(),
|
CheckedAt: time.Now(),
|
||||||
|
ErrorReason: errorReason,
|
||||||
}
|
}
|
||||||
results := make([]NodeResult, 0, len(e.probeResults[siteID]))
|
results := make([]NodeResult, 0, len(e.probeResults[siteID]))
|
||||||
for _, r := range e.probeResults[siteID] {
|
for _, r := range e.probeResults[siteID] {
|
||||||
@@ -588,7 +619,7 @@ func (e *Engine) IngestProbeResult(nodeID string, siteID int, latencyNs int64, i
|
|||||||
updatedSite := site
|
updatedSite := site
|
||||||
updatedSite.Latency = time.Duration(avgLatency)
|
updatedSite.Latency = time.Duration(avgLatency)
|
||||||
updatedSite.LastCheck = time.Now()
|
updatedSite.LastCheck = time.Now()
|
||||||
e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency))
|
e.handleStatusChange(updatedSite, rawStatus, 0, time.Duration(avgLatency), errorReason)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
|
func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
|
||||||
@@ -601,3 +632,11 @@ func (e *Engine) GetProbeResults(siteID int) map[string]NodeResult {
|
|||||||
}
|
}
|
||||||
return cp
|
return cp
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (e *Engine) GetStateChanges(siteID int, limit int) []models.StateChange {
|
||||||
|
changes, err := e.db.GetStateChanges(siteID, limit)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return changes
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,10 +2,11 @@ package monitor
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||||
)
|
)
|
||||||
|
|
||||||
// --- Mock Store ---
|
// --- Mock Store ---
|
||||||
@@ -68,12 +69,14 @@ func (m *mockStore) GetActiveMaintenanceWindows() ([]models.MaintenanceWindow, e
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) Close() error { return nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
|
func (m *mockStore) Close() error { return nil }
|
||||||
|
|
||||||
func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error) {
|
func (m *mockStore) GetAllAlerts() ([]models.AlertConfig, error) {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
@@ -174,7 +177,7 @@ func TestHandleStatusChange_PendingToUp(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "PENDING", MaxRetries: 3, AlertID: 1}
|
site := models.Site{ID: 1, Name: "test", Status: "PENDING", MaxRetries: 3, AlertID: 1}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "UP", 200, 10*time.Millisecond)
|
e.handleStatusChange(site, "UP", 200, 10*time.Millisecond, "")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "UP" {
|
if s.Status != "UP" {
|
||||||
@@ -195,7 +198,7 @@ func TestHandleStatusChange_UpIncrementFailure(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 3, FailureCount: 0}
|
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 3, FailureCount: 0}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "UP" {
|
if s.Status != "UP" {
|
||||||
@@ -213,7 +216,7 @@ func TestHandleStatusChange_UpToDown_ExceedsRetries(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 2, FailureCount: 2, AlertID: 1}
|
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 2, FailureCount: 2, AlertID: 1}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "DOWN" {
|
if s.Status != "DOWN" {
|
||||||
@@ -236,7 +239,7 @@ func TestHandleStatusChange_UpToDown_ZeroRetries(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, FailureCount: 0, AlertID: 1}
|
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, FailureCount: 0, AlertID: 1}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "DOWN" {
|
if s.Status != "DOWN" {
|
||||||
@@ -255,7 +258,7 @@ func TestHandleStatusChange_DownToUp_Recovery(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", FailureCount: 4, AlertID: 1}
|
site := models.Site{ID: 1, Name: "test", Status: "DOWN", FailureCount: 4, AlertID: 1}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "UP", 200, 5*time.Millisecond)
|
e.handleStatusChange(site, "UP", 200, 5*time.Millisecond, "")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "UP" {
|
if s.Status != "UP" {
|
||||||
@@ -276,7 +279,7 @@ func TestHandleStatusChange_DownStaysDown(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", MaxRetries: 2, FailureCount: 3}
|
site := models.Site{ID: 1, Name: "test", Status: "DOWN", MaxRetries: 2, FailureCount: 3}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "DOWN" {
|
if s.Status != "DOWN" {
|
||||||
@@ -295,7 +298,7 @@ func TestHandleStatusChange_SSLExpired(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "SSL EXP", 0, 0)
|
e.handleStatusChange(site, "SSL EXP", 0, 0, "SSL certificate expired")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "SSL EXP" {
|
if s.Status != "SSL EXP" {
|
||||||
@@ -315,7 +318,7 @@ func TestHandleStatusChange_AlertSuppressedMaintenance(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
site := models.Site{ID: 1, Name: "test", Status: "UP", MaxRetries: 0, AlertID: 1}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "DOWN" {
|
if s.Status != "DOWN" {
|
||||||
@@ -346,7 +349,7 @@ func TestHandleStatusChange_RecoverySuppressedMaintenance(t *testing.T) {
|
|||||||
site := models.Site{ID: 1, Name: "test", Status: "DOWN", AlertID: 1}
|
site := models.Site{ID: 1, Name: "test", Status: "DOWN", AlertID: 1}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "UP", 200, 0)
|
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "UP" {
|
if s.Status != "UP" {
|
||||||
@@ -370,7 +373,7 @@ func TestHandleStatusChange_SSLWarning(t *testing.T) {
|
|||||||
}
|
}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "UP", 200, 0)
|
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if !s.SentSSLWarning {
|
if !s.SentSSLWarning {
|
||||||
@@ -393,7 +396,7 @@ func TestHandleStatusChange_SSLWarningNotRepeated(t *testing.T) {
|
|||||||
}
|
}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "UP", 200, 0)
|
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||||
|
|
||||||
waitAsync()
|
waitAsync()
|
||||||
if len(ms.getAlertCallsSnapshot()) != 0 {
|
if len(ms.getAlertCallsSnapshot()) != 0 {
|
||||||
@@ -412,7 +415,7 @@ func TestHandleStatusChange_SSLWarningReset(t *testing.T) {
|
|||||||
}
|
}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "UP", 200, 0)
|
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.SentSSLWarning {
|
if s.SentSSLWarning {
|
||||||
@@ -433,7 +436,7 @@ func TestHandleStatusChange_SSLWarningSuppressedMaint(t *testing.T) {
|
|||||||
}
|
}
|
||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
|
|
||||||
e.handleStatusChange(site, "UP", 200, 0)
|
e.handleStatusChange(site, "UP", 200, 0, "")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if !s.SentSSLWarning {
|
if !s.SentSSLWarning {
|
||||||
@@ -452,7 +455,7 @@ func TestHandleStatusChange_InactiveEngine(t *testing.T) {
|
|||||||
injectSite(e, site)
|
injectSite(e, site)
|
||||||
e.SetActive(false)
|
e.SetActive(false)
|
||||||
|
|
||||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
e.handleStatusChange(site, "DOWN", 0, 0, "test error")
|
||||||
|
|
||||||
s, _ := getSite(e, 1)
|
s, _ := getSite(e, 1)
|
||||||
if s.Status != "UP" {
|
if s.Status != "UP" {
|
||||||
@@ -991,7 +994,7 @@ func TestConcurrent_HandleStatusChangeAndGetState(t *testing.T) {
|
|||||||
wg.Add(2)
|
wg.Add(2)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
e.handleStatusChange(site, "DOWN", 500, 0)
|
e.handleStatusChange(site, "DOWN", 500, 0, "test error")
|
||||||
}()
|
}()
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|||||||
@@ -403,9 +403,10 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
|
|||||||
var req struct {
|
var req struct {
|
||||||
NodeID string `json:"node_id"`
|
NodeID string `json:"node_id"`
|
||||||
Results []struct {
|
Results []struct {
|
||||||
SiteID int `json:"site_id"`
|
SiteID int `json:"site_id"`
|
||||||
LatencyNs int64 `json:"latency_ns"`
|
LatencyNs int64 `json:"latency_ns"`
|
||||||
IsUp bool `json:"is_up"`
|
IsUp bool `json:"is_up"`
|
||||||
|
ErrorReason string `json:"error_reason"`
|
||||||
} `json:"results"`
|
} `json:"results"`
|
||||||
}
|
}
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
@@ -420,7 +421,7 @@ func Start(cfg ServerConfig, s store.Store, eng *monitor.Engine) *http.Server {
|
|||||||
if err := s.SaveCheckFromNode(result.SiteID, req.NodeID, result.LatencyNs, result.IsUp); err != nil {
|
if err := s.SaveCheckFromNode(result.SiteID, req.NodeID, result.LatencyNs, result.IsUp); err != nil {
|
||||||
log.Printf("Failed to save probe result: %v", err)
|
log.Printf("Failed to save probe result: %v", err)
|
||||||
}
|
}
|
||||||
eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp)
|
eng.IngestProbeResult(req.NodeID, result.SiteID, result.LatencyNs, result.IsUp, result.ErrorReason)
|
||||||
}
|
}
|
||||||
if err := s.UpdateNodeLastSeen(req.NodeID); err != nil {
|
if err := s.UpdateNodeLastSeen(req.NodeID); err != nil {
|
||||||
log.Printf("Failed to update node last seen: %v", err)
|
log.Printf("Failed to update node last seen: %v", err)
|
||||||
|
|||||||
@@ -4,13 +4,14 @@ import (
|
|||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
|
||||||
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sync"
|
"sync"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"gitea.lerkolabs.com/lerko/uptop/internal/models"
|
||||||
|
"gitea.lerkolabs.com/lerko/uptop/internal/monitor"
|
||||||
)
|
)
|
||||||
|
|
||||||
// --- Mock Store ---
|
// --- Mock Store ---
|
||||||
@@ -69,13 +70,15 @@ func (m *mockStore) LoadLogs(int) ([]string, error) { return nil, nil
|
|||||||
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
func (m *mockStore) GetAllMaintenanceWindows(int) ([]models.MaintenanceWindow, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
func (m *mockStore) AddMaintenanceWindow(models.MaintenanceWindow) error { return nil }
|
||||||
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) EndMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
func (m *mockStore) DeleteMaintenanceWindow(int) error { return nil }
|
||||||
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
func (m *mockStore) IsMonitorInMaintenance(int) (bool, error) { return false, nil }
|
||||||
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
func (m *mockStore) GetPreference(string) (string, error) { return "", nil }
|
||||||
func (m *mockStore) SetPreference(string, string) error { return nil }
|
func (m *mockStore) SetPreference(string, string) error { return nil }
|
||||||
func (m *mockStore) Close() error { return nil }
|
func (m *mockStore) SaveStateChange(int, string, string, string) error { return nil }
|
||||||
|
func (m *mockStore) GetStateChanges(int, int) ([]models.StateChange, error) { return nil, nil }
|
||||||
|
func (m *mockStore) Close() error { return nil }
|
||||||
|
|
||||||
func (m *mockStore) ExportData() (models.Backup, error) {
|
func (m *mockStore) ExportData() (models.Backup, error) {
|
||||||
return models.Backup{
|
return models.Backup{
|
||||||
|
|||||||
@@ -72,6 +72,15 @@ func (d *PostgresDialect) CreateTablesSQL() []string {
|
|||||||
key TEXT PRIMARY KEY,
|
key TEXT PRIMARY KEY,
|
||||||
value TEXT NOT NULL
|
value TEXT NOT NULL
|
||||||
)`,
|
)`,
|
||||||
|
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
site_id INTEGER NOT NULL,
|
||||||
|
from_status TEXT NOT NULL,
|
||||||
|
to_status TEXT NOT NULL,
|
||||||
|
error_reason TEXT DEFAULT '',
|
||||||
|
changed_at TIMESTAMP DEFAULT NOW()
|
||||||
|
)`,
|
||||||
|
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -79,6 +79,15 @@ func (d *SQLiteDialect) CreateTablesSQL() []string {
|
|||||||
key TEXT PRIMARY KEY,
|
key TEXT PRIMARY KEY,
|
||||||
value TEXT NOT NULL
|
value TEXT NOT NULL
|
||||||
)`,
|
)`,
|
||||||
|
`CREATE TABLE IF NOT EXISTS state_changes (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
site_id INTEGER NOT NULL,
|
||||||
|
from_status TEXT NOT NULL,
|
||||||
|
to_status TEXT NOT NULL,
|
||||||
|
error_reason TEXT DEFAULT '',
|
||||||
|
changed_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||||
|
)`,
|
||||||
|
`CREATE INDEX IF NOT EXISTS idx_state_changes_site ON state_changes(site_id, changed_at DESC)`,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -347,6 +347,29 @@ func (s *SQLStore) DeleteUser(id int) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *SQLStore) SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error {
|
||||||
|
_, err := s.db.Exec(s.q("INSERT INTO state_changes (site_id, from_status, to_status, error_reason) VALUES (?, ?, ?, ?)"),
|
||||||
|
siteID, fromStatus, toStatus, errorReason)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *SQLStore) GetStateChanges(siteID int, limit int) ([]models.StateChange, error) {
|
||||||
|
rows, err := s.db.Query(s.q("SELECT id, site_id, from_status, to_status, error_reason, changed_at FROM state_changes WHERE site_id = ? ORDER BY changed_at DESC LIMIT ?"), siteID, limit)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer rows.Close()
|
||||||
|
var changes []models.StateChange
|
||||||
|
for rows.Next() {
|
||||||
|
var sc models.StateChange
|
||||||
|
if err := rows.Scan(&sc.ID, &sc.SiteID, &sc.FromStatus, &sc.ToStatus, &sc.ErrorReason, &sc.ChangedAt); err != nil {
|
||||||
|
return changes, err
|
||||||
|
}
|
||||||
|
changes = append(changes, sc)
|
||||||
|
}
|
||||||
|
return changes, rows.Err()
|
||||||
|
}
|
||||||
|
|
||||||
func (s *SQLStore) SaveCheck(siteID int, latencyNs int64, isUp bool) error {
|
func (s *SQLStore) SaveCheck(siteID int, latencyNs int64, isUp bool) error {
|
||||||
return s.SaveCheckFromNode(siteID, "", latencyNs, isUp)
|
return s.SaveCheckFromNode(siteID, "", latencyNs, isUp)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -38,6 +38,10 @@ type Store interface {
|
|||||||
SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error
|
SaveCheckFromNode(siteID int, nodeID string, latencyNs int64, isUp bool) error
|
||||||
LoadAllHistory(limit int) (map[int][]models.CheckRecord, error)
|
LoadAllHistory(limit int) (map[int][]models.CheckRecord, error)
|
||||||
|
|
||||||
|
// State Changes
|
||||||
|
SaveStateChange(siteID int, fromStatus, toStatus, errorReason string) error
|
||||||
|
GetStateChanges(siteID int, limit int) ([]models.StateChange, error)
|
||||||
|
|
||||||
// Nodes
|
// Nodes
|
||||||
RegisterNode(node models.ProbeNode) error
|
RegisterNode(node models.ProbeNode) error
|
||||||
GetNode(id string) (models.ProbeNode, error)
|
GetNode(id string) (models.ProbeNode, error)
|
||||||
|
|||||||
@@ -309,6 +309,29 @@ func fmtStatus(status string, paused bool, inMaint bool) string {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// fmtDuration renders d as a compact, human-readable span using at most two
// adjacent units, truncating (never rounding) anything smaller:
//
//	under a minute  -> "45s"
//	under an hour   -> "12m"
//	under a day     -> "3h" or "3h 5m"
//	a day or more   -> "2d" or "2d 4h"
//
// A zero sub-unit is omitted ("2h", not "2h 0m"). Negative durations, which
// can arise when a stored timestamp is slightly ahead of the local clock, are
// clamped to zero and rendered as "0s" rather than as a large negative number
// of seconds.
func fmtDuration(d time.Duration) string {
	if d < 0 {
		// Without the clamp every negative value falls into the
		// "< time.Minute" case below, yielding output like "-7200s".
		d = 0
	}

	switch {
	case d < time.Minute:
		return fmt.Sprintf("%ds", int(d/time.Second))
	case d < time.Hour:
		return fmt.Sprintf("%dm", int(d/time.Minute))
	case d < 24*time.Hour:
		hours := int(d / time.Hour)
		minutes := int(d/time.Minute) % 60
		if minutes > 0 {
			return fmt.Sprintf("%dh %dm", hours, minutes)
		}
		return fmt.Sprintf("%dh", hours)
	}

	totalHours := int(d / time.Hour)
	days, hours := totalHours/24, totalHours%24
	if hours > 0 {
		return fmt.Sprintf("%dd %dh", days, hours)
	}
	return fmt.Sprintf("%dd", days)
}
|
||||||
|
|
||||||
func (m Model) dynamicWidths() (nameW, sparkW int) {
|
func (m Model) dynamicWidths() (nameW, sparkW int) {
|
||||||
fixed := 6 + 10 + 10 + 8 + 8 + 7 + 9 // #, TYPE, STATUS, LATENCY, UPTIME, SSL, RETRY
|
fixed := 6 + 10 + 10 + 8 + 8 + 7 + 9 // #, TYPE, STATUS, LATENCY, UPTIME, SSL, RETRY
|
||||||
overhead := 30 // cell padding + borders
|
overhead := 30 // cell padding + borders
|
||||||
@@ -389,6 +412,14 @@ func (m Model) viewSitesTab() string {
|
|||||||
name = limitStr(name, nameW)
|
name = limitStr(name, nameW)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (site.Status == "DOWN" || site.Status == "SSL EXP") && site.LastError != "" {
|
||||||
|
nameLen := len([]rune(name))
|
||||||
|
errSpace := nameW - nameLen - 1
|
||||||
|
if errSpace > 10 {
|
||||||
|
name = name + " " + subtleStyle.Render(limitStr(site.LastError, errSpace))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
hist, _ := m.engine.GetHistory(site.ID)
|
hist, _ := m.engine.GetHistory(site.ID)
|
||||||
var spark string
|
var spark string
|
||||||
if site.Type == "push" {
|
if site.Type == "push" {
|
||||||
@@ -732,6 +763,25 @@ func (m Model) viewDetailPanel() string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
row("Status", fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)))
|
row("Status", fmtStatus(site.Status, site.Paused, m.isMonitorInMaintenance(site.ID)))
|
||||||
|
|
||||||
|
if (site.Status == "DOWN" || site.Status == "SSL EXP") && site.LastError != "" {
|
||||||
|
row("Error", dangerStyle.Render(limitStr(site.LastError, 60)))
|
||||||
|
}
|
||||||
|
|
||||||
|
if site.Type == "http" && site.StatusCode > 0 {
|
||||||
|
row("HTTP Code", strconv.Itoa(site.StatusCode))
|
||||||
|
}
|
||||||
|
|
||||||
|
if !site.StatusChangedAt.IsZero() {
|
||||||
|
dur := time.Since(site.StatusChangedAt)
|
||||||
|
row("State Since", site.StatusChangedAt.Format("2006-01-02 15:04:05")+" ("+fmtDuration(dur)+")")
|
||||||
|
}
|
||||||
|
|
||||||
|
if !site.LastSuccessAt.IsZero() {
|
||||||
|
ago := time.Since(site.LastSuccessAt)
|
||||||
|
row("Last Success", site.LastSuccessAt.Format("15:04:05")+" ("+fmtDuration(ago)+" ago)")
|
||||||
|
}
|
||||||
|
|
||||||
if m.isMonitorInMaintenance(site.ID) {
|
if m.isMonitorInMaintenance(site.ID) {
|
||||||
for _, mw := range m.maintenanceWindows {
|
for _, mw := range m.maintenanceWindows {
|
||||||
if mw.Type == "maintenance" && (mw.MonitorID == 0 || mw.MonitorID == site.ID || mw.MonitorID == site.ParentID) {
|
if mw.Type == "maintenance" && (mw.MonitorID == 0 || mw.MonitorID == site.ID || mw.MonitorID == site.ParentID) {
|
||||||
@@ -787,7 +837,30 @@ func (m Model) viewDetailPanel() string {
|
|||||||
}
|
}
|
||||||
latency := time.Duration(result.LatencyNs).Milliseconds()
|
latency := time.Duration(result.LatencyNs).Milliseconds()
|
||||||
ago := time.Since(result.CheckedAt).Truncate(time.Second)
|
ago := time.Since(result.CheckedAt).Truncate(time.Second)
|
||||||
fmt.Fprintf(&b, " %-14s %s %dms %s ago\n", nodeID, status, latency, ago)
|
line := fmt.Sprintf(" %-14s %s %dms %s ago", nodeID, status, latency, ago)
|
||||||
|
if !result.IsUp && result.ErrorReason != "" {
|
||||||
|
line += " " + dangerStyle.Render(limitStr(result.ErrorReason, 30))
|
||||||
|
}
|
||||||
|
b.WriteString(line + "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stateChanges := m.engine.GetStateChanges(site.ID, 5)
|
||||||
|
if len(stateChanges) > 0 {
|
||||||
|
b.WriteString("\n" + subtleStyle.Render(" STATE CHANGES") + "\n")
|
||||||
|
for _, sc := range stateChanges {
|
||||||
|
ago := fmtDuration(time.Since(sc.ChangedAt))
|
||||||
|
arrow := subtleStyle.Render(sc.FromStatus) + " → "
|
||||||
|
if sc.ToStatus == "UP" {
|
||||||
|
arrow += specialStyle.Render(sc.ToStatus)
|
||||||
|
} else {
|
||||||
|
arrow += dangerStyle.Render(sc.ToStatus)
|
||||||
|
}
|
||||||
|
line := fmt.Sprintf(" %s %s", arrow, subtleStyle.Render(ago+" ago"))
|
||||||
|
if sc.ErrorReason != "" && sc.ToStatus != "UP" {
|
||||||
|
line += " " + dangerStyle.Render(limitStr(sc.ErrorReason, 40))
|
||||||
|
}
|
||||||
|
b.WriteString(line + "\n")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user