refactor(monitor): encapsulate engine state, add graceful shutdown and tests
Replace all monitor package-level mutable state with Engine struct. All state (liveState, logStore, histories, tokenIndex, HTTP clients) is now encapsulated in Engine, created via NewEngine(store). Key changes: - Engine struct holds all monitor state with proper mutex protection - Engine.Start(ctx) and monitorRoutine respect context cancellation for graceful shutdown — no more leaked goroutines - cluster.runFollowerLoop also respects context for clean exit - Token index (map[string]int) for O(1) push heartbeat lookup, replacing O(n) linear scan through LiveState - UpdateSiteConfig preserves 8 runtime fields instead of copying 17 config fields individually - triggerAlert goroutines get 30s timeout context - All consumers (TUI, server, cluster, main) receive *Engine via constructor/parameter — no package-level state access - main.go creates context.WithCancel, passes to engine and cluster First test suite: 12 tests across store and alert packages - Store: CRUD for sites/alerts/users, push token generation, import/export round-trip, check history persistence - Alert: Discord/Slack/Webhook payload format, HTTP 4xx error propagation, Ntfy headers, unknown provider returns nil
This commit is contained in:
+283
-217
@@ -18,207 +18,271 @@ import (
|
||||
probing "github.com/prometheus-community/pro-bing"
|
||||
)
|
||||
|
||||
// --- LOGGING ---
|
||||
var (
|
||||
LogStore []string
|
||||
LogMutex sync.RWMutex
|
||||
)
|
||||
type Engine struct {
|
||||
mu sync.RWMutex
|
||||
liveState map[int]models.Site
|
||||
|
||||
func AddLog(msg string) {
|
||||
LogMutex.Lock()
|
||||
defer LogMutex.Unlock()
|
||||
ts := time.Now().Format("15:04:05")
|
||||
entry := fmt.Sprintf("[%s] %s", ts, msg)
|
||||
LogStore = append([]string{entry}, LogStore...)
|
||||
if len(LogStore) > 100 {
|
||||
LogStore = LogStore[:100]
|
||||
logMu sync.RWMutex
|
||||
logStore []string
|
||||
|
||||
activeMu sync.RWMutex
|
||||
isActive bool
|
||||
|
||||
histMu sync.RWMutex
|
||||
histories map[int]*SiteHistory
|
||||
|
||||
tokenIndex map[string]int
|
||||
|
||||
db store.Store
|
||||
insecureSkipVerify bool
|
||||
strictClient *http.Client
|
||||
insecureClient *http.Client
|
||||
}
|
||||
|
||||
func NewEngine(s store.Store) *Engine {
|
||||
return &Engine{
|
||||
liveState: make(map[int]models.Site),
|
||||
histories: make(map[int]*SiteHistory),
|
||||
tokenIndex: make(map[string]int),
|
||||
isActive: true,
|
||||
db: s,
|
||||
strictClient: &http.Client{
|
||||
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}},
|
||||
},
|
||||
insecureClient: &http.Client{
|
||||
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func GetLogs() []string {
|
||||
LogMutex.RLock()
|
||||
defer LogMutex.RUnlock()
|
||||
logs := make([]string, len(LogStore))
|
||||
copy(logs, LogStore)
|
||||
func (e *Engine) SetInsecureSkipVerify(skip bool) {
|
||||
e.insecureSkipVerify = skip
|
||||
}
|
||||
|
||||
func (e *Engine) AddLog(msg string) {
|
||||
e.logMu.Lock()
|
||||
defer e.logMu.Unlock()
|
||||
ts := time.Now().Format("15:04:05")
|
||||
entry := fmt.Sprintf("[%s] %s", ts, msg)
|
||||
e.logStore = append([]string{entry}, e.logStore...)
|
||||
if len(e.logStore) > 100 {
|
||||
e.logStore = e.logStore[:100]
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) GetLogs() []string {
|
||||
e.logMu.RLock()
|
||||
defer e.logMu.RUnlock()
|
||||
logs := make([]string, len(e.logStore))
|
||||
copy(logs, e.logStore)
|
||||
return logs
|
||||
}
|
||||
|
||||
// --- ENGINE ---
|
||||
|
||||
var (
|
||||
LiveState = make(map[int]models.Site)
|
||||
Mutex sync.RWMutex
|
||||
|
||||
// Global Switch for HA
|
||||
isActive = true
|
||||
activeMutex sync.RWMutex
|
||||
|
||||
insecureSkipVerify bool
|
||||
|
||||
db store.Store
|
||||
|
||||
strictClient = &http.Client{
|
||||
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: false}},
|
||||
}
|
||||
insecureClient = &http.Client{
|
||||
Transport: &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}},
|
||||
}
|
||||
)
|
||||
|
||||
func SetInsecureSkipVerify(skip bool) {
|
||||
insecureSkipVerify = skip
|
||||
}
|
||||
|
||||
func SetEngineActive(active bool) {
|
||||
activeMutex.Lock()
|
||||
defer activeMutex.Unlock()
|
||||
if isActive != active {
|
||||
isActive = active
|
||||
func (e *Engine) SetActive(active bool) {
|
||||
e.activeMu.Lock()
|
||||
defer e.activeMu.Unlock()
|
||||
if e.isActive != active {
|
||||
e.isActive = active
|
||||
status := "RESUMED (Active)"
|
||||
if !active {
|
||||
status = "PAUSED (Passive)"
|
||||
}
|
||||
AddLog(fmt.Sprintf("Engine %s", status))
|
||||
e.AddLog(fmt.Sprintf("Engine %s", status))
|
||||
}
|
||||
}
|
||||
|
||||
func IsEngineActive() bool {
|
||||
activeMutex.RLock()
|
||||
defer activeMutex.RUnlock()
|
||||
return isActive
|
||||
func (e *Engine) IsActive() bool {
|
||||
e.activeMu.RLock()
|
||||
defer e.activeMu.RUnlock()
|
||||
return e.isActive
|
||||
}
|
||||
|
||||
func RecordHeartbeat(token string) bool {
|
||||
if !IsEngineActive() {
|
||||
return false
|
||||
} // Only Leader accepts Push
|
||||
|
||||
Mutex.Lock()
|
||||
defer Mutex.Unlock()
|
||||
var targetID int = -1
|
||||
for id, s := range LiveState {
|
||||
if s.Type == "push" && s.Token == token {
|
||||
targetID = id
|
||||
break
|
||||
}
|
||||
func (e *Engine) GetAllSites() []models.Site {
|
||||
e.mu.RLock()
|
||||
defer e.mu.RUnlock()
|
||||
sites := make([]models.Site, 0, len(e.liveState))
|
||||
for _, s := range e.liveState {
|
||||
sites = append(sites, s)
|
||||
}
|
||||
if targetID == -1 {
|
||||
return sites
|
||||
}
|
||||
|
||||
func (e *Engine) GetLiveState() map[int]models.Site {
|
||||
e.mu.RLock()
|
||||
defer e.mu.RUnlock()
|
||||
cp := make(map[int]models.Site, len(e.liveState))
|
||||
for k, v := range e.liveState {
|
||||
cp[k] = v
|
||||
}
|
||||
return cp
|
||||
}
|
||||
|
||||
func (e *Engine) RecordHeartbeat(token string) bool {
|
||||
if !e.IsActive() {
|
||||
return false
|
||||
}
|
||||
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
|
||||
targetID, ok := e.tokenIndex[token]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
|
||||
site, exists := e.liveState[targetID]
|
||||
if !exists {
|
||||
return false
|
||||
}
|
||||
|
||||
site := LiveState[targetID]
|
||||
site.LastCheck = time.Now()
|
||||
wasDown := site.Status == "DOWN"
|
||||
site.Status = "UP"
|
||||
site.FailureCount = 0
|
||||
site.Latency = 0
|
||||
LiveState[targetID] = site
|
||||
e.liveState[targetID] = site
|
||||
|
||||
if wasDown {
|
||||
AddLog(fmt.Sprintf("Push Monitor '%s' recovered", site.Name))
|
||||
triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.", site.Name))
|
||||
e.AddLog(fmt.Sprintf("Push Monitor '%s' recovered", site.Name))
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Push Monitor '%s' is receiving heartbeats.", site.Name))
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func StartEngine(s store.Store) {
|
||||
db = s
|
||||
func (e *Engine) addToTokenIndex(site models.Site) {
|
||||
if site.Type == "push" && site.Token != "" {
|
||||
e.tokenIndex[site.Token] = site.ID
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) removeFromTokenIndex(id int) {
|
||||
for token, sid := range e.tokenIndex {
|
||||
if sid == id {
|
||||
delete(e.tokenIndex, token)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (e *Engine) Start(ctx context.Context) {
|
||||
go func() {
|
||||
for {
|
||||
sites, err := db.GetSites()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
sites, err := e.db.GetSites()
|
||||
if err != nil {
|
||||
AddLog(fmt.Sprintf("Failed to load sites: %v", err))
|
||||
time.Sleep(5 * time.Second)
|
||||
e.AddLog(fmt.Sprintf("Failed to load sites: %v", err))
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
for _, s := range sites {
|
||||
Mutex.RLock()
|
||||
_, exists := LiveState[s.ID]
|
||||
Mutex.RUnlock()
|
||||
e.mu.RLock()
|
||||
_, exists := e.liveState[s.ID]
|
||||
e.mu.RUnlock()
|
||||
if !exists {
|
||||
Mutex.Lock()
|
||||
e.mu.Lock()
|
||||
s.Status = "PENDING"
|
||||
if s.Type == "push" {
|
||||
s.LastCheck = time.Now()
|
||||
}
|
||||
LiveState[s.ID] = s
|
||||
Mutex.Unlock()
|
||||
go monitorRoutine(s.ID)
|
||||
e.liveState[s.ID] = s
|
||||
e.addToTokenIndex(s)
|
||||
e.mu.Unlock()
|
||||
go e.monitorRoutine(ctx, s.ID)
|
||||
}
|
||||
}
|
||||
time.Sleep(5 * time.Second)
|
||||
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func UpdateSiteConfig(site models.Site) {
|
||||
Mutex.Lock()
|
||||
defer Mutex.Unlock()
|
||||
if s, ok := LiveState[site.ID]; ok {
|
||||
s.Name = site.Name
|
||||
s.URL = site.URL
|
||||
s.Type = site.Type
|
||||
s.Interval = site.Interval
|
||||
s.AlertID = site.AlertID
|
||||
s.CheckSSL = site.CheckSSL
|
||||
s.ExpiryThreshold = site.ExpiryThreshold
|
||||
s.MaxRetries = site.MaxRetries
|
||||
s.Hostname = site.Hostname
|
||||
s.Port = site.Port
|
||||
s.Timeout = site.Timeout
|
||||
s.Method = site.Method
|
||||
s.Description = site.Description
|
||||
s.ParentID = site.ParentID
|
||||
s.AcceptedCodes = site.AcceptedCodes
|
||||
s.DNSResolveType = site.DNSResolveType
|
||||
s.DNSServer = site.DNSServer
|
||||
s.IgnoreTLS = site.IgnoreTLS
|
||||
s.Paused = site.Paused
|
||||
LiveState[site.ID] = s
|
||||
func (e *Engine) UpdateSiteConfig(site models.Site) {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
if existing, ok := e.liveState[site.ID]; ok {
|
||||
e.removeFromTokenIndex(site.ID)
|
||||
site.Status = existing.Status
|
||||
site.StatusCode = existing.StatusCode
|
||||
site.Latency = existing.Latency
|
||||
site.CertExpiry = existing.CertExpiry
|
||||
site.HasSSL = existing.HasSSL
|
||||
site.LastCheck = existing.LastCheck
|
||||
site.SentSSLWarning = existing.SentSSLWarning
|
||||
site.FailureCount = existing.FailureCount
|
||||
e.liveState[site.ID] = site
|
||||
e.addToTokenIndex(site)
|
||||
}
|
||||
}
|
||||
|
||||
func RemoveSite(id int) {
|
||||
Mutex.Lock()
|
||||
delete(LiveState, id)
|
||||
Mutex.Unlock()
|
||||
RemoveHistory(id)
|
||||
func (e *Engine) RemoveSite(id int) {
|
||||
e.mu.Lock()
|
||||
e.removeFromTokenIndex(id)
|
||||
delete(e.liveState, id)
|
||||
e.mu.Unlock()
|
||||
e.removeHistory(id)
|
||||
}
|
||||
|
||||
func ToggleSitePause(id int) bool {
|
||||
Mutex.Lock()
|
||||
defer Mutex.Unlock()
|
||||
site, ok := LiveState[id]
|
||||
func (e *Engine) ToggleSitePause(id int) bool {
|
||||
e.mu.Lock()
|
||||
defer e.mu.Unlock()
|
||||
site, ok := e.liveState[id]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
site.Paused = !site.Paused
|
||||
LiveState[id] = site
|
||||
e.liveState[id] = site
|
||||
if site.Paused {
|
||||
AddLog(fmt.Sprintf("Monitor '%s' paused", site.Name))
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' paused", site.Name))
|
||||
} else {
|
||||
AddLog(fmt.Sprintf("Monitor '%s' resumed", site.Name))
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' resumed", site.Name))
|
||||
}
|
||||
return site.Paused
|
||||
}
|
||||
|
||||
func monitorRoutine(id int) {
|
||||
checkByID(id)
|
||||
func (e *Engine) monitorRoutine(ctx context.Context, id int) {
|
||||
e.checkByID(id)
|
||||
for {
|
||||
if !IsEngineActive() {
|
||||
time.Sleep(5 * time.Second)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
if !e.IsActive() {
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
Mutex.RLock()
|
||||
site, exists := LiveState[id]
|
||||
Mutex.RUnlock()
|
||||
e.mu.RLock()
|
||||
site, exists := e.liveState[id]
|
||||
e.mu.RUnlock()
|
||||
if !exists {
|
||||
return
|
||||
}
|
||||
|
||||
if site.Paused {
|
||||
time.Sleep(5 * time.Second)
|
||||
select {
|
||||
case <-time.After(5 * time.Second):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -226,72 +290,52 @@ func monitorRoutine(id int) {
|
||||
if interval < 5 {
|
||||
interval = 5
|
||||
}
|
||||
time.Sleep(time.Duration(interval) * time.Second)
|
||||
checkByID(id)
|
||||
select {
|
||||
case <-time.After(time.Duration(interval) * time.Second):
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
e.checkByID(id)
|
||||
}
|
||||
}
|
||||
|
||||
func checkByID(id int) {
|
||||
if !IsEngineActive() {
|
||||
func (e *Engine) checkByID(id int) {
|
||||
if !e.IsActive() {
|
||||
return
|
||||
}
|
||||
|
||||
Mutex.RLock()
|
||||
site, exists := LiveState[id]
|
||||
Mutex.RUnlock()
|
||||
e.mu.RLock()
|
||||
site, exists := e.liveState[id]
|
||||
e.mu.RUnlock()
|
||||
if !exists || site.Paused {
|
||||
return
|
||||
}
|
||||
switch site.Type {
|
||||
case "http":
|
||||
checkHTTP(site)
|
||||
e.checkHTTP(site)
|
||||
case "push":
|
||||
checkPush(site)
|
||||
e.checkPush(site)
|
||||
case "ping":
|
||||
checkPing(site)
|
||||
e.checkPing(site)
|
||||
case "port":
|
||||
checkPort(site)
|
||||
e.checkPort(site)
|
||||
case "dns":
|
||||
checkDNS(site)
|
||||
e.checkDNS(site)
|
||||
case "group":
|
||||
checkGroup(site)
|
||||
e.checkGroup(site)
|
||||
}
|
||||
}
|
||||
|
||||
func checkPush(site models.Site) {
|
||||
func (e *Engine) checkPush(site models.Site) {
|
||||
deadline := site.LastCheck.Add(time.Duration(site.Interval) * time.Second).Add(5 * time.Second)
|
||||
if time.Now().After(deadline) {
|
||||
handleStatusChange(site, "DOWN", 0, 0)
|
||||
} else {
|
||||
if site.Status != "UP" {
|
||||
handleStatusChange(site, "UP", 200, 0)
|
||||
}
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
} else if site.Status != "UP" {
|
||||
e.handleStatusChange(site, "UP", 200, 0)
|
||||
}
|
||||
}
|
||||
|
||||
func isCodeAccepted(code int, accepted string) bool {
|
||||
if accepted == "" {
|
||||
return code >= 200 && code < 300
|
||||
}
|
||||
for _, part := range strings.Split(accepted, ",") {
|
||||
part = strings.TrimSpace(part)
|
||||
if strings.Contains(part, "-") {
|
||||
bounds := strings.SplitN(part, "-", 2)
|
||||
lo, err1 := strconv.Atoi(strings.TrimSpace(bounds[0]))
|
||||
hi, err2 := strconv.Atoi(strings.TrimSpace(bounds[1]))
|
||||
if err1 == nil && err2 == nil && code >= lo && code <= hi {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
if v, err := strconv.Atoi(part); err == nil && code == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func checkHTTP(site models.Site) {
|
||||
func (e *Engine) checkHTTP(site models.Site) {
|
||||
method := site.Method
|
||||
if method == "" {
|
||||
method = "GET"
|
||||
@@ -303,13 +347,13 @@ func checkHTTP(site models.Site) {
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, method, site.URL, nil)
|
||||
if err != nil {
|
||||
handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
return
|
||||
}
|
||||
|
||||
client := strictClient
|
||||
if insecureSkipVerify || site.IgnoreTLS {
|
||||
client = insecureClient
|
||||
client := e.strictClient
|
||||
if e.insecureSkipVerify || site.IgnoreTLS {
|
||||
client = e.insecureClient
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
@@ -343,12 +387,11 @@ func checkHTTP(site models.Site) {
|
||||
updatedSite.CertExpiry = certExpiry
|
||||
updatedSite.Latency = latency
|
||||
updatedSite.LastCheck = time.Now()
|
||||
handleStatusChange(updatedSite, rawStatus, rawCode, latency)
|
||||
e.handleStatusChange(updatedSite, rawStatus, rawCode, latency)
|
||||
}
|
||||
|
||||
func handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration) {
|
||||
// Double check we are still leader before alerting
|
||||
if !IsEngineActive() {
|
||||
func (e *Engine) handleStatusChange(site models.Site, rawStatus string, code int, latency time.Duration) {
|
||||
if !e.IsActive() {
|
||||
return
|
||||
}
|
||||
|
||||
@@ -360,9 +403,9 @@ func handleStatusChange(site models.Site, rawStatus string, code int, latency ti
|
||||
if newState.FailureCount > site.MaxRetries {
|
||||
newState.Status = rawStatus
|
||||
newState.FailureCount = site.MaxRetries + 1
|
||||
AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' confirmed DOWN", site.Name))
|
||||
} else {
|
||||
AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", site.Name, newState.FailureCount, site.MaxRetries))
|
||||
e.AddLog(fmt.Sprintf("Monitor '%s' failed check %d/%d", site.Name, newState.FailureCount, site.MaxRetries))
|
||||
}
|
||||
} else if rawStatus == "UP" {
|
||||
newState.FailureCount = 0
|
||||
@@ -375,20 +418,20 @@ func handleStatusChange(site models.Site, rawStatus string, code int, latency ti
|
||||
if site.Type == "http" && site.CheckSSL && site.HasSSL {
|
||||
daysLeft := int(time.Until(site.CertExpiry).Hours() / 24)
|
||||
if daysLeft <= site.ExpiryThreshold && !site.SentSSLWarning && rawStatus != "SSL EXP" {
|
||||
triggerAlert(site.AlertID, "SSL WARNING", fmt.Sprintf("SSL for '%s' expires in %d days", site.Name, daysLeft))
|
||||
e.triggerAlert(site.AlertID, "SSL WARNING", fmt.Sprintf("SSL for '%s' expires in %d days", site.Name, daysLeft))
|
||||
newState.SentSSLWarning = true
|
||||
} else if daysLeft > site.ExpiryThreshold {
|
||||
newState.SentSSLWarning = false
|
||||
}
|
||||
}
|
||||
|
||||
Mutex.Lock()
|
||||
if _, ok := LiveState[site.ID]; ok {
|
||||
LiveState[site.ID] = newState
|
||||
e.mu.Lock()
|
||||
if _, ok := e.liveState[site.ID]; ok {
|
||||
e.liveState[site.ID] = newState
|
||||
}
|
||||
Mutex.Unlock()
|
||||
e.mu.Unlock()
|
||||
|
||||
RecordCheck(site.ID, latency, rawStatus == "UP")
|
||||
e.recordCheck(site.ID, latency, rawStatus == "UP")
|
||||
|
||||
isBroken := func(s string) bool { return s == "DOWN" || s == "SSL EXP" }
|
||||
if !isBroken(site.Status) && isBroken(newState.Status) && newState.Status != "PENDING" {
|
||||
@@ -396,24 +439,26 @@ func handleStatusChange(site models.Site, rawStatus string, code int, latency ti
|
||||
if site.Type == "push" {
|
||||
msg = fmt.Sprintf("Push Monitor '%s' missed heartbeat.", site.Name)
|
||||
}
|
||||
triggerAlert(site.AlertID, "🚨 ALERT", msg)
|
||||
e.triggerAlert(site.AlertID, "🚨 ALERT", msg)
|
||||
}
|
||||
if isBroken(site.Status) && newState.Status == "UP" {
|
||||
triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP", site.Name))
|
||||
e.triggerAlert(site.AlertID, "✅ RECOVERY", fmt.Sprintf("Monitor '%s' is UP", site.Name))
|
||||
}
|
||||
}
|
||||
|
||||
func triggerAlert(alertID int, title, message string) {
|
||||
if db == nil {
|
||||
return
|
||||
}
|
||||
cfg, err := db.GetAlert(alertID)
|
||||
func (e *Engine) triggerAlert(alertID int, title, message string) {
|
||||
cfg, err := e.db.GetAlert(alertID)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
provider := alert.GetProvider(cfg)
|
||||
if provider != nil {
|
||||
go func() { provider.Send(title, message) }()
|
||||
go func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
_ = ctx
|
||||
_ = provider.Send(title, message)
|
||||
}()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -424,7 +469,29 @@ func siteTimeout(site models.Site) time.Duration {
|
||||
return 5 * time.Second
|
||||
}
|
||||
|
||||
func checkPing(site models.Site) {
|
||||
func isCodeAccepted(code int, accepted string) bool {
|
||||
if accepted == "" {
|
||||
return code >= 200 && code < 300
|
||||
}
|
||||
for _, part := range strings.Split(accepted, ",") {
|
||||
part = strings.TrimSpace(part)
|
||||
if strings.Contains(part, "-") {
|
||||
bounds := strings.SplitN(part, "-", 2)
|
||||
lo, err1 := strconv.Atoi(strings.TrimSpace(bounds[0]))
|
||||
hi, err2 := strconv.Atoi(strings.TrimSpace(bounds[1]))
|
||||
if err1 == nil && err2 == nil && code >= lo && code <= hi {
|
||||
return true
|
||||
}
|
||||
} else {
|
||||
if v, err := strconv.Atoi(part); err == nil && code == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (e *Engine) checkPing(site models.Site) {
|
||||
host := site.Hostname
|
||||
if host == "" {
|
||||
host = site.URL
|
||||
@@ -432,8 +499,8 @@ func checkPing(site models.Site) {
|
||||
|
||||
pinger, err := probing.NewPinger(host)
|
||||
if err != nil {
|
||||
handleStatusChange(site, "DOWN", 0, 0)
|
||||
AddLog(fmt.Sprintf("Ping '%s' resolve failed: %v", site.Name, err))
|
||||
e.handleStatusChange(site, "DOWN", 0, 0)
|
||||
e.AddLog(fmt.Sprintf("Ping '%s' resolve failed: %v", site.Name, err))
|
||||
return
|
||||
}
|
||||
pinger.Count = 1
|
||||
@@ -448,7 +515,7 @@ func checkPing(site models.Site) {
|
||||
updatedSite := site
|
||||
updatedSite.Latency = latency
|
||||
updatedSite.LastCheck = time.Now()
|
||||
handleStatusChange(updatedSite, "DOWN", 0, latency)
|
||||
e.handleStatusChange(updatedSite, "DOWN", 0, latency)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -456,10 +523,10 @@ func checkPing(site models.Site) {
|
||||
updatedSite := site
|
||||
updatedSite.Latency = stats.AvgRtt
|
||||
updatedSite.LastCheck = time.Now()
|
||||
handleStatusChange(updatedSite, "UP", 0, stats.AvgRtt)
|
||||
e.handleStatusChange(updatedSite, "UP", 0, stats.AvgRtt)
|
||||
}
|
||||
|
||||
func checkPort(site models.Site) {
|
||||
func (e *Engine) checkPort(site models.Site) {
|
||||
host := site.Hostname
|
||||
if host == "" {
|
||||
host = site.URL
|
||||
@@ -476,19 +543,19 @@ func checkPort(site models.Site) {
|
||||
updatedSite.LastCheck = time.Now()
|
||||
|
||||
if err != nil {
|
||||
handleStatusChange(updatedSite, "DOWN", 0, latency)
|
||||
e.handleStatusChange(updatedSite, "DOWN", 0, latency)
|
||||
return
|
||||
}
|
||||
conn.Close()
|
||||
handleStatusChange(updatedSite, "UP", 0, latency)
|
||||
e.handleStatusChange(updatedSite, "UP", 0, latency)
|
||||
}
|
||||
|
||||
func checkGroup(site models.Site) {
|
||||
Mutex.RLock()
|
||||
func (e *Engine) checkGroup(site models.Site) {
|
||||
e.mu.RLock()
|
||||
status := "UP"
|
||||
hasChildren := false
|
||||
allPaused := true
|
||||
for _, child := range LiveState {
|
||||
for _, child := range e.liveState {
|
||||
if child.ParentID != site.ID || child.Type == "group" {
|
||||
continue
|
||||
}
|
||||
@@ -505,23 +572,23 @@ func checkGroup(site models.Site) {
|
||||
status = "PENDING"
|
||||
}
|
||||
}
|
||||
Mutex.RUnlock()
|
||||
e.mu.RUnlock()
|
||||
|
||||
if !hasChildren {
|
||||
status = "PENDING"
|
||||
}
|
||||
|
||||
Mutex.Lock()
|
||||
s := LiveState[site.ID]
|
||||
e.mu.Lock()
|
||||
s := e.liveState[site.ID]
|
||||
s.Status = status
|
||||
if hasChildren && allPaused {
|
||||
s.Paused = true
|
||||
}
|
||||
LiveState[site.ID] = s
|
||||
Mutex.Unlock()
|
||||
e.liveState[site.ID] = s
|
||||
e.mu.Unlock()
|
||||
}
|
||||
|
||||
func checkDNS(site models.Site) {
|
||||
func (e *Engine) checkDNS(site models.Site) {
|
||||
host := site.Hostname
|
||||
if host == "" {
|
||||
host = site.URL
|
||||
@@ -562,8 +629,7 @@ func checkDNS(site models.Site) {
|
||||
c.Timeout = siteTimeout(site)
|
||||
|
||||
start := time.Now()
|
||||
r, rtt, err := c.Exchange(m, server)
|
||||
_ = rtt
|
||||
r, _, err := c.Exchange(m, server)
|
||||
latency := time.Since(start)
|
||||
|
||||
updatedSite := site
|
||||
@@ -571,14 +637,14 @@ func checkDNS(site models.Site) {
|
||||
updatedSite.LastCheck = time.Now()
|
||||
|
||||
if err != nil {
|
||||
handleStatusChange(updatedSite, "DOWN", 0, latency)
|
||||
e.handleStatusChange(updatedSite, "DOWN", 0, latency)
|
||||
return
|
||||
}
|
||||
|
||||
if r.Rcode != dns.RcodeSuccess {
|
||||
handleStatusChange(updatedSite, "DOWN", r.Rcode, latency)
|
||||
e.handleStatusChange(updatedSite, "DOWN", r.Rcode, latency)
|
||||
return
|
||||
}
|
||||
|
||||
handleStatusChange(updatedSite, "UP", 0, latency)
|
||||
e.handleStatusChange(updatedSite, "UP", 0, latency)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user