fix(monitor): add jitter to check intervals and stagger startup
Monitors with the same interval no longer fire simultaneously. Each tick waits the configured interval plus up to 10% of that interval as random jitter. Initial checks are staggered over 0-3s to avoid a thundering herd on startup.
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
|||||||
"go-upkeep/internal/alert"
|
"go-upkeep/internal/alert"
|
||||||
"go-upkeep/internal/models"
|
"go-upkeep/internal/models"
|
||||||
"go-upkeep/internal/store"
|
"go-upkeep/internal/store"
|
||||||
|
"math/rand/v2"
|
||||||
"net/http"
|
"net/http"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -277,6 +278,14 @@ func (e *Engine) ToggleSitePause(id int) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (e *Engine) monitorRoutine(ctx context.Context, id int) {
|
func (e *Engine) monitorRoutine(ctx context.Context, id int) {
|
||||||
|
// Stagger initial check to avoid thundering herd on startup
|
||||||
|
stagger := time.Duration(rand.IntN(3000)) * time.Millisecond
|
||||||
|
select {
|
||||||
|
case <-time.After(stagger):
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
e.checkByID(id)
|
e.checkByID(id)
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
@@ -314,8 +323,9 @@ func (e *Engine) monitorRoutine(ctx context.Context, id int) {
|
|||||||
if interval < 5 {
|
if interval < 5 {
|
||||||
interval = 5
|
interval = 5
|
||||||
}
|
}
|
||||||
|
jitter := time.Duration(rand.IntN(interval*100)) * time.Millisecond
|
||||||
select {
|
select {
|
||||||
case <-time.After(time.Duration(interval) * time.Second):
|
case <-time.After(time.Duration(interval)*time.Second + jitter):
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user