diff --git a/internal/monitor/monitor.go b/internal/monitor/monitor.go index 3d0b01e..2db743f 100644 --- a/internal/monitor/monitor.go +++ b/internal/monitor/monitor.go @@ -53,6 +53,9 @@ type Engine struct { alertHealthMu sync.RWMutex alertHealth map[int]AlertHealth + recheckMu sync.RWMutex + recheck map[int]chan struct{} + db store.Store insecureSkipVerify bool allowPrivateTargets bool @@ -74,6 +77,7 @@ func newEngine(s store.Store, allowPrivateTargets bool) *Engine { liveState: make(map[int]models.Site), histories: make(map[int]*SiteHistory), tokenIndex: make(map[string]int), + recheck: make(map[int]chan struct{}), probeResults: make(map[int]map[string]NodeResult), alertHealth: make(map[int]AlertHealth), aggStrategy: AggAnyDown, @@ -335,7 +339,6 @@ func (e *Engine) Start(ctx context.Context) { func (e *Engine) UpdateSiteConfig(site models.Site) { e.mu.Lock() - defer e.mu.Unlock() if existing, ok := e.liveState[site.ID]; ok { e.removeFromTokenIndex(site.ID) site.Status = existing.Status @@ -352,6 +355,28 @@ func (e *Engine) UpdateSiteConfig(site models.Site) { e.liveState[site.ID] = site e.addToTokenIndex(site) } + e.mu.Unlock() + + e.signalRecheck(site.ID) +} + +func (e *Engine) getRecheckChan(id int) chan struct{} { + e.recheckMu.Lock() + defer e.recheckMu.Unlock() + ch, ok := e.recheck[id] + if !ok { + ch = make(chan struct{}, 1) + e.recheck[id] = ch + } + return ch +} + +func (e *Engine) signalRecheck(id int) { + ch := e.getRecheckChan(id) + select { + case ch <- struct{}{}: + default: + } } func (e *Engine) RemoveSite(id int) { @@ -360,6 +385,10 @@ func (e *Engine) RemoveSite(id int) { delete(e.liveState, id) e.mu.Unlock() e.removeHistory(id) + + e.recheckMu.Lock() + delete(e.recheck, id) + e.recheckMu.Unlock() } func (e *Engine) ToggleSitePause(id int) bool { @@ -380,6 +409,8 @@ func (e *Engine) ToggleSitePause(id int) bool { } func (e *Engine) monitorRoutine(ctx context.Context, id int) { + recheckCh := e.getRecheckChan(id) + // Stagger initial check to avoid thundering herd on startup stagger := time.Duration(rand.IntN(3000)) * time.Millisecond //nolint:gosec // non-security jitter select { @@ -401,6 +432,7 @@ func (e *Engine) monitorRoutine(ctx context.Context, id int) { case <-time.After(pollInterval): case <-ctx.Done(): return + case <-recheckCh: } continue } @@ -417,6 +449,7 @@ func (e *Engine) monitorRoutine(ctx context.Context, id int) { case <-time.After(pollInterval): case <-ctx.Done(): return + case <-recheckCh: } continue } @@ -430,6 +463,7 @@ func (e *Engine) monitorRoutine(ctx context.Context, id int) { case <-time.After(time.Duration(interval)*time.Second + jitter): case <-ctx.Done(): return + case <-recheckCh: } e.checkByID(id) } diff --git a/internal/tui/format.go b/internal/tui/format.go index 3c87f1d..a2e553d 100644 --- a/internal/tui/format.go +++ b/internal/tui/format.go @@ -110,11 +110,7 @@ func fmtSSL(site models.Site) string { } func fmtRetries(site models.Site) string { - retriesDone := site.FailureCount - 1 - if retriesDone < 0 { - retriesDone = 0 - } - dispCount := retriesDone + dispCount := site.FailureCount if dispCount > site.MaxRetries { dispCount = site.MaxRetries } diff --git a/internal/tui/tui.go b/internal/tui/tui.go index 534e90a..ab76f65 100644 --- a/internal/tui/tui.go +++ b/internal/tui/tui.go @@ -70,6 +70,7 @@ const ( stateFormUser stateConfirmDelete stateFormMaint + stateHistory ) type Model struct { @@ -91,8 +92,12 @@ type Model struct { logViewport viewport.Model logFilterImportant bool - isAdmin bool - zones *zone.Manager + + historyViewport viewport.Model + historyChanges []models.StateChange + historySiteName string + isAdmin bool + zones *zone.Manager deleteID int deleteName string diff --git a/internal/tui/update.go b/internal/tui/update.go index af9dcc3..84e2bbb 100644 --- a/internal/tui/update.go +++ b/internal/tui/update.go @@ -4,11 +4,19 @@ import ( "fmt" "time" + "github.com/charmbracelet/bubbles/viewport" tea "github.com/charmbracelet/bubbletea" "github.com/charmbracelet/huh" ) func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + switch msg := msg.(type) { + case tea.WindowSizeMsg: + return m.handleResize(msg) + case time.Time: + return m.handleTick(msg) + } + if m.state == stateConfirmDelete { return m.handleConfirmDelete(msg) } @@ -17,10 +25,6 @@ func (m Model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { } switch msg := msg.(type) { - case tea.WindowSizeMsg: - return m.handleResize(msg) - case time.Time: - return m.handleTick(msg) case tea.MouseMsg: return m.handleMouse(msg) case tea.KeyMsg: @@ -122,6 +126,8 @@ func (m *Model) handleResize(msg tea.WindowSizeMsg) (tea.Model, tea.Cmd) { } m.logViewport.Width = msg.Width - chromePadH m.logViewport.Height = msg.Height - (chromePadV + chromeHeader + chromeGaps + chromeFooter) + m.historyViewport.Width = msg.Width - chromePadH + m.historyViewport.Height = msg.Height - 10 return m, tea.ClearScreen } @@ -134,6 +140,15 @@ func (m *Model) handleTick(t time.Time) (tea.Model, tea.Cmd) { } func (m *Model) handleMouse(msg tea.MouseMsg) (tea.Model, tea.Cmd) { + if m.state == stateHistory { + switch msg.Button { + case tea.MouseButtonWheelUp: + m.historyViewport.ScrollUp(3) + case tea.MouseButtonWheelDown: + m.historyViewport.ScrollDown(3) + } + return m, nil + } if m.state != stateDashboard && m.state != stateLogs && m.state != stateUsers { return m, nil } @@ -187,6 +202,8 @@ func (m *Model) handleKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) { switch m.state { case stateDetail: return m.handleDetailKey(msg) + case stateHistory: + return m.handleHistoryKey(msg) case stateAlertDetail: return m.handleAlertDetailKey(msg) case stateDashboard, stateLogs, stateUsers: @@ -229,12 +246,49 @@ func (m *Model) handleDetailKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) { switch msg.String() { case "i", "esc": m.state = stateDashboard + case "e": + return m.handleEditItem() + case "h": + if m.cursor < len(m.sites) { + site := m.sites[m.cursor] + m.historySiteName = site.Name + m.historyChanges = m.engine.GetStateChanges(site.ID, 100) + m.historyViewport = viewport.New( + m.termWidth-chromePadH, + m.termHeight-10, + ) + m.historyViewport.SetContent(m.buildHistoryContent()) + m.historyViewport.GotoTop() + m.state = stateHistory + } case "q": return m, tea.Quit } return m, nil } +func (m *Model) handleHistoryKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) { + switch msg.String() { + case "q", "esc": + m.state = stateDetail + case "up", "k": + m.historyViewport.ScrollUp(1) + case "down", "j": + m.historyViewport.ScrollDown(1) + case "pgup": + m.historyViewport.HalfPageUp() + case "pgdown": + m.historyViewport.HalfPageDown() + case "home", "g": + m.historyViewport.GotoTop() + case "end", "G": + m.historyViewport.GotoBottom() + case "ctrl+c": + return m, tea.Quit + } + return m, nil +} + func (m *Model) handleAlertDetailKey(msg tea.KeyMsg) (tea.Model, tea.Cmd) { switch msg.String() { case "i", "esc": diff --git a/internal/tui/view_dashboard.go b/internal/tui/view_dashboard.go index bdd3646..08fdaac 100644 --- a/internal/tui/view_dashboard.go +++ b/internal/tui/view_dashboard.go @@ -96,6 +96,8 @@ func (m Model) View() string { return "" case stateDetail: return m.viewDetailPanel() + case stateHistory: + return m.viewHistoryPanel() case stateAlertDetail: return m.viewAlertDetailPanel() default: diff --git a/internal/tui/view_detail.go b/internal/tui/view_detail.go index 90b5320..340326d 100644 --- a/internal/tui/view_detail.go +++ b/internal/tui/view_detail.go @@ -176,7 +176,7 @@ func (m Model) viewDetailPanel() string { stateChanges := m.engine.GetStateChanges(site.ID, 5) if len(stateChanges) > 0 { b.WriteString("\n" + subtleStyle.Render(" STATE CHANGES") + "\n") - for _, sc := range stateChanges { + for i, sc := range stateChanges { ago := fmtDuration(time.Since(sc.ChangedAt)) arrow := subtleStyle.Render(sc.FromStatus) + " → " if sc.ToStatus == "UP" { @@ -185,11 +185,15 @@ func (m Model) viewDetailPanel() string { arrow += dangerStyle.Render(sc.ToStatus) } line := fmt.Sprintf(" %s %s", arrow, subtleStyle.Render(ago+" ago")) + if dur := computeOutageDuration(stateChanges, i); dur > 0 { + line += " " + warnStyle.Render("outage "+fmtDuration(dur)) + } if sc.ErrorReason != "" && sc.ToStatus != "UP" { line += " " + dangerStyle.Render(sc.ErrorReason) } b.WriteString(line + "\n") } + b.WriteString(" " + subtleStyle.Render("[h] History") + "\n") } b.WriteString("\n") @@ -235,7 +239,7 @@ func (m Model) viewDetailPanel() string { } b.WriteString("\n\n") - b.WriteString(subtleStyle.Render(" [i/Esc] Back [e] Edit [q] Quit")) + b.WriteString(subtleStyle.Render(" [i/Esc] Back [e] Edit [h] History [q] Quit")) return lipgloss.NewStyle().Padding(1, 2).Render(b.String()) } diff --git a/internal/tui/view_history.go b/internal/tui/view_history.go new file mode 100644 index 0000000..7811dae --- /dev/null +++ b/internal/tui/view_history.go @@ -0,0 +1,188 @@ +package tui + +import ( + "fmt" + "strings" + "time" + + "gitea.lerkolabs.com/lerkolabs/uptop/internal/models" + "github.com/charmbracelet/lipgloss" +) + +type historyStats struct { + totalEvents int + outageCount int + totalDowntime time.Duration +} + +func computeOutageDuration(changes []models.StateChange, idx int) time.Duration { + sc := changes[idx] + if sc.ToStatus != "UP" { + return 0 + } + if idx+1 >= len(changes) { + return 0 + } + prev := changes[idx+1] + if prev.ToStatus == "UP" { + return 0 + } + dur := sc.ChangedAt.Sub(prev.ChangedAt) + if dur < 0 { + return 0 + } + return dur +} + +func computeHistoryStats(changes []models.StateChange) historyStats { + var s historyStats + s.totalEvents = len(changes) + for i := range changes { + dur := computeOutageDuration(changes, i) + if dur > 0 { + s.outageCount++ + s.totalDowntime += dur + } + } + return s +} + +func stateChangeSparkline(changes []models.StateChange, width int) string { + if len(changes) < 2 || width < 4 { + return "" + } + + oldest := changes[len(changes)-1].ChangedAt + newest := changes[0].ChangedAt + span := newest.Sub(oldest) + if span <= 0 { + return "" + } + + buckets := make([]int, width) + for _, sc := range changes { + pos := int(float64(sc.ChangedAt.Sub(oldest)) / float64(span) * float64(width-1)) + if pos >= width { + pos = width - 1 + } + if pos < 0 { + pos = 0 + } + buckets[pos]++ + } + + maxVal := 0 + for _, v := range buckets { + if v > maxVal { + maxVal = v + } + } + if maxVal == 0 { + return "" + } + + var sb strings.Builder + for _, v := range buckets { + if v == 0 { + sb.WriteRune('·') + continue + } + idx := int(float64(v) / float64(maxVal) * 7) + if idx > 7 { + idx = 7 + } + ch := string(sparkChars[idx]) + switch { + case v >= 3: + sb.WriteString(dangerStyle.Render(ch)) + case v >= 2: + sb.WriteString(warnStyle.Render(ch)) + default: + sb.WriteString(subtleStyle.Render(ch)) + } + } + return sb.String() +} + +func (m Model) buildHistoryContent() string { + var b strings.Builder + + reasonWidth := m.termWidth - chromePadH - 55 + if reasonWidth < 10 { + reasonWidth = 10 + } + if reasonWidth > 60 { + reasonWidth = 60 + } + + for i, sc := range m.historyChanges { + ts := sc.ChangedAt.Format("2006-01-02 15:04") + + arrow := subtleStyle.Render(sc.FromStatus) + " → " + if sc.ToStatus == "UP" { + arrow += specialStyle.Render(sc.ToStatus) + } else { + arrow += dangerStyle.Render(sc.ToStatus) + } + + durStr := "" + if dur := computeOutageDuration(m.historyChanges, i); dur > 0 { + durStr = warnStyle.Render("outage " + fmtDuration(dur)) + } + + reason := "" + if sc.ErrorReason != "" && sc.ToStatus != "UP" { + reason = dangerStyle.Render(limitStr(sc.ErrorReason, reasonWidth)) + } + + fmt.Fprintf(&b, " %-18s %s %-12s %s\n", ts, arrow, durStr, reason) + } + + return b.String() +} + +func (m Model) viewHistoryPanel() string { + var b strings.Builder + + header := " " + titleStyle.Render("STATE HISTORY: "+m.historySiteName) + header += " " + subtleStyle.Render("[q] Back") + b.WriteString(header + "\n") + + divWidth := m.termWidth - chromePadH - 4 + if divWidth < 40 { + divWidth = 40 + } + b.WriteString(" " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n") + + sparkline := stateChangeSparkline(m.historyChanges, divWidth) + if sparkline != "" { + b.WriteString(" " + sparkline + "\n") + b.WriteString(" " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n") + } + + fmt.Fprintf(&b, " %-18s %-17s %-12s %s\n", + subtleStyle.Render("TIME"), + subtleStyle.Render("TRANSITION"), + subtleStyle.Render("DURATION"), + subtleStyle.Render("REASON")) + + if len(m.historyChanges) == 0 { + b.WriteString("\n " + subtleStyle.Render("No state changes recorded") + "\n") + } else { + b.WriteString(m.historyViewport.View()) + } + + b.WriteString("\n " + subtleStyle.Render(strings.Repeat("─", divWidth)) + "\n") + + stats := computeHistoryStats(m.historyChanges) + parts := []string{fmt.Sprintf("%d events", stats.totalEvents)} + if stats.outageCount > 0 { + parts = append(parts, fmt.Sprintf("%d outages", stats.outageCount)) + avg := stats.totalDowntime / time.Duration(stats.outageCount) + parts = append(parts, "avg outage "+fmtDuration(avg)) + } + b.WriteString(" " + subtleStyle.Render(strings.Join(parts, " │ ")) + "\n") + b.WriteString(" " + subtleStyle.Render("[j/k/↑/↓] Scroll [q/Esc] Back")) + + return lipgloss.NewStyle().Padding(1, 2).Render(b.String()) +} diff --git a/internal/tui/view_history_test.go b/internal/tui/view_history_test.go new file mode 100644 index 0000000..e3871bc --- /dev/null +++ b/internal/tui/view_history_test.go @@ -0,0 +1,171 @@ +package tui + +import ( + "testing" + "time" + + "gitea.lerkolabs.com/lerkolabs/uptop/internal/models" +) + +func TestComputeOutageDuration(t *testing.T) { + now := time.Date(2026, 6, 3, 14, 0, 0, 0, time.UTC) + tests := []struct { + name string + changes []models.StateChange + idx int + want time.Duration + }{ + { + "recovery with preceding DOWN", + []models.StateChange{ + {ToStatus: "UP", ChangedAt: now}, + {ToStatus: "DOWN", ChangedAt: now.Add(-10 * time.Minute)}, + }, + 0, + 10 * time.Minute, + }, + { + "not a recovery transition", + []models.StateChange{ + {ToStatus: "DOWN", ChangedAt: now}, + {ToStatus: "UP", ChangedAt: now.Add(-1 * time.Hour)}, + }, + 0, + 0, + }, + { + "no preceding entry", + []models.StateChange{ + {ToStatus: "UP", ChangedAt: now}, + }, + 0, + 0, + }, + { + "preceding is also UP", + []models.StateChange{ + {ToStatus: "UP", ChangedAt: now}, + {ToStatus: "UP", ChangedAt: now.Add(-5 * time.Minute)}, + }, + 0, + 0, + }, + { + "empty slice", + []models.StateChange{}, + 0, + 0, + }, + { + "middle of list", + []models.StateChange{ + {ToStatus: "DOWN", ChangedAt: now}, + {ToStatus: "UP", ChangedAt: now.Add(-30 * time.Minute)}, + {ToStatus: "DOWN", ChangedAt: now.Add(-2 * time.Hour)}, + }, + 1, + 90 * time.Minute, + }, + { + "recovery from LATE", + []models.StateChange{ + {ToStatus: "UP", ChangedAt: now}, + {ToStatus: "LATE", ChangedAt: now.Add(-5 * time.Minute)}, + }, + 0, + 5 * time.Minute, + }, + { + "recovery from SSL EXP", + []models.StateChange{ + {ToStatus: "UP", ChangedAt: now}, + {ToStatus: "SSL EXP", ChangedAt: now.Add(-1 * time.Hour)}, + }, + 0, + 1 * time.Hour, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.idx >= len(tt.changes) { + if tt.want != 0 { + t.Fatalf("invalid test: idx %d out of range", tt.idx) + } + return + } + got := computeOutageDuration(tt.changes, tt.idx) + if got != tt.want { + t.Errorf("got %v, want %v", got, tt.want) + } + }) + } +} + +func TestComputeHistoryStats(t *testing.T) { + now := time.Date(2026, 6, 3, 14, 0, 0, 0, time.UTC) + changes := []models.StateChange{ + {ToStatus: "UP", ChangedAt: now}, + {ToStatus: "DOWN", ChangedAt: now.Add(-10 * time.Minute)}, + {ToStatus: "UP", ChangedAt: now.Add(-1 * time.Hour)}, + {ToStatus: "DOWN", ChangedAt: now.Add(-3 * time.Hour)}, + } + + stats := computeHistoryStats(changes) + + if stats.totalEvents != 4 { + t.Errorf("totalEvents: got %d, want 4", stats.totalEvents) + } + if stats.outageCount != 2 { + t.Errorf("outageCount: got %d, want 2", stats.outageCount) + } + expectedDowntime := 10*time.Minute + 2*time.Hour + if stats.totalDowntime != expectedDowntime { + t.Errorf("totalDowntime: got %v, want %v", stats.totalDowntime, expectedDowntime) + } +} + +func TestComputeHistoryStats_Empty(t *testing.T) { + stats := computeHistoryStats(nil) + if stats.totalEvents != 0 || stats.outageCount != 0 || stats.totalDowntime != 0 { + t.Errorf("expected zero stats for nil, got %+v", stats) + } +} + +func TestStateChangeSparkline(t *testing.T) { + t.Run("empty", func(t *testing.T) { + if got := stateChangeSparkline(nil, 20); got != "" { + t.Errorf("expected empty for nil, got %q", got) + } + }) + + t.Run("single event", func(t *testing.T) { + changes := []models.StateChange{{ChangedAt: time.Now()}} + if got := stateChangeSparkline(changes, 20); got != "" { + t.Errorf("expected empty for single event, got %q", got) + } + }) + + t.Run("two events produces output", func(t *testing.T) { + now := time.Now() + changes := []models.StateChange{ + {ChangedAt: now}, + {ChangedAt: now.Add(-1 * time.Hour)}, + } + got := stateChangeSparkline(changes, 20) + if got == "" { + t.Error("expected non-empty sparkline for two events") + } + }) + + t.Run("width too small", func(t *testing.T) { + now := time.Now() + changes := []models.StateChange{ + {ChangedAt: now}, + {ChangedAt: now.Add(-1 * time.Hour)}, + } + if got := stateChangeSparkline(changes, 3); got != "" { + t.Errorf("expected empty for width 3, got %q", got) + } + }) +}