-
Notifications
You must be signed in to change notification settings - Fork 4
Uptime scoring for nodes #1381
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: development
Are you sure you want to change the base?
Uptime scoring for nodes #1381
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,9 +38,15 @@ func (p *PostgresDatabase) UpsertNodesGPU(ctx context.Context, gpus []types.Node | |
| } | ||
|
|
||
| func (p *PostgresDatabase) UpsertNodeHealth(ctx context.Context, healthReports []types.HealthReport) error { | ||
| // Calculate uptime scores for each health report | ||
| for i := range healthReports { | ||
| uptimeScore := p.calculateUptimeScore(ctx, healthReports[i]) | ||
| healthReports[i].UptimeScore = uptimeScore | ||
| } | ||
|
|
||
| conflictClause := clause.OnConflict{ | ||
| Columns: []clause.Column{{Name: "node_twin_id"}}, | ||
| DoUpdates: clause.AssignmentColumns([]string{"healthy", "updated_at"}), | ||
| DoUpdates: clause.AssignmentColumns([]string{"healthy", "uptime_score", "updated_at"}), | ||
| } | ||
| return p.gormDB.WithContext(ctx).Table("health_report").Clauses(conflictClause).Create(&healthReports).Error | ||
| } | ||
|
|
@@ -111,3 +117,52 @@ func (p *PostgresDatabase) UpsertNodeLocation(ctx context.Context, locations []t | |
| } | ||
| return p.gormDB.WithContext(ctx).Table("node_location").Clauses(conflictClause).Create(&locations).Error | ||
| } | ||
|
|
||
| func (p *PostgresDatabase) calculateUptimeScore(ctx context.Context, healthReport types.HealthReport) float64 { | ||
| const thirtyDaysInSeconds = 30 * 24 * 60 | ||
| const intervalsInThirtyDays = 30 * 24 * 60 / 5 // 30 days in minutes, divided by 5 minutes intervals | ||
|
|
||
| now := healthReport.UpdatedAt | ||
| thirtyDaysAgo := now - thirtyDaysInSeconds | ||
|
|
||
| newValue := 0.0 | ||
| if healthReport.Healthy { | ||
| newValue = 1.0 | ||
| } | ||
| var previousReport types.HealthReport | ||
| previousTotal := 0.0 | ||
| err := p.gormDB.WithContext(ctx).Table("health_report"). | ||
| Where("node_twin_id = ?", healthReport.NodeTwinId). | ||
| Last(&previousReport).Error | ||
|
|
||
| // If no previous report exists, we assume the previous total is 0 | ||
| if err != nil { | ||
| return newValue / intervalsInThirtyDays | ||
| } | ||
| previousTotal = previousReport.UptimeScore * intervalsInThirtyDays | ||
|
|
||
| // Get old value from 30 days ago (±1 minute) from history | ||
| oldValue := 0.0 | ||
| var oldReport types.HealthReport | ||
| startTime := thirtyDaysAgo - 60 | ||
| endTime := thirtyDaysAgo + 60 | ||
|
|
||
| err = p.gormDB.WithContext(ctx).Table("health_report"). | ||
| Where("node_twin_id = ? AND updated_at BETWEEN ? AND ?", healthReport.NodeTwinId, startTime, endTime). | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is a critical flaw for this PR This assumes historic rows to subtract the value falling out of the 30-day window, but As a result, Score won’t reflect a true x-day window. Queries for “old” rows will not find meaningful data. |
||
| Order("updated_at DESC"). | ||
| First(&oldReport).Error | ||
|
|
||
| if err == nil { | ||
| if oldReport.Healthy { | ||
| oldValue = 1.0 | ||
| } | ||
| } | ||
|
|
||
| totalHealthyIntervals := previousTotal + newValue - oldValue | ||
|
|
||
| if totalHealthyIntervals < 0 { | ||
| totalHealthyIntervals = 0 | ||
| } | ||
|
|
||
| return totalHealthyIntervals / intervalsInThirtyDays | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't `thirtyDaysInSeconds` be `30 * 24 * 60 * 60`? As written, `30 * 24 * 60` is 30 days in minutes, not seconds.