Skip to content

Commit 77e3f57

Browse files
authored
Merge pull request #8266 from elmiko/add-more-balance-logging
cluster-autoscaler: add logging for failed node balancing
2 parents 8e0d47c + 771b9ee commit 77e3f57

File tree

2 files changed

+23
-0
lines changed

2 files changed

+23
-0
lines changed

cluster-autoscaler/core/scaleup/orchestrator/orchestrator.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,17 @@ func (o *ScaleUpOrchestrator) ComputeExpansionOption(
469469
}
470470

471471
option.SimilarNodeGroups = o.ComputeSimilarNodeGroups(nodeGroup, nodeInfos, schedulablePodGroups, now)
472+
if option.SimilarNodeGroups != nil {
473+
// if similar node groups are found, log about them
474+
similarNodeGroupIds := make([]string, 0)
475+
for _, sng := range option.SimilarNodeGroups {
476+
similarNodeGroupIds = append(similarNodeGroupIds, sng.Id())
477+
}
478+
klog.V(5).Infof("Found %d similar node groups: %v", len(option.SimilarNodeGroups), similarNodeGroupIds)
479+
} else if o.autoscalingContext.BalanceSimilarNodeGroups {
480+
// if no similar node groups are found and the flag is enabled, log about it
481+
klog.V(5).Info("No similar node groups found")
482+
}
472483

473484
estimateStart := time.Now()
474485
expansionEstimator := o.estimatorBuilder(

cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@ limitations under the License.
1717
package nodegroupset
1818

1919
import (
20+
"fmt"
2021
"math"
2122

2223
apiv1 "k8s.io/api/core/v1"
2324
"k8s.io/apimachinery/pkg/api/resource"
2425
"k8s.io/autoscaler/cluster-autoscaler/config"
2526
"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
2627
"k8s.io/autoscaler/cluster-autoscaler/utils/scheduler"
28+
klog "k8s.io/klog/v2"
2729
)
2830

2931
// BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity
@@ -122,34 +124,44 @@ func IsCloudProviderNodeInfoSimilar(
122124

123125
for kind, qtyList := range capacity {
124126
if len(qtyList) != 2 {
127+
dissimilarNodesLog(n1.Node().Name, n2.Node().Name, fmt.Sprintf("missing capacity %q", kind))
125128
return false
126129
}
127130
switch kind {
128131
case apiv1.ResourceMemory:
129132
if !resourceListWithinTolerance(qtyList, ratioOpts.MaxCapacityMemoryDifferenceRatio) {
133+
dissimilarNodesLog(n1.Node().Name, n2.Node().Name, "memory not within tolerance")
130134
return false
131135
}
132136
default:
133137
// For other capacity types we require exact match.
134138
// If this is ever changed, enforcing MaxCoresTotal limits
135139
// as it is now may no longer work.
136140
if qtyList[0].Cmp(qtyList[1]) != 0 {
141+
dissimilarNodesLog(n1.Node().Name, n2.Node().Name, fmt.Sprintf("capacity resource %q does not match", kind))
137142
return false
138143
}
139144
}
140145
}
141146

142147
// For allocatable and free we allow resource quantities to be within a few % of each other
143148
if !resourceMapsWithinTolerance(allocatable, ratioOpts.MaxAllocatableDifferenceRatio) {
149+
dissimilarNodesLog(n1.Node().Name, n2.Node().Name, "allocatable resources not within tolerance")
144150
return false
145151
}
146152
if !resourceMapsWithinTolerance(free, ratioOpts.MaxFreeDifferenceRatio) {
153+
dissimilarNodesLog(n1.Node().Name, n2.Node().Name, "free resources not within tolerance")
147154
return false
148155
}
149156

150157
if !compareLabels(nodes, ignoredLabels) {
158+
dissimilarNodesLog(n1.Node().Name, n2.Node().Name, "labels do not match")
151159
return false
152160
}
153161

154162
return true
155163
}
164+
165+
func dissimilarNodesLog(node1, node2, message string) {
166+
klog.V(5).Infof("nodes %q and %q are not similar, %s", node1, node2, message)
167+
}

0 commit comments

Comments
 (0)