From b0249cfc7911d8d3acd3ac181d2c30d0978a2dda Mon Sep 17 00:00:00 2001 From: Anuj Dube Date: Wed, 10 Dec 2025 15:48:03 +0530 Subject: [PATCH] fix(controller): do not apply driver upgrade annotation when driver is disabled The applyDriverAutoUpgradeAnnotation() function was applying the nvidia.com/gpu-driver-upgrade-enabled annotation to GPU nodes even when driver.enabled=false. This occurred because the function only checked if driver.upgradePolicy.autoUpgrade was true, without verifying that the driver component itself was enabled. This fix adds a check for Driver.IsEnabled() before applying the annotation, ensuring it is only set when: 1. Driver is enabled 2. Auto-upgrade policy exists and is enabled 3. Sandbox workloads are disabled Added unit tests to validate the fix and prevent regression. Fixes #1277 Signed-off-by: Anuj Dube --- controllers/state_manager.go | 3 +- controllers/state_manager_test.go | 68 +++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/controllers/state_manager.go b/controllers/state_manager.go index 4ea634ebe..7926b10d9 100644 --- a/controllers/state_manager.go +++ b/controllers/state_manager.go @@ -438,7 +438,8 @@ func (n *ClusterPolicyController) applyDriverAutoUpgradeAnnotation() error { updateRequired := false value := "true" annotationValue, annotationExists := node.Annotations[driverAutoUpgradeAnnotationKey] - if n.singleton.Spec.Driver.UpgradePolicy != nil && + if n.singleton.Spec.Driver.IsEnabled() && + n.singleton.Spec.Driver.UpgradePolicy != nil && n.singleton.Spec.Driver.UpgradePolicy.AutoUpgrade && !n.sandboxEnabled { // check if we need to add the annotation diff --git a/controllers/state_manager_test.go b/controllers/state_manager_test.go index bd1641e94..d37bb39d2 100644 --- a/controllers/state_manager_test.go +++ b/controllers/state_manager_test.go @@ -19,6 +19,7 @@ package controllers import ( "testing" + upgrade_v1alpha1 "github.com/NVIDIA/k8s-operator-libs/api/upgrade/v1alpha1" corev1 "k8s.io/api/core/v1" gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" @@ -186,3 +187,70 @@ func TestHasMIGCapableGPU(t *testing.T) { } } } + +func TestShouldApplyDriverAutoUpgradeAnnotation(t *testing.T) { + tests := []struct { + name string + driverEnabled bool + autoUpgradeEnabled bool + sandboxEnabled bool + wantAnnotation bool + }{ + { + name: "driver enabled with auto-upgrade", + driverEnabled: true, + autoUpgradeEnabled: true, + sandboxEnabled: false, + wantAnnotation: true, + }, + { + name: "driver disabled with auto-upgrade enabled - should NOT apply annotation", + driverEnabled: false, + autoUpgradeEnabled: true, + sandboxEnabled: false, + wantAnnotation: false, + }, + { + name: "driver enabled but auto-upgrade disabled", + driverEnabled: true, + autoUpgradeEnabled: false, + sandboxEnabled: false, + wantAnnotation: false, + }, + { + name: "driver enabled with auto-upgrade but sandbox enabled", + driverEnabled: true, + autoUpgradeEnabled: true, + sandboxEnabled: true, + wantAnnotation: false, + }, + { + name: "all disabled", + driverEnabled: false, + autoUpgradeEnabled: false, + sandboxEnabled: false, + wantAnnotation: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // Create a mock ClusterPolicy + clusterPolicy := &gpuv1.ClusterPolicy{} + clusterPolicy.Spec.Driver.Enabled = &tc.driverEnabled + clusterPolicy.Spec.Driver.UpgradePolicy = &upgrade_v1alpha1.DriverUpgradePolicySpec{ + AutoUpgrade: tc.autoUpgradeEnabled, + } + + // Simulate the logic from applyDriverAutoUpgradeAnnotation + shouldApply := clusterPolicy.Spec.Driver.IsEnabled() && + clusterPolicy.Spec.Driver.UpgradePolicy != nil && + clusterPolicy.Spec.Driver.UpgradePolicy.AutoUpgrade && + !tc.sandboxEnabled + + if shouldApply != tc.wantAnnotation { + t.Errorf("Expected annotation to be applied: %v, but got: %v", tc.wantAnnotation, shouldApply) + } + }) + } +}