Skip to content

Commit c9677d9

Browse files
committed
fix: use env var in validation not spec version
Signed-off-by: alimaazamat <alima.azamat2003@gmail.com>
1 parent 3fada64 commit c9677d9

File tree

2 files changed

+55
-24
lines changed

2 files changed

+55
-24
lines changed

ray-operator/controllers/ray/utils/validation.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -609,8 +609,9 @@ func validateWorkerGroupIdleTimeout(workerGroup rayv1.WorkerGroupSpec, spec *ray
609609
}
610610

611611
// idleTimeoutSeconds is only allowed when autoscaler v2 is enabled
612-
if !IsAutoscalingV2Enabled(spec) {
613-
return fmt.Errorf("worker group %s has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2", workerGroup.GroupName)
612+
envVar, exists := EnvVarByName(RAY_ENABLE_AUTOSCALER_V2, spec.HeadGroupSpec.Template.Spec.Containers[RayContainerIndex].Env)
613+
if !exists || (envVar.Value != "1" && envVar.Value != "true") {
614+
return fmt.Errorf("worker group %s has idleTimeoutSeconds set, but %s environment variable is not set to 'true' in the head pod", workerGroup.GroupName, RAY_ENABLE_AUTOSCALER_V2)
614615
}
615616
}
616617

ray-operator/controllers/ray/utils/validation_test.go

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,11 +1891,10 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
18911891
"should accept worker group with valid idleTimeoutSeconds": {
18921892
spec: rayv1.RayClusterSpec{
18931893
EnableInTreeAutoscaling: ptr.To(true),
1894-
AutoscalerOptions: &rayv1.AutoscalerOptions{
1895-
Version: ptr.To(rayv1.AutoscalerVersionV2),
1896-
},
18971894
HeadGroupSpec: rayv1.HeadGroupSpec{
1898-
Template: podTemplateSpec(nil, nil),
1895+
Template: podTemplateSpec([]corev1.EnvVar{
1896+
{Name: RAY_ENABLE_AUTOSCALER_V2, Value: "1"},
1897+
}, nil),
18991898
},
19001899
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
19011900
{
@@ -1912,11 +1911,10 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
19121911
"should reject negative idleTimeoutSeconds": {
19131912
spec: rayv1.RayClusterSpec{
19141913
EnableInTreeAutoscaling: ptr.To(true),
1915-
AutoscalerOptions: &rayv1.AutoscalerOptions{
1916-
Version: ptr.To(rayv1.AutoscalerVersionV2),
1917-
},
19181914
HeadGroupSpec: rayv1.HeadGroupSpec{
1919-
Template: podTemplateSpec(nil, nil),
1915+
Template: podTemplateSpec([]corev1.EnvVar{
1916+
{Name: RAY_ENABLE_AUTOSCALER_V2, Value: "1"},
1917+
}, nil),
19201918
},
19211919
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
19221920
{
@@ -1933,11 +1931,10 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
19331931
"should accept zero idleTimeoutSeconds": {
19341932
spec: rayv1.RayClusterSpec{
19351933
EnableInTreeAutoscaling: ptr.To(true),
1936-
AutoscalerOptions: &rayv1.AutoscalerOptions{
1937-
Version: ptr.To(rayv1.AutoscalerVersionV2),
1938-
},
19391934
HeadGroupSpec: rayv1.HeadGroupSpec{
1940-
Template: podTemplateSpec(nil, nil),
1935+
Template: podTemplateSpec([]corev1.EnvVar{
1936+
{Name: RAY_ENABLE_AUTOSCALER_V2, Value: "1"},
1937+
}, nil),
19411938
},
19421939
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
19431940
{
@@ -1954,9 +1951,6 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
19541951
"should reject idleTimeoutSeconds when autoscaler version is not v2": {
19551952
spec: rayv1.RayClusterSpec{
19561953
EnableInTreeAutoscaling: ptr.To(true),
1957-
AutoscalerOptions: &rayv1.AutoscalerOptions{
1958-
Version: ptr.To(rayv1.AutoscalerVersionV1),
1959-
},
19601954
HeadGroupSpec: rayv1.HeadGroupSpec{
19611955
Template: podTemplateSpec(nil, nil),
19621956
},
@@ -1970,7 +1964,7 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
19701964
},
19711965
},
19721966
},
1973-
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1967+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but RAY_enable_autoscaler_v2 environment variable is not set to 'true' in the head pod",
19741968
},
19751969
"should reject idleTimeoutSeconds when autoscaler version is not set": {
19761970
spec: rayv1.RayClusterSpec{
@@ -1988,12 +1982,11 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
19881982
},
19891983
},
19901984
},
1991-
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
1985+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but RAY_enable_autoscaler_v2 environment variable is not set to 'true' in the head pod",
19921986
},
19931987
"should reject idleTimeoutSeconds when AutoscalerOptions is nil": {
19941988
spec: rayv1.RayClusterSpec{
19951989
EnableInTreeAutoscaling: ptr.To(true),
1996-
AutoscalerOptions: nil,
19971990
HeadGroupSpec: rayv1.HeadGroupSpec{
19981991
Template: podTemplateSpec(nil, nil),
19991992
},
@@ -2007,14 +2000,51 @@ func TestValidateWorkerGroupIdleTimeout(t *testing.T) {
20072000
},
20082001
},
20092002
},
2010-
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but autoscaler version is not v2. Please set .spec.autoscalerOptions.version to v2",
2003+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but RAY_enable_autoscaler_v2 environment variable is not set to 'true' in the head pod",
20112004
},
2012-
"should accept worker group without idleTimeoutSeconds and without autoscaler v2": {
2005+
"should reject idleTimeoutSeconds when env var is set to invalid value": {
20132006
spec: rayv1.RayClusterSpec{
20142007
EnableInTreeAutoscaling: ptr.To(true),
2015-
AutoscalerOptions: &rayv1.AutoscalerOptions{
2016-
Version: ptr.To(rayv1.AutoscalerVersionV1),
2008+
HeadGroupSpec: rayv1.HeadGroupSpec{
2009+
Template: podTemplateSpec([]corev1.EnvVar{
2010+
{Name: RAY_ENABLE_AUTOSCALER_V2, Value: "false"},
2011+
}, nil),
20172012
},
2013+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
2014+
{
2015+
GroupName: "worker-group-1",
2016+
Template: podTemplateSpec(nil, nil),
2017+
IdleTimeoutSeconds: ptr.To(int32(60)),
2018+
MinReplicas: ptr.To(int32(0)),
2019+
MaxReplicas: ptr.To(int32(10)),
2020+
},
2021+
},
2022+
},
2023+
expectedErr: "worker group worker-group-1 has idleTimeoutSeconds set, but RAY_enable_autoscaler_v2 environment variable is not set to 'true' in the head pod",
2024+
},
2025+
"should accept worker group with idleTimeoutSeconds when env var is set to true": {
2026+
spec: rayv1.RayClusterSpec{
2027+
EnableInTreeAutoscaling: ptr.To(true),
2028+
HeadGroupSpec: rayv1.HeadGroupSpec{
2029+
Template: podTemplateSpec([]corev1.EnvVar{
2030+
{Name: RAY_ENABLE_AUTOSCALER_V2, Value: "true"},
2031+
}, nil),
2032+
},
2033+
WorkerGroupSpecs: []rayv1.WorkerGroupSpec{
2034+
{
2035+
GroupName: "worker-group-1",
2036+
Template: podTemplateSpec(nil, nil),
2037+
IdleTimeoutSeconds: ptr.To(int32(60)),
2038+
MinReplicas: ptr.To(int32(0)),
2039+
MaxReplicas: ptr.To(int32(10)),
2040+
},
2041+
},
2042+
},
2043+
expectedErr: "",
2044+
},
2045+
"should accept worker group without idleTimeoutSeconds and without autoscaler v2": {
2046+
spec: rayv1.RayClusterSpec{
2047+
EnableInTreeAutoscaling: ptr.To(true),
20182048
HeadGroupSpec: rayv1.HeadGroupSpec{
20192049
Template: podTemplateSpec(nil, nil),
20202050
},

0 commit comments

Comments
 (0)