Skip to content

Commit 715634e

Browse files
committed
Delete Slice if Deformed status.
1 parent 7ef1eeb commit 715634e

File tree

3 files changed

+25
-277
lines changed

3 files changed

+25
-277
lines changed

slice/internal/controller/workload_controller.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -219,12 +219,18 @@ func (r *WorkloadReconciler) cleanupSlice(ctx context.Context, wl *kueue.Workloa
219219
}
220220

221221
if !slice.DeletionTimestamp.IsZero() {
222-
return core.Deformed(&slice), nil
222+
log.V(3).Info("Slice already deleted, finishing cleanup")
223+
return true, nil
223224
}
224225

225-
terminated, err := r.ownerPodsFinished(ctx, wl)
226-
if err != nil || !terminated {
227-
return false, err
226+
if !core.Deformed(&slice) {
227+
terminated, err := r.ownerPodsFinished(ctx, wl)
228+
if err != nil || !terminated {
229+
return false, err
230+
}
231+
} else {
232+
log.V(3).Info("Slice in deformed state")
233+
// We still need to delete the Slice because requeueing causes a conflict error during Slice creation.
228234
}
229235

230236
log.V(3).Info("Deleting the Slice")
@@ -236,7 +242,7 @@ func (r *WorkloadReconciler) cleanupSlice(ctx context.Context, wl *kueue.Workloa
236242
log.Error(err, "Failed to delete the Slice")
237243
}
238244

239-
return false, err
245+
return true, err
240246
}
241247

242248
func (r *WorkloadReconciler) ownerPodsFinished(ctx context.Context, wl *kueue.Workload) (bool, error) {

slice/internal/controller/workload_controller_test.go

Lines changed: 14 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -213,42 +213,29 @@ func TestWorkloadReconciler(t *testing.T) {
213213
request: baseRequest,
214214
objs: []client.Object{
215215
baseAdmissionCheckWrapper.DeepCopy(),
216+
baseJobSetWrapper.DeepCopy(),
217+
basePod1Wrapper.DeepCopy(),
216218
baseWorkloadWrapperWithAdmissionAndOwner.Clone().
217219
Active(false).
218220
Finalizers(SliceControllerName).
219221
Obj(),
220-
baseSliceWrapper.Clone().
221-
Finalizers(SliceControllerName).
222-
DeletionTimestamp(now).
223-
Deformed().
224-
Obj(),
222+
baseSliceWrapper.Clone().Deformed().Obj(),
225223
},
226224
wantWorkloads: []kueue.Workload{
227-
*baseWorkloadWrapperWithAdmissionAndOwner.Clone().
228-
Active(false).
229-
Obj(),
230-
},
231-
wantSlices: []slice.Slice{
232-
*baseSliceWrapper.Clone().
233-
Finalizers(SliceControllerName).
234-
DeletionTimestamp(now).
235-
Deformed().
236-
Obj(),
225+
*baseWorkloadWrapperWithAdmissionAndOwner.Clone().Active(false).Obj(),
237226
},
238227
},
239228
"shouldn't delete the finalizer because the Slice status Degraded": {
240229
request: baseRequest,
241230
objs: []client.Object{
242231
baseAdmissionCheckWrapper.DeepCopy(),
232+
baseJobSetWrapper.DeepCopy(),
233+
basePod1Wrapper.DeepCopy(),
243234
baseWorkloadWrapperWithAdmissionAndOwner.Clone().
244235
Active(false).
245236
Finalizers(SliceControllerName).
246237
Obj(),
247-
baseSliceWrapper.Clone().
248-
Finalizers(SliceControllerName).
249-
DeletionTimestamp(now).
250-
Degraded().
251-
Obj(),
238+
baseSliceWrapper.Clone().Degraded().Obj(),
252239
},
253240
wantWorkloads: []kueue.Workload{
254241
*baseWorkloadWrapperWithAdmissionAndOwner.Clone().
@@ -257,11 +244,7 @@ func TestWorkloadReconciler(t *testing.T) {
257244
Obj(),
258245
},
259246
wantSlices: []slice.Slice{
260-
*baseSliceWrapper.Clone().
261-
Finalizers(SliceControllerName).
262-
DeletionTimestamp(now).
263-
Degraded().
264-
Obj(),
247+
*baseSliceWrapper.Clone().Degraded().Obj(),
265248
},
266249
},
267250
"should delete the Slice because the Pod Status Succeeded": {
@@ -273,17 +256,13 @@ func TestWorkloadReconciler(t *testing.T) {
273256
Active(false).
274257
Finalizers(SliceControllerName).
275258
Obj(),
276-
baseSliceWrapper.Clone().Finalizers(SliceControllerName).Obj(),
259+
baseSliceWrapper.DeepCopy(),
277260
},
278261
wantWorkloads: []kueue.Workload{
279262
*baseWorkloadWrapperWithAdmissionAndOwner.Clone().
280263
Active(false).
281-
Finalizers(SliceControllerName).
282264
Obj(),
283265
},
284-
wantSlices: []slice.Slice{
285-
*baseSliceWrapper.Clone().Finalizers(SliceControllerName).DeletionTimestamp(now).Obj(),
286-
},
287266
},
288267
"should delete the Slice because the Pod Status PodFailed": {
289268
request: baseRequest,
@@ -295,17 +274,13 @@ func TestWorkloadReconciler(t *testing.T) {
295274
Active(false).
296275
Finalizers(SliceControllerName).
297276
Obj(),
298-
baseSliceWrapper.Clone().Finalizers(SliceControllerName).Obj(),
277+
baseSliceWrapper.DeepCopy(),
299278
},
300279
wantWorkloads: []kueue.Workload{
301280
*baseWorkloadWrapperWithAdmissionAndOwner.Clone().
302281
Active(false).
303-
Finalizers(SliceControllerName).
304282
Obj(),
305283
},
306-
wantSlices: []slice.Slice{
307-
*baseSliceWrapper.Clone().Finalizers(SliceControllerName).DeletionTimestamp(now).Obj(),
308-
},
309284
},
310285
"shouldn't delete the Slice because the Pods still running": {
311286
request: baseRequest,
@@ -318,7 +293,7 @@ func TestWorkloadReconciler(t *testing.T) {
318293
Active(false).
319294
Finalizers(SliceControllerName).
320295
Obj(),
321-
baseSliceWrapper.Clone().Finalizers(SliceControllerName).Obj(),
296+
baseSliceWrapper.DeepCopy(),
322297
},
323298
wantWorkloads: []kueue.Workload{
324299
*baseWorkloadWrapperWithAdmissionAndOwner.Clone().
@@ -327,7 +302,7 @@ func TestWorkloadReconciler(t *testing.T) {
327302
Obj(),
328303
},
329304
wantSlices: []slice.Slice{
330-
*baseSliceWrapper.Clone().Finalizers(SliceControllerName).Obj(),
305+
*baseSliceWrapper.DeepCopy(),
331306
},
332307
},
333308
"shouldn't delete the Slice because one of the Pods still running": {
@@ -341,7 +316,7 @@ func TestWorkloadReconciler(t *testing.T) {
341316
Active(false).
342317
Finalizers(SliceControllerName).
343318
Obj(),
344-
baseSliceWrapper.Clone().Finalizers(SliceControllerName).Obj(),
319+
baseSliceWrapper.DeepCopy(),
345320
},
346321
wantWorkloads: []kueue.Workload{
347322
*baseWorkloadWrapperWithAdmissionAndOwner.Clone().
@@ -350,7 +325,7 @@ func TestWorkloadReconciler(t *testing.T) {
350325
Obj(),
351326
},
352327
wantSlices: []slice.Slice{
353-
*baseSliceWrapper.Clone().Finalizers(SliceControllerName).Obj(),
328+
*baseSliceWrapper.DeepCopy(),
354329
},
355330
},
356331
"shouldn't add finalizer because invalid TPU topology annotation": {

0 commit comments

Comments
 (0)