Skip to content

Commit 89974a1

Browse files
pajakdmbobrovskyi
andauthored
[slice] Rename states + add timeout (#810)
* Rename states + add timeout * Cleanup * Update api * Remove special logic for deformed/deactivating * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/core/slice.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/controller/workload_controller.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/core/slice.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Address comments * Update slice/internal/core/constants.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Address comments * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update failed state * Address comment * Update slice/internal/core/slice.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Address comments * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/util/testing/wrappers.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Address comments * Update doc * Update doc * Update doc * Update slice/internal/controller/workload_controller.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Adjust after addressing comment * Update slice/internal/controller/workload_controller.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/controller/workload_controller.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/core/slice.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/core/slice.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/internal/core/core.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Remove unused import * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Update slice/test/e2e/jobset_test.go Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com> * Address comments * Fix format --------- Co-authored-by: Mykhailo Bobrovskyi <mikhail.bobrovsky@gmail.com>
1 parent 577ca58 commit 89974a1

File tree

10 files changed

+407
-301
lines changed

10 files changed

+407
-301
lines changed

slice/api/v1alpha1/slice_types.go

Lines changed: 29 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -21,21 +21,33 @@ import (
2121
)
2222

2323
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.
24+
type Type string
25+
26+
const (
27+
TypeV6e Type = "v6e"
28+
TypeTpu7x Type = "tpu-v7x"
29+
)
2430

2531
// SliceSpec defines the desired state of Slice.
2632
type SliceSpec struct {
27-
// AcceleratorType specifies the type of accelerator used in this slice.
33+
// Type specifies the type of accelerator used in this slice, e.g., "v6e", "tpu-v7x".
2834
// +kubebuilder:validation:Immutable
29-
Type string `json:"type"`
35+
// +kubebuilder:validation:Enum=v6e;tpu-v7x
36+
Type Type `json:"type"`
3037

3138
// Topology represents the network topology of the slice.
39+
// It defines the physical arrangement of TPU chips in a 2D or 3D mesg.
40+
// The topology must be specified in `<X>x<Y>` or `<X>x<Y>x<Z>` format.
3241
// +kubebuilder:validation:Immutable
42+
// +kubebuilder:validation:Pattern=^\d+x\d+(x\d+)?$
3343
Topology string `json:"topology"`
3444

35-
// Partition Ids denotes the set of partitions to use to form a slice
36-
// For slices that span multiple partitions, it will be a list of 4x4x4 Ids
37-
// For sub-partition topology, it will be a single entry corresponding to the ID with the partition Ids
38-
PartitionIDs []string `json:"partitionIds"`
45+
// PartitionIds denotes the set of partitions to use to form a slice
46+
// For slices that span multiple partitions, it will be a list of 4x4x4 IDs
47+
// For sub-partition topology, it will be a single entry corresponding to the ID of the partition
48+
// +kubebuilder:validation:Immutable
49+
// +kubebuilder:validation:MinItems=1
50+
PartitionIds []string `json:"partitionIds"`
3951
}
4052

4153
// SliceStatus defines the observed state of Slice.
@@ -45,17 +57,17 @@ type SliceStatus struct {
4557
Conditions []metav1.Condition `json:"conditions,omitempty"`
4658

4759
// Populated to match the physical topology of block the Super-Slice is running on
48-
BlockID string `json:"blockId,omitempty"`
60+
BlockId string `json:"blockId,omitempty"`
4961

5062
// Populated to list of physical topology of sub-block the Super-Slice is running on
51-
SubBlockIDs []string `json:"subBlockIds,omitempty"`
63+
SubBlockIds []string `json:"subBlockIds,omitempty"`
5264
}
5365

5466
// +kubebuilder:object:root=true
5567
// +kubebuilder:subresource:status
5668
// +kubebuilder:printcolumn:name="Type",type=string,JSONPath=`.spec.type`
5769
// +kubebuilder:printcolumn:name="Topology",type=string,JSONPath=`.spec.topology`
58-
// +kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.conditions[0].type`
70+
// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.conditions[?(@.type=="Ready")].reason`
5971
// +kubebuilder:printcolumn:name="Age",type="date",JSONPath=".metadata.creationTimestamp"
6072
// Slice is the Schema for the slices API.
6173
type Slice struct {
@@ -75,22 +87,14 @@ type SliceList struct {
7587
Items []Slice `json:"items"`
7688
}
7789

78-
// SliceConditionType defines the type of condition
79-
type SliceConditionType string
80-
81-
const (
82-
// Forming means the slice is being created and configured.
83-
Forming SliceConditionType = "Forming"
84-
// Ready means the slice is fully operational.
85-
Ready SliceConditionType = "Ready"
86-
// Degraded means the slice is operational but with reduced capacity or performance.
87-
Degraded SliceConditionType = "Degraded"
88-
// Deformed means the slice is being torn down.
89-
Deformed SliceConditionType = "Deformed"
90-
// Error means the slice has encountered an error and is not operational.
91-
Error SliceConditionType = "Error"
92-
)
93-
9490
func init() {
9591
SchemeBuilder.Register(&Slice{}, &SliceList{})
9692
}
93+
94+
const (
95+
// Represent the underlying hardware readiness status
96+
SliceStateConditionType = "Ready"
97+
// Represent whether the user/scheduler should take action on the slice
98+
// The slice is in an error state that can't not automatically recover
99+
SliceCreationFailedConditionType = "SliceCreationFailed"
100+
)

slice/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

slice/config/crd/bases/slice.accelerator.gke.io_slices.yaml

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ spec:
2121
- jsonPath: .spec.topology
2222
name: Topology
2323
type: string
24-
- jsonPath: .status.conditions[0].type
25-
name: Status
24+
- jsonPath: .status.conditions[?(@.type=="Ready")].reason
25+
name: State
2626
type: string
2727
- jsonPath: .metadata.creationTimestamp
2828
name: Age
@@ -54,18 +54,26 @@ spec:
5454
properties:
5555
partitionIds:
5656
description: |-
57-
Partition Ids denotes the set of partitions to use to form a slice
58-
For slices that span multiple partitions, it will be a list of 4x4x4 Ids
59-
For sub-partition topology, it will be a single entry corresponding to the ID with the partition Ids
57+
PartitionIds denotes the set of partitions to use to form a slice
58+
For slices that span multiple partitions, it will be a list of 4x4x4 IDs
59+
For sub-partition topology, it will be a single entry corresponding to the ID of the partition
6060
items:
6161
type: string
62+
minItems: 1
6263
type: array
6364
topology:
64-
description: Topology represents the network topology of the slice.
65+
description: |-
66+
Topology represents the network topology of the slice.
67+
It defines the physical arrangement of TPU chips in a 2D or 3D mesg.
68+
The topology must be specified in `<X>x<Y>` or `<X>x<Y>x<Z>` format.
69+
pattern: ^\d+x\d+(x\d+)?$
6570
type: string
6671
type:
67-
description: AcceleratorType specifies the type of accelerator used
68-
in this slice.
72+
description: Type specifies the type of accelerator used in this slice,
73+
e.g., "v6e", "tpu-v7x".
74+
enum:
75+
- v6e
76+
- tpu-v7x
6977
type: string
7078
required:
7179
- partitionIds

slice/internal/controller/workload_controller.go

Lines changed: 42 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23-
"slices"
2423
"sort"
2524
"strings"
2625
"time"
@@ -162,7 +161,7 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
162161
return ctrl.Result{}, err
163162
}
164163

165-
deleted, _, errored, _ := r.groupSlices(slices)
164+
deleted, toDelete, _ := r.groupSlices(slices)
166165
if len(deleted) > 0 {
167166
log.V(3).Info(
168167
"Waiting for deleted Slices to be cleaned up; skipping reconciliation for now",
@@ -181,12 +180,12 @@ func (r *WorkloadReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
181180
return ctrl.Result{}, client.IgnoreNotFound(err)
182181
}
183182

184-
if len(errored) > 0 {
183+
if len(toDelete) > 0 {
185184
log.V(3).Info(
186-
"Deleting errored Slices",
187-
"erroredSlices", klog.KObjSlice(errored),
185+
"Deleting Slices",
186+
"slices", klog.KObjSlice(toDelete),
188187
)
189-
err = r.deleteSlices(ctx, errored)
188+
err = r.deleteSlices(ctx, toDelete)
190189
if err != nil {
191190
return ctrl.Result{}, err
192191
}
@@ -256,29 +255,21 @@ func (r *WorkloadReconciler) cleanupSlices(ctx context.Context, wl *kueue.Worklo
256255
return false, err
257256
}
258257

259-
deleted, deformed, errored, other := r.groupSlices(slices)
258+
deleted, toDelete, other := r.groupSlices(slices)
260259

261260
if len(deleted) == len(slices) {
262261
log.V(3).Info("All slices already deleted; finishing cleanup")
263262
return true, nil
264263
}
265264

266-
if len(deformed) > 0 {
267-
log.V(3).Info("Found Slices in deformed state; cleaning them up", "deformedSlices", klog.KObjSlice(deformed))
268-
// We still need to delete deformed Slices because requeueing causes a conflict error during Slice creation.
269-
err = r.deleteSlices(ctx, deformed)
270-
if err != nil {
271-
return false, err
272-
}
273-
}
274-
275-
if len(other)+len(errored) > 0 {
265+
if len(other)+len(toDelete) > 0 {
276266
terminated, err := r.ownerPodsFinished(ctx, wl)
277267
if err != nil || !terminated {
278268
return false, err
279269
}
280270
}
281-
toDelete := append(errored, other...)
271+
// after pods are terminated we should cleanup all the slices (including active ones)
272+
toDelete = append(toDelete, other...)
282273
log.V(3).Info("Deleting Slices", "slices", klog.KObjSlice(toDelete))
283274
err = r.deleteSlices(ctx, toDelete)
284275
if err != nil {
@@ -300,34 +291,31 @@ func (r *WorkloadReconciler) findWorkloadSlices(ctx context.Context, wl *kueue.W
300291
return slices.Items, nil
301292
}
302293

303-
// groupSlices categorizes a list of Slice objects into four groups based on their state.
304-
// It separates slices into deleted (marked for deletion), deformed (being torn down),
305-
// errored (has encountered an error) and other (active) slices.
294+
// groupSlices categorizes a list of Slice objects into three groups based on their state.
295+
// It separates slices into deleted (marked for deletion), ones that should be delete
296+
// (errored and stale) and other (active) slices.
306297
//
307298
// Parameters:
308299
//
309300
// slices - A slice of v1alpha1.Slice objects to be categorized.
310301
//
311302
// Returns:
312303
// - A slice containing deleted Slice objects (with non-zero DeletionTimestamp).
313-
// - A slice containing deformed Slice objects (being torn down).
314-
// - A slice containing errored Slice objects.
315-
// - A slice containing other Slice objects (active/valid slices).
316-
func (r *WorkloadReconciler) groupSlices(slices []v1alpha1.Slice) ([]v1alpha1.Slice, []v1alpha1.Slice, []v1alpha1.Slice, []v1alpha1.Slice) {
317-
var deleted, deformed, errored, other []v1alpha1.Slice
304+
// - A slice containing Slice objects that should be deleted (errored and stale slices).
305+
// - A slice containing other Slice objects (active/initializing slices).
306+
func (r *WorkloadReconciler) groupSlices(slices []v1alpha1.Slice) ([]v1alpha1.Slice, []v1alpha1.Slice, []v1alpha1.Slice) {
307+
var deleted, toDelete, other []v1alpha1.Slice
318308
for _, slice := range slices {
319-
switch {
320-
case !slice.DeletionTimestamp.IsZero():
309+
switch core.GetSliceState(slice) {
310+
case core.SliceStateDeleted:
321311
deleted = append(deleted, slice)
322-
case core.Deformed(&slice):
323-
deformed = append(deformed, slice)
324-
case core.IsError(&slice):
325-
errored = append(errored, slice)
312+
case core.SliceStateFailed, core.SliceStateStale:
313+
toDelete = append(toDelete, slice)
326314
default:
327315
other = append(other, slice)
328316
}
329317
}
330-
return deleted, deformed, errored, other
318+
return deleted, toDelete, other
331319
}
332320

333321
func (r *WorkloadReconciler) deleteSlices(ctx context.Context, slices []v1alpha1.Slice) error {
@@ -507,18 +495,16 @@ func shouldCreateSliceForPodSetAssignment(wl *kueue.Workload, psa kueue.PodSetAs
507495
return false
508496
}
509497

510-
func parseTopologyAssignmentIntoNodeSelector(slice *v1alpha1.Slice, topologyAssignment *kueue.TopologyAssignment, nodes map[string]corev1.Node) {
511-
subBlockIDs := sets.New[string]()
498+
func parseTopologyAssignmentIntoPartitionIds(slice *v1alpha1.Slice, topologyAssignment *kueue.TopologyAssignment, nodes map[string]corev1.Node) {
499+
subBlockIds := sets.New[string]()
512500
// we already validated that all assignments have a valid level,
513501
// in validateRelevantWorkload.
514502
hostnameLevelIndex := topology.HostnameLevelIndex(topologyAssignment)
515503
for _, domain := range topologyAssignment.Domains {
516504
nodeName := domain.Values[hostnameLevelIndex]
517-
subBlockIDs.Insert(topology.GetTPUSubBlockLabelValue(nodes, nodeName))
505+
subBlockIds.Insert(topology.GetTPUSubBlockLabelValue(nodes, nodeName))
518506
}
519-
partitionIDs := subBlockIDs.UnsortedList()
520-
slices.Sort(partitionIDs)
521-
slice.Spec.PartitionIDs = partitionIDs
507+
slice.Spec.PartitionIds = sets.List(subBlockIds)
522508
}
523509

524510
func (r *WorkloadReconciler) createSlice(ctx context.Context, wl *kueue.Workload, ac *kueue.AdmissionCheckState, psa *kueue.PodSetAssignment, nodes map[string]corev1.Node) (*v1alpha1.Slice, error) {
@@ -529,10 +515,10 @@ func (r *WorkloadReconciler) createSlice(ctx context.Context, wl *kueue.Workload
529515
if err := controllerutil.SetControllerReference(wl, slice, r.client.Scheme()); err != nil {
530516
return nil, err
531517
}
532-
parseTopologyAssignmentIntoNodeSelector(slice, psa.TopologyAssignment, nodes)
518+
parseTopologyAssignmentIntoPartitionIds(slice, psa.TopologyAssignment, nodes)
533519

534520
ps := podset.FindPodSetByName(wl.Spec.PodSets, psa.Name)
535-
slice.Spec.Type = core.GetTPUAccelerator(ps.Template)
521+
slice.Spec.Type = v1alpha1.Type(core.GetTPUAccelerator(ps.Template))
536522
slice.Spec.Topology = core.GetTPUTopology(ps.Template)
537523

538524
if err := r.client.Create(ctx, slice); err != nil {
@@ -594,8 +580,8 @@ func (r *WorkloadReconciler) syncAdmissionCheckStatus(ctx context.Context, wl *k
594580
if ac.Message != originalMessage {
595581
// Logging error messages if exists
596582
for _, slice := range slices {
597-
cond := meta.FindStatusCondition(slice.Status.Conditions, string(v1alpha1.Error))
598-
if cond != nil && cond.Status == metav1.ConditionTrue {
583+
cond := meta.FindStatusCondition(slice.Status.Conditions, v1alpha1.SliceStateConditionType)
584+
if cond != nil && cond.Status == metav1.ConditionFalse && cond.Reason == string(core.MMIGHealthStatusFailed) {
599585
log.V(2).Info(
600586
"WARNING: The Slice is not operational due to an error",
601587
"slice", klog.KObj(&slice),
@@ -608,42 +594,27 @@ func (r *WorkloadReconciler) syncAdmissionCheckStatus(ctx context.Context, wl *k
608594
return nil
609595
}
610596

611-
func groupSlicesByState(slices []v1alpha1.Slice) (map[v1alpha1.SliceConditionType][]v1alpha1.Slice, []v1alpha1.Slice) {
612-
slicesByState := make(map[v1alpha1.SliceConditionType][]v1alpha1.Slice)
613-
var noState []v1alpha1.Slice
597+
func groupSlicesByState(slices []v1alpha1.Slice) map[core.SliceState][]v1alpha1.Slice {
598+
slicesByState := make(map[core.SliceState][]v1alpha1.Slice)
614599
for _, slice := range slices {
615-
foundState := false
616-
for _, status := range core.SliceStates {
617-
if meta.IsStatusConditionTrue(slice.Status.Conditions, string(status)) {
618-
slicesByState[status] = append(slicesByState[status], slice)
619-
foundState = true
620-
break
621-
}
622-
}
623-
if !foundState {
624-
noState = append(noState, slice)
625-
}
600+
slicesByState[core.GetSliceState(slice)] = append(slicesByState[core.GetSliceState(slice)], slice)
626601
}
627-
return slicesByState, noState
602+
return slicesByState
628603
}
629604

630605
func prepareAdmissionCheckStatus(ac *kueue.AdmissionCheckState, slices []v1alpha1.Slice) {
631-
slicesByState, noState := groupSlicesByState(slices)
606+
slicesByState := groupSlicesByState(slices)
632607

633608
switch {
634-
case len(slicesByState[v1alpha1.Deformed]) > 0:
635-
ac.State = kueue.CheckStateRejected
636-
case len(slicesByState[v1alpha1.Error]) > 0:
637-
ac.State = kueue.CheckStateRetry
638-
case len(slices) == len(slicesByState[v1alpha1.Degraded])+len(slicesByState[v1alpha1.Ready]):
609+
case len(slices) == len(slicesByState[core.SliceStateActive])+len(slicesByState[core.SliceStateActiveDegraded]):
639610
ac.State = kueue.CheckStateReady
611+
case len(slicesByState[core.SliceStateFailed]) > 0:
612+
ac.State = kueue.CheckStateRetry
613+
case len(slicesByState[core.SliceStateCreated])+len(slicesByState[core.SliceStateActivating]) > 0:
614+
ac.State = kueue.CheckStatePending
640615
}
641616

642617
var stateMessages []string
643-
if len(noState) > 0 {
644-
stateMessages = append(stateMessages, fmt.Sprintf("%d Created", len(noState)))
645-
}
646-
647618
for _, state := range core.SliceStates {
648619
if count := len(slicesByState[state]); count > 0 {
649620
stateMessages = append(stateMessages, fmt.Sprintf("%d %s", count, state))
@@ -652,10 +623,10 @@ func prepareAdmissionCheckStatus(ac *kueue.AdmissionCheckState, slices []v1alpha
652623

653624
ac.Message = fmt.Sprintf("Slices are in states: %s", strings.Join(stateMessages, ", "))
654625

655-
if len(slicesByState[v1alpha1.Error]) > 0 {
626+
if len(slicesByState[core.SliceStateFailed]) > 0 {
656627
var errMessages []string
657-
for _, slice := range slicesByState[v1alpha1.Error] {
658-
cond := meta.FindStatusCondition(slice.Status.Conditions, string(v1alpha1.Error))
628+
for _, slice := range slicesByState[core.SliceStateFailed] {
629+
cond := meta.FindStatusCondition(slice.Status.Conditions, v1alpha1.SliceStateConditionType)
659630
errMessages = append(errMessages, cond.Message)
660631
}
661632
ac.Message += ". Errors: " + strings.Join(errMessages, "; ")

0 commit comments

Comments
 (0)