Skip to content

Commit 0a56175

Browse files
authored
[slice] Update Jobset E2E Test
2 parents d80d9c6 + 55d833a commit 0a56175

File tree

2 files changed

+38
-13
lines changed

2 files changed

+38
-13
lines changed

slice/hack/kind-cluster.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,12 @@ nodes:
3939
cloud.google.com/gke-tpu-accelerator: tpu-v7x
4040
cloud.google.com/gke-tpu-block: b2
4141
cloud.google.com/gke-tpu-subblock: sb3
42+
- role: worker
43+
labels:
44+
cloud.google.com/gke-node-group: tas-group
45+
cloud.google.com/gke-tpu-accelerator: tpu-v7x
46+
cloud.google.com/gke-tpu-block: b2
47+
cloud.google.com/gke-tpu-subblock: sb4
4248
kubeadmConfigPatches:
4349
- |
4450
kind: JoinConfiguration

slice/test/e2e/jobset_test.go

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ const (
4747
)
4848

4949
var (
50-
ignorePodSetTopologyRequestFields = cmpopts.IgnoreFields(kueue.PodSetTopologyRequest{}, "PodIndexLabel", "SubGroupIndexLabel", "SubGroupCount", "PodSetSliceSize")
50+
ignorePodSetTopologyRequestFields = cmpopts.IgnoreFields(kueue.PodSetTopologyRequest{}, "PodIndexLabel", "SubGroupIndexLabel")
5151
)
5252

5353
var _ = ginkgo.Describe("JobSet", func() {
@@ -81,7 +81,7 @@ var _ = ginkgo.Describe("JobSet", func() {
8181
cq = testing.MakeClusterQueue("cq").
8282
AdmissionChecks(kueue.AdmissionCheckReference(ac.Name)).
8383
ResourceGroup(*testing.MakeFlavorQuotas(rf.Name).
84-
Resource(extraResource, "128").
84+
Resource(extraResource, "9999").
8585
Obj()).
8686
Obj()
8787
utils.MustCreate(ctx, k8sClient, cq)
@@ -104,6 +104,7 @@ var _ = ginkgo.Describe("JobSet", func() {
104104
tpuTopology string
105105
parallelism int32
106106
wantSliceSize int32
107+
tpuRequests string
107108
wantDomains []kueue.TopologyDomainAssignment
108109
wantNodeSelector map[string][]string
109110
}
@@ -127,8 +128,7 @@ var _ = ginkgo.Describe("JobSet", func() {
127128
},
128129
},
129130
).
130-
RequestAndLimit("rj1", extraResource, "1").
131-
RequestAndLimit("rj2", extraResource, "1").
131+
RequestAndLimit("rj1", extraResource, tc.tpuRequests).
132132
Obj()
133133

134134
ginkgo.By("Creating a JobSet", func() {
@@ -165,8 +165,8 @@ var _ = ginkgo.Describe("JobSet", func() {
165165
g.Expect(createdWorkload.Spec.PodSets[0].TopologyRequest).To(gomega.BeComparableTo(&kueue.PodSetTopologyRequest{
166166
Required: ptr.To(core.TPUBlockLabel),
167167
PodSetSliceRequiredTopology: ptr.To(core.TPUSubBlockLabel),
168-
SubGroupCount: ptr.To[int32](2),
169-
PodSetSliceSize: ptr.To[int32](tc.wantSliceSize),
168+
SubGroupCount: ptr.To[int32](1),
169+
PodSetSliceSize: ptr.To(tc.wantSliceSize),
170170
}, ignorePodSetTopologyRequestFields))
171171
}, utils.Timeout, utils.Interval).Should(gomega.Succeed())
172172
})
@@ -281,6 +281,7 @@ var _ = ginkgo.Describe("JobSet", func() {
281281
},
282282
ginkgo.Entry("TPU topology 4x4x4 and parallelism 16", testCase{
283283
tpuTopology: "4x4x4",
284+
tpuRequests: "4",
284285
parallelism: 16,
285286
wantSliceSize: 16,
286287
wantDomains: []kueue.TopologyDomainAssignment{{
@@ -293,6 +294,7 @@ var _ = ginkgo.Describe("JobSet", func() {
293294
}),
294295
ginkgo.Entry("TPU topology 4x4x4 and parallelism 64", testCase{
295296
tpuTopology: "4x4x4",
297+
tpuRequests: "1",
296298
parallelism: 64,
297299
wantSliceSize: 64,
298300
wantDomains: []kueue.TopologyDomainAssignment{{
@@ -305,36 +307,53 @@ var _ = ginkgo.Describe("JobSet", func() {
305307
}),
306308
ginkgo.Entry("TPU topology 4x4x12 and parallelism 48", testCase{
307309
tpuTopology: "4x4x12",
310+
tpuRequests: "4",
308311
parallelism: 48,
309312
wantSliceSize: 16,
310-
wantDomains: []kueue.TopologyDomainAssignment{{
311-
Values: []string{"b1", "sb1"},
312-
Count: 48,
313-
}},
313+
wantDomains: []kueue.TopologyDomainAssignment{
314+
{
315+
Values: []string{"b2", "sb2"},
316+
Count: 16,
317+
},
318+
{
319+
Values: []string{"b2", "sb3"},
320+
Count: 16,
321+
},
322+
{
323+
Values: []string{"b2", "sb4"},
324+
Count: 16,
325+
},
326+
},
314327
wantNodeSelector: map[string][]string{
315-
controller.TPUReservationSubblockLabel: {"b1", "sb1"},
328+
controller.TPUReservationSubblockLabel: {"b2", "sb2", "sb3", "sb4"},
316329
},
317330
}),
318331
ginkgo.Entry("TPU topology 4x4x12 and parallelism 96", testCase{
319332
tpuTopology: "4x4x12",
333+
tpuRequests: "2",
320334
parallelism: 96,
321335
wantSliceSize: 32,
322336
wantDomains: []kueue.TopologyDomainAssignment{
323337
{
324338
Values: []string{"b2", "sb2"},
325-
Count: 64,
339+
Count: 32,
326340
},
327341
{
328342
Values: []string{"b2", "sb3"},
329343
Count: 32,
330344
},
345+
{
346+
Values: []string{"b2", "sb4"},
347+
Count: 32,
348+
},
331349
},
332350
wantNodeSelector: map[string][]string{
333-
controller.TPUReservationSubblockLabel: {"b2", "sb2", "sb3"},
351+
controller.TPUReservationSubblockLabel: {"b2", "sb2", "sb3", "sb4"},
334352
},
335353
}),
336354
ginkgo.Entry("TPU topology 4x4x8 and parallelism 128", testCase{
337355
tpuTopology: "4x4x8",
356+
tpuRequests: "1",
338357
parallelism: 128,
339358
wantSliceSize: 64,
340359
wantDomains: []kueue.TopologyDomainAssignment{

0 commit comments

Comments
 (0)