@@ -103,6 +103,7 @@ var _ = ginkgo.Describe("JobSet", func() {
103
103
type testCase struct {
104
104
tpuTopology string
105
105
parallelism int32
106
+ replicas int32
106
107
wantSliceSize int32
107
108
tpuRequests string
108
109
wantDomains []kueue.TopologyDomainAssignment
@@ -117,7 +118,7 @@ var _ = ginkgo.Describe("JobSet", func() {
117
118
Name : "rj1" ,
118
119
Image : utils .E2eTestAgnHostImage ,
119
120
Args : utils .BehaviorWaitForDeletion ,
120
- Replicas : 1 ,
121
+ Replicas : tc . replicas ,
121
122
Parallelism : tc .parallelism ,
122
123
Completions : tc .parallelism ,
123
124
PodAnnotations : map [string ]string {
@@ -165,7 +166,7 @@ var _ = ginkgo.Describe("JobSet", func() {
165
166
g .Expect (createdWorkload .Spec .PodSets [0 ].TopologyRequest ).To (gomega .BeComparableTo (& kueue.PodSetTopologyRequest {
166
167
Required : ptr .To (core .TPUBlockLabel ),
167
168
PodSetSliceRequiredTopology : ptr .To (core .TPUSubBlockLabel ),
168
- SubGroupCount : ptr.To [ int32 ]( 1 ),
169
+ SubGroupCount : ptr .To ( tc . replicas ),
169
170
PodSetSliceSize : ptr .To (tc .wantSliceSize ),
170
171
}, ignorePodSetTopologyRequestFields ))
171
172
}, utils .Timeout , utils .Interval ).Should (gomega .Succeed ())
@@ -283,6 +284,7 @@ var _ = ginkgo.Describe("JobSet", func() {
283
284
tpuTopology : "4x4x4" ,
284
285
tpuRequests : "4" ,
285
286
parallelism : 16 ,
287
+ replicas : 1 ,
286
288
wantSliceSize : 16 ,
287
289
wantDomains : []kueue.TopologyDomainAssignment {{
288
290
Values : []string {"b1" , "sb1" },
@@ -296,6 +298,7 @@ var _ = ginkgo.Describe("JobSet", func() {
296
298
tpuTopology : "4x4x4" ,
297
299
tpuRequests : "1" ,
298
300
parallelism : 64 ,
301
+ replicas : 1 ,
299
302
wantSliceSize : 64 ,
300
303
wantDomains : []kueue.TopologyDomainAssignment {{
301
304
Values : []string {"b1" , "sb1" },
@@ -309,6 +312,7 @@ var _ = ginkgo.Describe("JobSet", func() {
309
312
tpuTopology : "4x4x12" ,
310
313
tpuRequests : "4" ,
311
314
parallelism : 48 ,
315
+ replicas : 1 ,
312
316
wantSliceSize : 16 ,
313
317
wantDomains : []kueue.TopologyDomainAssignment {
314
318
{
@@ -332,6 +336,7 @@ var _ = ginkgo.Describe("JobSet", func() {
332
336
tpuTopology : "4x4x12" ,
333
337
tpuRequests : "2" ,
334
338
parallelism : 96 ,
339
+ replicas : 1 ,
335
340
wantSliceSize : 32 ,
336
341
wantDomains : []kueue.TopologyDomainAssignment {
337
342
{
@@ -355,6 +360,7 @@ var _ = ginkgo.Describe("JobSet", func() {
355
360
tpuTopology : "4x4x8" ,
356
361
tpuRequests : "1" ,
357
362
parallelism : 128 ,
363
+ replicas : 1 ,
358
364
wantSliceSize : 64 ,
359
365
wantDomains : []kueue.TopologyDomainAssignment {
360
366
{
@@ -370,6 +376,46 @@ var _ = ginkgo.Describe("JobSet", func() {
370
376
controller .TPUReservationSubblockLabel : {"sb2" , "sb3" },
371
377
},
372
378
}),
379
+ ginkgo .Entry ("TPU topology 4x4x4 split across 2 replicas" , testCase {
380
+ tpuTopology : "4x4x4" ,
381
+ tpuRequests : "4" ,
382
+ parallelism : 8 ,
383
+ replicas : 2 ,
384
+ wantSliceSize : 16 ,
385
+ wantDomains : []kueue.TopologyDomainAssignment {
386
+ {
387
+ Values : []string {"b1" , "sb1" },
388
+ Count : 16 ,
389
+ },
390
+ },
391
+ wantNodeSelector : map [string ][]string {
392
+ controller .TPUReservationSubblockLabel : {"sb1" },
393
+ },
394
+ }),
395
+ ginkgo .Entry ("TPU topology 4x4x12 split across 3 replicas" , testCase {
396
+ tpuTopology : "4x4x12" ,
397
+ tpuRequests : "4" ,
398
+ parallelism : 16 ,
399
+ replicas : 3 ,
400
+ wantSliceSize : 16 ,
401
+ wantDomains : []kueue.TopologyDomainAssignment {
402
+ {
403
+ Values : []string {"b2" , "sb2" },
404
+ Count : 16 ,
405
+ },
406
+ {
407
+ Values : []string {"b2" , "sb3" },
408
+ Count : 16 ,
409
+ },
410
+ {
411
+ Values : []string {"b2" , "sb4" },
412
+ Count : 16 ,
413
+ },
414
+ },
415
+ wantNodeSelector : map [string ][]string {
416
+ controller .TPUReservationSubblockLabel : {"sb2" , "sb3" , "sb4" },
417
+ },
418
+ }),
373
419
)
374
420
})
375
421
})
0 commit comments