|
18 | 18 | "linux.aws.h100", |
19 | 19 | "linux.rocm.gpu.gfx942.1", |
20 | 20 | "linux.24xl.spr-metal", |
| 21 | + "linux.24xl.gnr", |
21 | 22 | "linux.dgx.b200", |
22 | 23 | ], |
23 | 24 | # NB: There is no 2xH100 runner at the moment, so let's use the next one |
24 | 25 | # in the list here which is 4xH100 |
25 | 26 | 2: [ |
26 | 27 | "linux.aws.h100.4", |
27 | 28 | "linux.rocm.gpu.gfx942.2", |
| 29 | + "linux.24xl.gnr", |
28 | 30 | ], |
29 | 31 | 4: [ |
30 | 32 | "linux.aws.h100.4", |
|
50 | 52 | "linux.rocm.gpu.gfx942.4": "rocm", |
51 | 53 | "linux.rocm.gpu.gfx942.8": "rocm", |
52 | 54 | "linux.24xl.spr-metal": "cpu", |
| 55 | + "linux.24xl.gnr": "cpu", |
53 | 56 | } |
54 | 57 |
|
55 | 58 | # All the different names vLLM uses to refer to their benchmark configs |
|
72 | 75 | "meta-llama/Meta-Llama-3.1-70B-Instruct": [ |
73 | 76 | "linux.dgx.b200", |
74 | 77 | "linux.rocm.gpu.gfx942", # TODO: Fail on ROCm |
| 78 | + "linux.24xl.gnr", |
75 | 79 | ], |
76 | 80 | "mistralai/Mixtral-8x7B-Instruct-v0.1": [ |
77 | 81 | "linux.dgx.b200", |
|
88 | 92 | "linux.aws.a100", |
89 | 93 | "linux.aws.h100", |
90 | 94 | "linux.rocm.gpu.gfx942", # TODO: Fail on ROCm |
| 95 | + "linux.24xl.gnr", |
91 | 96 | ], |
92 | 97 | "google/gemma-3-27b-it": [ |
93 | 98 | "linux.aws.a100", |
94 | 99 | "linux.aws.h100", |
95 | 100 | "linux.rocm.gpu.gfx942", # TODO (huydhn): Fail on ROCm |
| 101 | + "linux.24xl.gnr", |
96 | 102 | ], |
97 | 103 | "meta-llama/Llama-4-Scout-17B-16E-Instruct": [ |
98 | 104 | "linux.aws.a100", |
99 | 105 | "linux.aws.h100", |
100 | 106 | "linux.rocm.gpu.gfx942", # TODO: Fail on ROCm |
| 107 | + "linux.24xl.gnr", |
101 | 108 | ], |
102 | 109 | "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": [ |
103 | 110 | "linux.aws.a100", |
104 | 111 | "linux.aws.h100", |
105 | 112 | "linux.rocm.gpu.gfx942", # TODO (huydhn): Hang on ROCm |
| 113 | + "linux.24xl.gnr", |
106 | 114 | ], |
107 | 115 | # Run gpt-oss on both H100 and B200 |
108 | 116 | "openai/gpt-oss-20b": [ |
109 | 117 | "linux.aws.a100", |
| 118 | + "linux.24xl.gnr", |
110 | 119 | ], |
111 | 120 | "openai/gpt-oss-120b": [ |
112 | 121 | "linux.aws.a100", |
| 122 | + "linux.24xl.gnr", |
113 | 123 | ], |
114 | 124 | # Deepseek can only run on B200 |
115 | 125 | "deepseek-ai/DeepSeek-V3.1": [ |
116 | 126 | "linux.aws.a100", |
117 | 127 | "linux.aws.h100", |
| 128 | + "linux.24xl.gnr", |
118 | 129 | ], |
119 | 130 | "deepseek-ai/DeepSeek-V3.2-Exp": [ |
120 | 131 | "linux.aws.a100", |
121 | 132 | "linux.aws.h100", |
| 133 | + "linux.24xl.gnr", |
122 | 134 | ], |
123 | 135 | "deepseek-ai/DeepSeek-R1": [ |
124 | 136 | "linux.aws.a100", |
| 137 | + "linux.24xl.gnr", |
125 | 138 | "linux.aws.h100", |
126 | 139 | ], |
127 | 140 | } |
|
0 commit comments