File tree Expand file tree Collapse file tree 3 files changed +9
-0
lines changed Expand file tree Collapse file tree 3 files changed +9
-0
lines changed Original file line number Diff line number Diff line change 214
214
| [swift/Qwen3-235B-A22B-AWQ](https://modelscope.cn/models/swift/Qwen3-235B-A22B-AWQ) | qwen3_moe | qwen3 | transformers>=4.51 | ✘ | - | [cognitivecomputations/Qwen3-235B-A22B-AWQ](https://huggingface.co/cognitivecomputations/Qwen3-235B-A22B-AWQ) |
215
215
| [Qwen/Qwen3-235B-A22B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Instruct-2507) | qwen3_moe | qwen3 | transformers>=4.51 | ✔ | - | [Qwen/Qwen3-235B-A22B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507) |
216
216
| [Qwen/Qwen3-235B-A22B-Instruct-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8) | qwen3_moe | qwen3 | transformers>=4.51 | ✘ | - | [Qwen/Qwen3-235B-A22B-Instruct-2507-FP8](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8) |
217
+ | [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://modelscope.cn/models/Qwen/Qwen3-Coder-480B-A35B-Instruct) | qwen3_moe | qwen3 | transformers>=4.51 | ✔ | coding | [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) |
218
+ | [Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8](https://modelscope.cn/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8) | qwen3_moe | qwen3 | transformers>=4.51 | ✘ | coding | [Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8) |
217
219
| [Qwen/Qwen3-Embedding-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B) | qwen3_emb | qwen3_emb | - | ✘ | - | [Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B) |
218
220
| [Qwen/Qwen3-Embedding-4B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-4B) | qwen3_emb | qwen3_emb | - | ✘ | - | [Qwen/Qwen3-Embedding-4B](https://huggingface.co/Qwen/Qwen3-Embedding-4B) |
219
221
| [Qwen/Qwen3-Embedding-8B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-8B) | qwen3_emb | qwen3_emb | - | ✘ | - | [Qwen/Qwen3-Embedding-8B](https://huggingface.co/Qwen/Qwen3-Embedding-8B) |
Original file line number Diff line number Diff line change @@ -214,6 +214,8 @@ The table below introduces the models integrated with ms-swift:
214
214
| [swift/Qwen3-235B-A22B-AWQ](https://modelscope.cn/models/swift/Qwen3-235B-A22B-AWQ) | qwen3_moe | qwen3 | transformers>=4.51 | ✘ | - | [cognitivecomputations/Qwen3-235B-A22B-AWQ](https://huggingface.co/cognitivecomputations/Qwen3-235B-A22B-AWQ) |
215
215
| [Qwen/Qwen3-235B-A22B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Instruct-2507) | qwen3_moe | qwen3 | transformers>=4.51 | ✔ | - | [Qwen/Qwen3-235B-A22B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507) |
216
216
| [Qwen/Qwen3-235B-A22B-Instruct-2507-FP8](https://modelscope.cn/models/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8) | qwen3_moe | qwen3 | transformers>=4.51 | ✘ | - | [Qwen/Qwen3-235B-A22B-Instruct-2507-FP8](https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507-FP8) |
217
+ | [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://modelscope.cn/models/Qwen/Qwen3-Coder-480B-A35B-Instruct) | qwen3_moe | qwen3 | transformers>=4.51 | ✔ | coding | [Qwen/Qwen3-Coder-480B-A35B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct) |
218
+ | [Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8](https://modelscope.cn/models/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8) | qwen3_moe | qwen3 | transformers>=4.51 | ✘ | coding | [Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8](https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8) |
217
219
| [Qwen/Qwen3-Embedding-0.6B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-0.6B) | qwen3_emb | qwen3_emb | - | ✘ | - | [Qwen/Qwen3-Embedding-0.6B](https://huggingface.co/Qwen/Qwen3-Embedding-0.6B) |
218
220
| [Qwen/Qwen3-Embedding-4B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-4B) | qwen3_emb | qwen3_emb | - | ✘ | - | [Qwen/Qwen3-Embedding-4B](https://huggingface.co/Qwen/Qwen3-Embedding-4B) |
219
221
| [Qwen/Qwen3-Embedding-8B](https://modelscope.cn/models/Qwen/Qwen3-Embedding-8B) | qwen3_emb | qwen3_emb | - | ✘ | - | [Qwen/Qwen3-Embedding-8B](https://huggingface.co/Qwen/Qwen3-Embedding-8B) |
Original file line number Diff line number Diff line change @@ -556,6 +556,11 @@ def _get_cast_dtype(self) -> torch.dtype:
556
556
Model ('Qwen/Qwen3-235B-A22B-Instruct-2507' , 'Qwen/Qwen3-235B-A22B-Instruct-2507' ),
557
557
Model ('Qwen/Qwen3-235B-A22B-Instruct-2507-FP8' , 'Qwen/Qwen3-235B-A22B-Instruct-2507-FP8' ),
558
558
]),
559
+ ModelGroup ([
560
+ Model ('Qwen/Qwen3-Coder-480B-A35B-Instruct' , 'Qwen/Qwen3-Coder-480B-A35B-Instruct' ),
561
+ Model ('Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8' , 'Qwen/Qwen3-Coder-480B-A35B-Instruct-FP8' ),
562
+ ],
563
+ tags = ['coding' ]),
559
564
],
560
565
TemplateType .qwen3 ,
561
566
get_model_tokenizer_with_flash_attn ,
You can’t perform that action at this time.
0 commit comments