Skip to content

Commit 4329fce

Browse files
authored
feat: add deepseek-ai/DeepSeek-R1-0528-Qwen3-8B (#143)
* feat: add deepseek-ai/DeepSeek-R1-0528-Qwen3-8B
* feat: update supported models
* feat: update vllm versions
1 parent 7af2274 commit 4329fce

File tree

3 files changed

+39
-0
lines changed

3 files changed

+39
-0
lines changed

docs/en/supported_models.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
| deepseek-r1-671b-2.51bit_gguf | deepseek reasoning model | llm | g5.12xlarge,g5.16xlarge,g5.24xlarge,g5.48xlarge,g6.12xlarge,g6.16xlarge,g6.24xlarge,g6.48xlarge,g6e.8xlarge,g6e.12xlarge,g6e.16xlarge,g6e.24xlarge,g6e.48xlarge | sagemaker_realtime,sagemaker_async,ecs ||
4949
| DeepSeek-R1 | deepseek reasoning model | llm | | ||
5050
| deepseek-r1-671b-4bit_gguf | deepseek reasoning model | llm | g5.24xlarge,g5.48xlarge,g6.24xlarge,g6.48xlarge,g6e.16xlarge,g6e.24xlarge,g6e.48xlarge | sagemaker_realtime,sagemaker_async,ecs ||
51+
| DeepSeek-R1-0528-Qwen3-8B | deepseek reasoning model | llm | g5.xlarge,g5.2xlarge,g5.4xlarge | sagemaker_realtime,sagemaker_async,ecs ||
5152
| deepseek-v3-UD-IQ1_M_ollama | deepseek v3 | llm | g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs ||
5253
| Baichuan-M1-14B-Instruct | baichuan | llm | g5.12xlarge,g5.24xlarge,g5.48xlarge | sagemaker_realtime,sagemaker_async,ecs ||
5354
| ReaderLM-v2 | jina | llm | g4dn.2xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge,inf2.8xlarge | sagemaker_realtime,sagemaker_async,ecs ||

src/emd/models/engines.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,13 @@ class KtransformersEngine(OpenAICompitableEngine):
118118
"default_cli_args": "--max_num_seq 256 --max_model_len 16000 --chat-template emd/models/chat_templates/deepseek_r1_distill.jinja"
119119
})
120120

121+
vllm_deepseek_r1_distill_qwen_engine085 = VllmEngine(**{
122+
**vllm_engine064.model_dump(),
123+
"engine_dockerfile_config": {"VERSION":"v0.8.5"},
124+
"default_cli_args": "--max_num_seq 256 --max_model_len 16000 --chat-template emd/models/chat_templates/deepseek_r1_distill.jinja"
125+
})
126+
127+
121128
vllm_deepseek_r1_distill_llama_engine071 = vllm_deepseek_r1_distill_qwen_engine071
122129

123130
vllm_deepseek_r1_engine084 = VllmEngine(**{

src/emd/models/llms/deepseek.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from .. import Model
22
from ..engines import (
33
vllm_deepseek_r1_distill_qwen_engine071,
4+
vllm_deepseek_r1_distill_qwen_engine085,
45
vllm_deepseek_r1_distill_llama_engine071,
56
ollama_deepseek_r1_qwen2d5_1d5b_engine057,
67
llama_cpp_deepseek_r1_1d58_bit_engine_b9ab0a4,
@@ -17,6 +18,7 @@
1718
)
1819
from ..frameworks import fastapi_framework
1920
from ..instances import (
21+
g5dxlarge_instance,
2022
g5d2xlarge_instance,
2123
g5d4xlarge_instance,
2224
g5d8xlarge_instance,
@@ -303,6 +305,35 @@
303305
)
304306
)
305307

308+
Model.register(
309+
dict(
310+
model_id = "DeepSeek-R1-0528-Qwen3-8B",
311+
supported_engines=[vllm_deepseek_r1_distill_qwen_engine085],
312+
supported_instances=[
313+
g5dxlarge_instance,
314+
g5d2xlarge_instance,
315+
g5d4xlarge_instance,
316+
local_instance
317+
],
318+
supported_services=[
319+
sagemaker_service,
320+
sagemaker_async_service,
321+
ecs_service,
322+
local_service
323+
],
324+
supported_frameworks=[
325+
fastapi_framework
326+
],
327+
allow_china_region=True,
328+
huggingface_model_id="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
329+
modelscope_model_id="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
330+
require_huggingface_token=False,
331+
application_scenario="Agent, tool use, translation, summary",
332+
description="DeepSeek R1 got a minor upgrade (now DeepSeek-R1-0528). It does great in math, programming, and logic tests, almost as good as top models like O3 and Gemini 2.5 Pro.",
333+
model_type=ModelType.LLM,
334+
model_series=DEEPSEEK_REASONING_MODEL
335+
)
336+
)
306337

307338
Model.register(
308339
dict(

0 commit comments

Comments
 (0)