Skip to content

Commit 1ff5900

Browse files
authored
feat: add Qwen/Qwen3-Embedding and vllm to v0.9.1 (#146)
* feat: add Qwen/Qwen3-Embedding and vllm to v0.9.1 * chore: code cleanup
1 parent 7f51de4 commit 1ff5900

File tree

5 files changed

+122
-0
lines changed

5 files changed

+122
-0
lines changed

docs/en/supported_models.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@
7272
| bge-m3 | bge | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
7373
| jina-embeddings-v3 | jina | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
7474
| jina-embeddings-v4 | jina | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
75+
| Qwen3-Embedding-0.6B | qwen3 | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
76+
| Qwen3-Embedding-4B | qwen3 | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
77+
| Qwen3-Embedding-8B | qwen3 | embedding | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
7578
| bge-reranker-v2-m3 | bge | rerank | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
7679
| bge-reranker-large | bge | rerank | g4dn.2xlarge,g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||
7780
| jina-reranker-v2-base-multilingual | jina | rerank | g5.xlarge,g5.2xlarge,g5.4xlarge,g5.8xlarge,g5.16xlarge | sagemaker_realtime,ecs ||

src/emd/models/embeddings/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from . import bert_embedding
22
from . import jina
3+
from . import qwen

src/emd/models/embeddings/qwen.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from .. import Model
2+
from ..engines import vllm_qwen3_engin091
3+
from ..services import sagemaker_service,local_service,ecs_service
4+
from ..frameworks import fastapi_framework
5+
from ..instances import (
6+
g5dxlarge_instance,
7+
g5d2xlarge_instance,
8+
g5d4xlarge_instance,
9+
g5d8xlarge_instance,
10+
g5d16xlarge_instance,
11+
local_instance
12+
)
13+
from emd.models.utils.constants import ModelType
14+
from emd.models import ModelSeries
15+
from ..model_series import QWEN3_SERIES
16+
17+
18+
# Register the Qwen3-Embedding checkpoints. The three variants share every
# deployment setting and differ only in the size suffix of their ids, so the
# registrations are produced in order (0.6B, 4B, 8B) from one template.
for _size in ("0.6B", "4B", "8B"):
    _name = "Qwen3-Embedding-" + _size
    # The same "Qwen/<model_id>" repository id is used on both model hubs.
    _hub_id = "Qwen/" + _name
    Model.register(
        {
            "model_id": _name,
            "supported_engines": [vllm_qwen3_engin091],
            "supported_instances": [
                g5dxlarge_instance,
                g5d2xlarge_instance,
                g5d4xlarge_instance,
                g5d8xlarge_instance,
                g5d16xlarge_instance,
                local_instance,
            ],
            "supported_services": [
                sagemaker_service,
                ecs_service,
                local_service,
            ],
            "supported_frameworks": [
                fastapi_framework,
            ],
            "allow_china_region": True,
            "huggingface_model_id": _hub_id,
            "modelscope_model_id": _hub_id,
            "require_huggingface_token": False,
            "application_scenario": "RAG",
            "model_type": ModelType.EMBEDDING,
            "model_series": QWEN3_SERIES,
        }
    )
# Drop the loop temporaries so the module namespace matches the hand-written form.
del _size, _name, _hub_id

src/emd/models/engines.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,13 @@ class KtransformersEngine(OpenAICompitableEngine):
169169
"default_cli_args": " --max_model_len 16000 --max_num_seq 30 --disable-log-stats --enable-reasoning --reasoning-parser qwen3 --enable-auto-tool-choice --tool-call-parser hermes --enable-prefix-caching"
170170
})
171171

172+
# vLLM v0.9.1 engine variant: starts from a dump of vllm_engine064's fields and
# overrides the Dockerfile version, the exported environment variable and the
# default CLI arguments.
# NOTE(review): the default CLI arguments turn on the qwen3 reasoning parser and
# the hermes tool-call parser; confirm these are intended for the
# Qwen3-Embedding models that list this engine.
vllm_qwen3_engin091 = VllmEngine(
    **dict(
        vllm_engine064.model_dump(),
        engine_dockerfile_config={"VERSION": "v0.9.1"},
        environment_variables="export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
        default_cli_args=" --max_model_len 16000 --max_num_seq 30 --disable-log-stats --enable-reasoning --reasoning-parser qwen3 --enable-auto-tool-choice --tool-call-parser hermes --enable-prefix-caching",
    )
)
178+
172179

173180
vllm_qwen2vl72b_engine064 = VllmEngine(**{
174181
**vllm_engine064.model_dump(),

src/emd/models/model_series.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@
5353
reference_link="https://huggingface.co/jinaai"
5454
)
5555

56+
# Model series metadata for the Qwen3-Embedding family.
# The previous values were copied from the Jina series (JINA series type, Jina
# description and the jinaai link); they now describe Qwen3-Embedding.
# NOTE(review): the identifier is misspelled ("QWNE"); it is kept as-is so any
# existing reference keeps working. Rename it in a coordinated change.
# NOTE(review): assumes ModelSeriesType.QWEN3 exists, matching the "qwen3"
# series shown in the supported-models table. Confirm before merging.
QWNE_EMBEDDING = ModelSeries(
    model_series_name=ModelSeriesType.QWEN3,
    description="Qwen3 Embedding models: text embedding and ranking models built on the Qwen3 foundation models",
    reference_link="https://github.com/QwenLM/Qwen3-Embedding"
)
5661

5762
QWEN2VL_SERIES = ModelSeries(
5863
model_series_name=ModelSeriesType.QWEN2VL,

0 commit comments

Comments
 (0)