Skip to content
This repository was archived by the owner on Aug 29, 2025. It is now read-only.

Commit ae2515e

Browse files
authored
feat: add gme-Qwen2-VL-7B-Instruct (#153)
* feat: add gme-Qwen2-VL-7B-Instruct * fix: revert vllm engine to v0.8.4 for gme qwen2vl
1 parent a3afb92 commit ae2515e

File tree

4 files changed

+56
-2
lines changed

4 files changed

+56
-2
lines changed

src/emd/models/embeddings/qwen.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,19 @@
11
from .. import Model
2-
from ..engines import vllm_qwen3_engin091
2+
from ..engines import vllm_qwen3_engin091, vllm_gme_qwen2vl_engine091, vllm_gme_qwen2vl_engine084_compat
33
from ..services import sagemaker_service,local_service,ecs_service
44
from ..frameworks import fastapi_framework
55
from ..instances import (
66
g5dxlarge_instance,
77
g5d2xlarge_instance,
88
g5d4xlarge_instance,
99
g5d8xlarge_instance,
10+
g5d12xlarge_instance,
1011
g5d16xlarge_instance,
1112
local_instance
1213
)
1314
from emd.models.utils.constants import ModelType
1415
from emd.models import ModelSeries
15-
from ..model_series import QWEN3_SERIES
16+
from ..model_series import QWEN3_SERIES, GME_SERIES
1617

1718

1819
Model.register(
@@ -104,3 +105,33 @@
104105
model_series=QWEN3_SERIES
105106
)
106107
)
108+
109+
Model.register(
110+
dict(
111+
model_id = "gme-Qwen2-VL-7B-Instruct",
112+
supported_engines=[vllm_gme_qwen2vl_engine084_compat],
113+
supported_instances=[
114+
g5d4xlarge_instance,
115+
g5d8xlarge_instance,
116+
g5d12xlarge_instance,
117+
g5d16xlarge_instance,
118+
local_instance
119+
],
120+
supported_services=[
121+
sagemaker_service,
122+
ecs_service,
123+
local_service
124+
],
125+
supported_frameworks=[
126+
fastapi_framework
127+
],
128+
allow_china_region=True,
129+
huggingface_model_id="Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
130+
modelscope_model_id="Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
131+
require_huggingface_token=False,
132+
application_scenario="Multimodal RAG, image-text retrieval, visual search",
133+
description="General Multimodal Embedding model based on Qwen2-VL architecture, supporting text, image, and image-text pair inputs for unified multimodal representation learning and retrieval tasks. Uses vLLM v0.8.4 for transformers compatibility.",
134+
model_type=ModelType.EMBEDDING,
135+
model_series=GME_SERIES
136+
)
137+
)

src/emd/models/engines.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,22 @@ class KtransformersEngine(OpenAICompitableEngine):
183183
"default_cli_args": " --max_num_seq 30 --disable-log-stats --trust-remote-code --task embed"
184184
})
185185

186+
vllm_gme_qwen2vl_engine091 = VllmEngine(**{
187+
**vllm_embedding_engine091.model_dump(),
188+
"environment_variables": "export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
189+
"default_cli_args": " --max_num_seq 20 --disable-log-stats --trust-remote-code --task embed --limit-mm-per-prompt image=10 --gpu_memory_utilization 0.8",
190+
"description": "VLLM engine for GME multimodal embedding models based on Qwen2-VL"
191+
})
192+
193+
# GME-compatible engine with transformers 4.51.3
194+
vllm_gme_qwen2vl_engine084_compat = VllmEngine(**{
195+
**vllm_embedding_engine091.model_dump(),
196+
"engine_dockerfile_config": {"VERSION":"v0.8.4"},
197+
"environment_variables": "export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
198+
"default_cli_args": " --max_num_seq 20 --disable-log-stats --trust-remote-code --task embed --limit-mm-per-prompt image=10 --gpu_memory_utilization 0.8",
199+
"description": "VLLM engine v0.8.4 for GME multimodal embedding models with compatible transformers version"
200+
})
201+
186202

187203
vllm_qwen2vl72b_engine064 = VllmEngine(**{
188204
**vllm_engine064.model_dump(),

src/emd/models/model_series.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,9 @@
151151
description="Baichuan Intelligent Technology.",
152152
reference_link="https://github.com/baichuan-inc"
153153
)
154+
155+
GME_SERIES = ModelSeries(
156+
model_series_name=ModelSeriesType.GME,
157+
description="General Multimodal Embedding (GME) models based on Qwen2-VL architecture, designed for unified multimodal representation learning supporting text, image, and image-text pair inputs for retrieval and search applications.",
158+
reference_link="https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct"
159+
)

src/emd/models/utils/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ class ModelSeriesType(ConstantBase):
227227
BCE = "bce"
228228
COMFYUI = "comfyui"
229229
QWEN2VL = "qwen2vl"
230+
GME = "gme"
230231
AGENT = "agent"
231232
INTERNVL25 = "internvl2.5"
232233
LLAMA = "llama"

0 commit comments

Comments (0)