Skip to content

Commit 5bc7a6a

Browse files
authored
fix: disable sagemaker deployment for bge-vl (#160)
* fix: disable sagemaker deployment for bge-vl
* fix: code cleanup
1 parent a134880 commit 5bc7a6a

File tree

2 files changed: +19 additions, −21 deletions

src/emd/models/embeddings/bge_vl.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
```diff
@@ -57,9 +57,7 @@
         local_instance,
     ],
     supported_services=[
-        sagemaker_service,
-        ecs_service,
-        local_service
+        ecs_service
     ],
     supported_frameworks=[
         fastapi_framework
@@ -73,4 +71,4 @@
     model_series=BGE_SERIES,
     description="BGE-VL-large is a larger multimodal embedding model that supports text, image, and text-image pair inputs for high-performance multimodal representation learning and cross-modal retrieval tasks."
     )
-)
+)
```
(Second hunk: whitespace-only cleanup of the closing parenthesis line.)

src/pipeline/backend/huggingface/embedding/transformers_embedding_backend.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
All changes in this file are whitespace-only: 17 blank lines carrying trailing spaces are replaced by truly empty lines ("fix: code cleanup").

```diff
@@ -65,15 +65,15 @@ def start(self):
             device_map="cuda",
             **self.pretrained_model_init_kwargs
         )
-
+
         # BGE-VL specific initialization
         if self.is_bge_vl:
             try:
                 self.model.set_processor(model_abs_path)
                 logger.info(f"BGE-VL processor set successfully for model: {self.model_id}")
             except Exception as e:
                 logger.warning(f"Failed to set BGE-VL processor: {e}")
-
+
         logger.info(f"model: {self.model}")
         # TODO add tokenizer init args from model's definition
         # self.tokenizer = AutoTokenizer.from_pretrained(
@@ -106,20 +106,20 @@ def _process_base64_image(self, image_data: str) -> Image.Image:
             # Handle data URL format
             if image_data.startswith('data:image'):
                 image_data = image_data.split(',')[1]
-
+
             # Decode base64
             image_bytes = base64.b64decode(image_data)
             image = Image.open(io.BytesIO(image_bytes))
-
+
             # Convert to RGB if needed
             if image.mode != 'RGB':
                 image = image.convert('RGB')
-
+
             return image
         except Exception as e:
             logger.error(f"Failed to process base64 image: {e}")
             raise ValueError(f"Invalid image data: {e}")
-
+
     def _convert_pil_to_bytesio(self, pil_image: Image.Image) -> io.BytesIO:
         """Convert PIL Image to BytesIO object for BGE-VL compatibility"""
         try:
@@ -131,13 +131,13 @@ def _convert_pil_to_bytesio(self, pil_image: Image.Image) -> io.BytesIO:
         except Exception as e:
             logger.error(f"Failed to convert PIL image to BytesIO: {e}")
             raise ValueError(f"Image conversion failed: {e}")
-
+
     def _parse_multimodal_inputs(self, inputs):
         """Parse and categorize multimodal inputs for BGE-VL"""
         text_inputs = []
         image_inputs = []
         multimodal_inputs = []
-
+
         for inp in inputs:
             if isinstance(inp, str):
                 # Simple text input
@@ -162,14 +162,14 @@ def _parse_multimodal_inputs(self, inputs):
                 # Convert PIL Image to BytesIO for BGE-VL compatibility
                 bytesio_image = self._convert_pil_to_bytesio(pil_image)
                 multimodal_inputs.append((text, bytesio_image))
-
+
         return text_inputs, image_inputs, multimodal_inputs
-
+
     def _generate_bge_vl_embeddings(self, inputs):
         """Generate embeddings using BGE-VL model"""
         text_inputs, image_inputs, multimodal_inputs = self._parse_multimodal_inputs(inputs)
         all_embeddings = []
-
+
         # Process text-only inputs
         if text_inputs:
             try:
@@ -182,7 +182,7 @@ def _generate_bge_vl_embeddings(self, inputs):
             except Exception as e:
                 logger.error(f"Failed to encode text inputs: {e}")
                 raise ValueError(f"BGE-VL text encoding failed: {e}")
-
+
         # Process image-only inputs
         if image_inputs:
             try:
@@ -195,7 +195,7 @@ def _generate_bge_vl_embeddings(self, inputs):
             except Exception as e:
                 logger.error(f"Failed to encode image inputs: {e}")
                 raise ValueError(f"BGE-VL image encoding failed: {e}")
-
+
         # Process multimodal inputs (text + image)
         if multimodal_inputs:
             for text, bytesio_image in multimodal_inputs:
@@ -209,7 +209,7 @@ def _generate_bge_vl_embeddings(self, inputs):
             except Exception as e:
                 logger.error(f"Failed to encode multimodal input: {e}")
                 raise ValueError(f"BGE-VL multimodal encoding failed: {e}")
-
+
         return all_embeddings

     def invoke(self, request:dict):
@@ -219,7 +219,7 @@ def invoke(self, request:dict):

         logger.info(f'request: {request}')
         t0 = time.time()
-
+
         if self.is_bge_vl:
             # Use BGE-VL multimodal processing
             embeddings_list = self._generate_bge_vl_embeddings(inputs)
@@ -229,10 +229,10 @@ def invoke(self, request:dict):
             truncate_dim = request.get('truncate_dim', None)
             embeddings = self.model.encode(inputs, task=task, truncate_dim=truncate_dim)
             embeddings_list = embeddings.tolist()
-
+
         logger.info(f'embeddings generated, count: {len(embeddings_list)}, elapsed time: {time.time()-t0}')
         return self.format_openai_response(embeddings_list)
-
+
     async def ainvoke(self, request: dict):
         """Async version of invoke method"""
         return self.invoke(request)
```

0 commit comments

Comments
 (0)