Skip to content

Commit 17382fa

Browse files
Load vLLM with retries
1 parent 7050e0f commit 17382fa

File tree

1 file changed

+11
-2
lines changed

1 file changed

+11
-2
lines changed

src/fmcore/algorithm/vllm.py

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,8 +2,9 @@
22
from typing import Any, Dict, List, Optional, Union
33

44
import numpy as np
5+
import requests
56
from bears import FileMetadata
6-
from bears.util import EnvUtil, get_default, ignore_warnings, optional_dependency, set_param_from_alias
7+
from bears.util import EnvUtil, get_default, ignore_warnings, optional_dependency, retry, set_param_from_alias
78
from pydantic import confloat, conint, model_validator
89

910
from fmcore.framework._task.text_generation import (
@@ -16,6 +17,7 @@
1617
)
1718

1819
with optional_dependency("vllm"):
20+
from huggingface_hub.errors import HfHubHTTPError
1921
from vllm import LLM, SamplingParams
2022

2123
os.environ["VLLM_LOGGING_LEVEL"] = "WARNING"
@@ -97,7 +99,14 @@ def initialize(self, model_dir: Optional[FileMetadata] = None):
9799
print(f"Initializing vllm with kwargs: {kwargs}")
98100

99101
with ignore_warnings():
100-
self.llm = LLM(**kwargs)
102+
self.llm = retry(
103+
LLM,
104+
retries=10,
105+
wait=30,
106+
jitter=0.5,
107+
retryable_exceptions=(requests.exceptions.ReadTimeout, HfHubHTTPError),
108+
**kwargs,
109+
)
101110

102111
def predict_step(self, batch: Prompts, **kwargs) -> Dict:
103112
"""Run prediction on a batch of prompts"""

0 commit comments

Comments
 (0)