
Commit 3f30625

Switch from field to Field

Signed-off-by: fangyuchu <fangyuchu@qq.com>
1 parent c8bb522

4 files changed: +9 -7 lines changed

vllm/config/vllm.py
Lines changed: 2 additions & 2 deletions

@@ -9,7 +9,7 @@
 import threading
 import time
 from contextlib import contextmanager
-from dataclasses import is_dataclass, field, replace
+from dataclasses import is_dataclass, replace
 from datetime import datetime
 from enum import IntEnum
 from functools import lru_cache
@@ -206,7 +206,7 @@ class VllmConfig:
     """The configurations for event publishing."""
     ec_transfer_config: ECTransferConfig | None = None
     """The configurations for distributed EC cache transfer."""
-    fault_tolerance_config: FaultToleranceConfig = field(
+    fault_tolerance_config: FaultToleranceConfig = Field(
         default_factory=FaultToleranceConfig
     )
     """The configurations for fault tolerance."""

vllm/distributed/parallel_state.py
Lines changed: 3 additions & 3 deletions

@@ -346,9 +346,7 @@ def __init__(
         # processes through the CPU.
         with suppress_stdout():
             if not enable_fault_tolerance:
-                cpu_group = torch.distributed.new_group(
-                    ranks, backend="gloo"
-                )
+                cpu_group = torch.distributed.new_group(ranks, backend="gloo")
             else:
                 cpu_group = torch.distributed.new_group(
                     ranks, backend="gloo", timeout=gloo_comm_timeout
@@ -1151,6 +1149,7 @@ def get_pcp_group() -> GroupCoordinator:
     assert _PCP is not None, "prefill context parallel group is not initialized"
     return _PCP
 
+
 def get_all_model_groups() -> list[GroupCoordinator]:
     group_list = []
     global _TP
@@ -1179,6 +1178,7 @@ def get_all_model_groups() -> list[GroupCoordinator]:
 
     return group_list
 
+
 @contextmanager
 def graph_capture(device: torch.device):
     """

vllm/v1/engine/utils.py
Lines changed: 2 additions & 2 deletions

@@ -215,7 +215,7 @@ def close(self):
 
     def start_engine_core_monitor(self):
         sentinels = [proc.sentinel for proc in self.processes]
-        while self.processes:
+        while sentinels:
             died = multiprocessing.connection.wait(sentinels)
             for sentinel in died:
                 died_proc = next(
@@ -239,7 +239,7 @@ def start_engine_core_monitor(self):
                 sentinels.remove(sentinel)
                 logger.error(
                     "Engine core proc %s died unexpectedly",
-                    died_proc,
+                    died_proc.name,
                 )
 
     def join_first(self):
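
The condition fix matters for termination: `self.processes` is never mutated inside the monitor, so after the last sentinel was removed the old loop would re-enter `multiprocessing.connection.wait([])` with nothing left to wait on; looping on the shrinking `sentinels` list exits once every engine-core process has been observed to die. The second hunk just logs the process name instead of the whole object. A runnable sketch of the pattern, with short-lived workers standing in for engine cores:

```python
# Runnable sketch of the sentinel-monitor loop; short-lived workers stand
# in for the engine-core processes.
import multiprocessing
import multiprocessing.connection
import time


def worker() -> None:
    time.sleep(0.1)


if __name__ == "__main__":
    procs = [
        multiprocessing.Process(target=worker, name=f"EngineCore_{i}")
        for i in range(3)
    ]
    for p in procs:
        p.start()

    sentinels = [p.sentinel for p in procs]
    # Loop on the shrinking sentinel list, not the untouched process list:
    # once every sentinel has fired, the monitor exits instead of calling
    # wait() again with an empty list.
    while sentinels:
        died = multiprocessing.connection.wait(sentinels)
        for sentinel in died:
            died_proc = next(p for p in procs if p.sentinel == sentinel)
            sentinels.remove(sentinel)
            print(f"Engine core proc {died_proc.name} died")
```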

vllm/v1/worker/gpu_worker.py
Lines changed: 2 additions & 0 deletions

@@ -174,6 +174,7 @@ def _abort_nccl_comm(group: GroupCoordinator):
     if group.device_communicator is not None:
         device_comm = cast(CudaCommunicator, group.device_communicator)
         nccl_comm = device_comm.pynccl_comm
+        assert nccl_comm is not None
         nccl_comm.nccl_abort_comm()
 
 def _abort_process_group(group: GroupCoordinator):
@@ -223,6 +224,7 @@ def _set_device_communicator_status(self, active: bool):
         if group.device_communicator is not None:
             device_comm = cast(CudaCommunicator, group.device_communicator)
             nccl_comm = device_comm.pynccl_comm
+            assert nccl_comm is not None
             nccl_comm.available = active
             nccl_comm.disabled = not active
 
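
`pynccl_comm` is an Optional attribute, so the added assertions fail fast with a clear message when no PyNccl communicator exists, and they also narrow the type for static checkers such as mypy, which would otherwise flag attribute access on a possibly-`None` value. A minimal sketch of the pattern; `Comm`/`Communicator` are illustrative stand-ins, not the real vLLM types:

```python
# Minimal sketch of Optional narrowing via assert; Comm/Communicator are
# illustrative stand-ins for vLLM's PyNccl communicator types.
from typing import Optional


class Comm:
    def abort(self) -> None:
        print("comm aborted")


class Communicator:
    def __init__(self, comm: Optional[Comm]) -> None:
        self.pynccl_comm: Optional[Comm] = comm


def abort_comm(dc: Communicator) -> None:
    nccl_comm = dc.pynccl_comm
    # Fail fast with a clear assertion (and narrow Optional[Comm] to Comm
    # for type checkers) rather than hitting AttributeError on None later.
    assert nccl_comm is not None
    nccl_comm.abort()


abort_comm(Communicator(Comm()))
```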
