Skip to content

Commit 20a5a5a

Browse files
committed
Improve documentation and logging in API server
Signed-off-by: fangyuchu <fangyuchu@qq.com>
1 parent 3b203d6 commit 20a5a5a

File tree

4 files changed

+42
-28
lines changed

4 files changed

+42
-28
lines changed

tests/v1/engine/test_client_guard.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
def create_test_thread_safe_dict(initial_data=None):
2424
if initial_data is None:
2525
initial_data = {1: "Healthy"}
26-
if initial_data is None:
27-
initial_data = {1: "Healthy"}
26+
2827
tsd = ThreadSafeDict()
2928
if initial_data:
3029
for k, v in initial_data.items():
@@ -219,4 +218,5 @@ def response_cmd(cmd_socket):
219218
assert result is True
220219
assert engine_status_dict[0] == "Healthy"
221220

221+
cmd_socket.close()
222222
guard.shutdown_guard()

vllm/engine/arg_utils.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1775,10 +1775,8 @@ def create_engine_config(
17751775
fault_tolerance_config = FaultToleranceConfig(
17761776
enable_fault_tolerance=self.enable_fault_tolerance,
17771777
engine_recovery_timeout=self.engine_recovery_timeout,
1778-
internal_fault_report_port=self.internal_fault_report_port
1779-
or FaultToleranceConfig.internal_fault_report_port,
1780-
external_fault_notify_port=self.external_fault_notify_port
1781-
or FaultToleranceConfig.external_fault_notify_port,
1778+
internal_fault_report_port=self.internal_fault_report_port,
1779+
external_fault_notify_port=self.external_fault_notify_port,
17821780
gloo_comm_timeout=self.gloo_comm_timeout,
17831781
)
17841782

vllm/entrypoints/api_server.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from collections.abc import AsyncGenerator
1616
from typing import Any
1717

18-
from fastapi import FastAPI, Request
18+
from fastapi import FastAPI, HTTPException, Request
1919
from fastapi.responses import JSONResponse, Response, StreamingResponse
2020

2121
import vllm.envs as envs
@@ -57,23 +57,38 @@ async def generate(request: Request) -> Response:
5757

5858

5959
@app.post("/fault_tolerance/apply")
60-
async def send_fault_tolerance_instruction(request: Request) -> Response:
61-
"""Generate completion for the request.
60+
async def process_fault_tolerance_instruction(request: Request) -> Response:
61+
"""Apply fault tolerance instructions to the engine.
62+
63+
This endpoint handles fault recovery operations, such as retrying an operation.
6264
6365
The request should be a JSON object with the following fields:
64-
- prompt: the prompt to use for the generation.
65-
- stream: whether to stream the results or not.
66-
- other fields: the sampling parameters (See `SamplingParams` for details).
66+
- fault_tolerance_instruction: The name of the fault tolerance method.
67+
- fault_tolerance_timeout: Timeout in seconds for the operation to complete.
68+
- fault_tolerance_params: dict, optional. Additional dynamic parameters for
69+
the fault tolerance operation.
6770
"""
6871
request_dict = await request.json()
6972

7073
fault_tolerance_instruction = request_dict.get("fault_tolerance_instruction")
7174
fault_tolerance_timeout = request_dict.get("fault_tolerance_timeout")
72-
kwargs = request_dict.get("kwargs", {})
75+
kwargs = request_dict.get("fault_tolerance_params", {})
7376
assert engine is not None
74-
return await engine.handle_fault(
77+
success = await engine.handle_fault(
7578
fault_tolerance_instruction, fault_tolerance_timeout, **kwargs
7679
)
80+
if success:
81+
return JSONResponse(
82+
status_code=200,
83+
content={"message": "Instruction executed successfully."},
84+
)
85+
86+
logger.error("Fault tolerance operation failed. Shutting down the engine.")
87+
engine.shutdown()
88+
raise HTTPException(
89+
status_code=400,
90+
detail="Instruction execution failed.",
91+
)
7792

7893

7994
@app.get("/fault_tolerance/status")

vllm/entrypoints/openai/api_server.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,7 @@ async def is_scaling_elastic_ep(raw_request: Request):
12351235
HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
12361236
},
12371237
)
1238-
async def send_fault_tolerance_instruction(raw_request: Request):
1238+
async def process_fault_tolerance_instruction(raw_request: Request):
12391239
try:
12401240
body = await raw_request.json()
12411241
except json.JSONDecodeError as e:
@@ -1250,50 +1250,51 @@ async def send_fault_tolerance_instruction(raw_request: Request):
12501250
if fault_tolerance_instruction is None or fault_tolerance_timeout is None:
12511251
raise HTTPException(
12521252
status_code=400,
1253-
detail="fault_tolerance_instruction and"
1254-
" fault_tolerance_timeout is required",
1253+
detail="Both 'fault_tolerance_instruction' and "
1254+
"'fault_tolerance_timeout' are required.",
12551255
)
12561256

12571257
if not isinstance(fault_tolerance_instruction, str):
12581258
raise HTTPException(
1259-
status_code=400, detail="fault_tolerance_instruction must be a str"
1259+
status_code=400, detail="'fault_tolerance_instruction' must be a string."
12601260
)
1261-
# Currently, only two types of instructions are supported: [pause, retry].
1262-
# Additional descaling instructions will be supported in future updates.
1261+
# Supported instructions: ["pause", "retry"].
1262+
# More instruction types may be added in future updates.
12631263
elif fault_tolerance_instruction not in ["pause", "retry"]:
12641264
raise HTTPException(
1265-
status_code=400, detail="not a valid fault_tolerance_instruction"
1265+
status_code=400, detail="Invalid 'fault_tolerance_instruction' value."
12661266
)
12671267

12681268
if not isinstance(fault_tolerance_timeout, int) or fault_tolerance_timeout <= 0:
12691269
raise HTTPException(
1270-
status_code=400, detail="fault_tolerance_timeout must be a positive integer"
1270+
status_code=400,
1271+
detail="'fault_tolerance_timeout' must be a positive integer.",
12711272
)
12721273
try:
1273-
execute_result = await client.handle_fault(
1274+
success = await client.handle_fault(
12741275
fault_tolerance_instruction,
12751276
fault_tolerance_timeout,
12761277
**dynamic_fault_tolerance_params,
12771278
)
1278-
if execute_result:
1279+
if success:
12791280
return JSONResponse(
12801281
{
1281-
"message": "instruction has been executed successfully",
1282+
"message": "Instruction executed successfully.",
12821283
}
12831284
)
12841285
else:
1285-
logger.error("Fault tolerance failed, shutdown the app.")
1286+
logger.error("Fault tolerance failed. Shutting down the application.")
12861287
client.shutdown()
12871288
raise HTTPException(
12881289
status_code=400,
12891290
detail="Instruction execution failed.",
12901291
)
12911292

12921293
except Exception as e:
1293-
logger.error("Handle fault failed: %s", e)
1294+
logger.error("Failed to handle fault: %s", e)
12941295
raise HTTPException(
12951296
status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value,
1296-
detail="Handle fault failed",
1297+
detail="Failed to handle fault.",
12971298
) from e
12981299

12991300

0 commit comments

Comments
 (0)