Skip to content

Commit 9f496ca

Browse files
committed
Improve documentation and logging in API server
Signed-off-by: fangyuchu <fangyuchu@qq.com>
1 parent f22d8f5 commit 9f496ca

File tree

3 files changed

+39
-22
lines changed

3 files changed

+39
-22
lines changed

tests/v1/engine/test_client_guard.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,4 +219,5 @@ def response_cmd(cmd_socket):
219219
assert result is True
220220
assert engine_status_dict[0] == "Healthy"
221221

222+
cmd_socket.close()
222223
guard.shutdown_guard()

vllm/entrypoints/api_server.py

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from collections.abc import AsyncGenerator
1616
from typing import Any
1717

18-
from fastapi import FastAPI, Request
18+
from fastapi import FastAPI, HTTPException, Request
1919
from fastapi.responses import JSONResponse, Response, StreamingResponse
2020

2121
import vllm.envs as envs
@@ -57,23 +57,38 @@ async def generate(request: Request) -> Response:
5757

5858

5959
@app.post("/fault_tolerance/apply")
60-
async def send_fault_tolerance_instruction(request: Request) -> Response:
61-
"""Generate completion for the request.
60+
async def process_fault_tolerance_instruction(request: Request) -> Response:
61+
"""Apply fault tolerance instructions to the engine.
62+
63+
This endpoint handles fault recovery operations, such as retries.
6264
6365
The request should be a JSON object with the following fields:
64-
- prompt: the prompt to use for the generation.
65-
- stream: whether to stream the results or not.
66-
- other fields: the sampling parameters (See `SamplingParams` for details).
66+
- fault_tolerance_instruction: The name of the fault tolerance method.
67+
- fault_tolerance_timeout: Timeout in seconds for the operation to complete.
68+
- fault_tolerance_params: dict, optional. Additional dynamic parameters for
69+
the fault tolerance operation.
6770
"""
6871
request_dict = await request.json()
6972

7073
fault_tolerance_instruction = request_dict.get("fault_tolerance_instruction")
7174
fault_tolerance_timeout = request_dict.get("fault_tolerance_timeout")
72-
kwargs = request_dict.get("kwargs", {})
75+
kwargs = request_dict.get("fault_tolerance_params", {})
7376
assert engine is not None
74-
return await engine.handle_fault(
77+
success = await engine.handle_fault(
7578
fault_tolerance_instruction, fault_tolerance_timeout, **kwargs
7679
)
80+
if success:
81+
return JSONResponse(
82+
status_code=200,
83+
content={"message": "Instruction executed successfully."},
84+
)
85+
86+
logger.error("Fault tolerance operation failed. Shutting down the engine.")
87+
engine.shutdown()
88+
raise HTTPException(
89+
status_code=400,
90+
detail="Instruction execution failed.",
91+
)
7792

7893

7994
@app.get("/fault_tolerance/status")

vllm/entrypoints/openai/api_server.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,7 +1232,7 @@ async def is_scaling_elastic_ep(raw_request: Request):
12321232
HTTPStatus.INTERNAL_SERVER_ERROR.value: {"model": ErrorResponse},
12331233
},
12341234
)
1235-
async def send_fault_tolerance_instruction(raw_request: Request):
1235+
async def process_fault_tolerance_instruction(raw_request: Request):
12361236
try:
12371237
body = await raw_request.json()
12381238
except json.JSONDecodeError as e:
@@ -1247,50 +1247,51 @@ async def send_fault_tolerance_instruction(raw_request: Request):
12471247
if fault_tolerance_instruction is None or fault_tolerance_timeout is None:
12481248
raise HTTPException(
12491249
status_code=400,
1250-
detail="fault_tolerance_instruction and"
1251-
" fault_tolerance_timeout is required",
1250+
detail="Both 'fault_tolerance_instruction' and "
1251+
"'fault_tolerance_timeout' are required.",
12521252
)
12531253

12541254
if not isinstance(fault_tolerance_instruction, str):
12551255
raise HTTPException(
1256-
status_code=400, detail="fault_tolerance_instruction must be a str"
1256+
status_code=400, detail="'fault_tolerance_instruction' must be a string."
12571257
)
1258-
# Currently, only two types of instructions are supported: [pause, retry].
1259-
# Additional descaling instructions will be supported in future updates.
1258+
# Supported instructions: ["pause", "retry"].
1259+
# More instruction types may be added in future updates.
12601260
elif fault_tolerance_instruction not in ["pause", "retry"]:
12611261
raise HTTPException(
1262-
status_code=400, detail="not a valid fault_tolerance_instruction"
1262+
status_code=400, detail="Invalid 'fault_tolerance_instruction' value."
12631263
)
12641264

12651265
if not isinstance(fault_tolerance_timeout, int) or fault_tolerance_timeout <= 0:
12661266
raise HTTPException(
1267-
status_code=400, detail="fault_tolerance_timeout must be a positive integer"
1267+
status_code=400,
1268+
detail="'fault_tolerance_timeout' must be a positive integer.",
12681269
)
12691270
try:
1270-
execute_result = await client.handle_fault(
1271+
success = await client.handle_fault(
12711272
fault_tolerance_instruction,
12721273
fault_tolerance_timeout,
12731274
**dynamic_fault_tolerance_params,
12741275
)
1275-
if execute_result:
1276+
if success:
12761277
return JSONResponse(
12771278
{
1278-
"message": "instruction has been executed successfully",
1279+
"message": "Instruction executed successfully.",
12791280
}
12801281
)
12811282
else:
1282-
logger.error("Fault tolerance failed, shutdown the app.")
1283+
logger.error("Fault tolerance failed. Shutting down the application.")
12831284
client.shutdown()
12841285
raise HTTPException(
12851286
status_code=400,
12861287
detail="Instruction execution failed.",
12871288
)
12881289

12891290
except Exception as e:
1290-
logger.error("Handle fault failed: %s", e)
1291+
logger.error("Failed to handle fault: %s", e)
12911292
raise HTTPException(
12921293
status_code=HTTPStatus.INTERNAL_SERVER_ERROR.value,
1293-
detail="Handle fault failed",
1294+
detail="Failed to handle fault.",
12941295
) from e
12951296

12961297

0 commit comments

Comments
 (0)