This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 1c69519

fix: app hangs if we unload model during stream completion (#513)
1 parent 6654c7e · commit 1c69519

File tree

1 file changed (+5, -4 lines)

controllers/llamaCPP.cc

Lines changed: 5 additions & 4 deletions
@@ -190,8 +190,7 @@ void llamaCPP::InferenceImpl(
   if (llama.model_type == ModelType::EMBEDDING) {
     LOG_WARN << "Not support completion for embedding model";
     Json::Value jsonResp;
-    jsonResp["message"] =
-        "Not support completion for embedding model";
+    jsonResp["message"] = "Not support completion for embedding model";
     auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
     resp->setStatusCode(drogon::k400BadRequest);
     callback(resp);
@@ -429,7 +428,8 @@ void llamaCPP::InferenceImpl(
 
   // Since this is an async task, we will wait for the task to be
   // completed
-  while (state->inference_status != FINISHED && retries < 10) {
+  while (state->inference_status != FINISHED && retries < 10 &&
+         state->instance->llama.model_loaded_external) {
     // Should wait chunked_content_provider lambda to be called within
     // 3s
     if (state->inference_status == PENDING) {
@@ -748,9 +748,10 @@ void llamaCPP::StopBackgroundTask() {
   if (llama.model_loaded_external) {
     llama.model_loaded_external = false;
     llama.condition_tasks.notify_one();
-    LOG_INFO << "Background task stopped! ";
+    LOG_INFO << "Stopping background task! ";
     if (backgroundThread.joinable()) {
       backgroundThread.join();
     }
+    LOG_INFO << "Background task stopped! ";
   }
 }
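
The heart of the fix is the second hunk: the wait loop in InferenceImpl now also checks llama.model_loaded_external, so unloading the model mid-stream breaks the wait instead of leaving the request spinning. Below is a minimal standalone C++ sketch of that pattern; InferenceState, the Status enum, and the 300 ms poll interval are assumptions for illustration, not taken from the Nitro source:

// Minimal sketch (not the Nitro source) of the patched wait loop: a poller
// that gives up as soon as the model is unloaded externally, which is the
// case that previously made the app hang during stream completion.
#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

enum class Status { PENDING, RUNNING, FINISHED };

struct InferenceState {
  std::atomic<Status> inference_status{Status::PENDING};
  std::atomic<bool> model_loaded_external{true};  // flipped by the unloader
};

// Returns true if inference finished, false if we bailed out early.
bool WaitForInference(InferenceState& state) {
  int retries = 0;
  // Mirrors the patched condition: stop waiting once the model is gone.
  while (state.inference_status != Status::FINISHED && retries < 10 &&
         state.model_loaded_external) {
    if (state.inference_status == Status::PENDING) {
      ++retries;  // only burn retries while nothing has started yet
    }
    std::this_thread::sleep_for(std::chrono::milliseconds(300));
  }
  return state.inference_status == Status::FINISHED;
}

int main() {
  InferenceState state;
  // Simulate a model unload arriving mid-stream on another thread.
  std::thread unloader([&] {
    std::this_thread::sleep_for(std::chrono::milliseconds(600));
    state.model_loaded_external = false;
  });
  std::cout << (WaitForInference(state) ? "finished"
                                        : "bailed out: model unloaded")
            << "\n";
  unloader.join();
}

Without the model_loaded_external check, a request stuck in RUNNING after an unload would never satisfy the exit condition and the loop would wait indefinitely, which is the hang the commit title describes. The third hunk complements it by logging "Background task stopped!" only after backgroundThread.join() has actually returned.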
