This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 701271d

remove redundant temporary impl

1 parent 902dc3f · commit 701271d

1 file changed: 6 additions, 22 deletions

controllers/llamaCPP.cc

Lines changed: 6 additions & 22 deletions
@@ -6,16 +6,17 @@
 using namespace inferences;
 using json = nlohmann::json;

-struct State {
+struct inferenceState {
   bool isStopped = false;
   int task_id;
   llamaCPP *instance;

-  State(int tid, llamaCPP *inst) : task_id(tid), instance(inst) {}
+  inferenceState(int tid, llamaCPP *inst) : task_id(tid), instance(inst) {}
 };

-std::shared_ptr<State> createState(int task_id, llamaCPP *instance) {
-  return std::make_shared<State>(task_id, instance);
+std::shared_ptr<inferenceState> create_inference_state(int task_id,
+                                                       llamaCPP *instance) {
+  return std::make_shared<inferenceState>(task_id, instance);
 }

 // --------------------------------------------
@@ -295,36 +296,21 @@ void llamaCPP::chatCompletion(
 #endif
   int task_id;

-  if (llama.params.n_parallel == 1) {
-    while (true) {
-      if (!single_queue_is_busy) {
-        task_id = llama.request_completion(data, false, false, -1);
-        single_queue_is_busy = true;
-        break;
-      } else {
-        std::this_thread::sleep_for(
-            std::chrono::milliseconds(500)); // Sleep for 500 milliseconds
-      }
-    }
-  } else {
   task_id = llama.request_completion(data, false, false, -1);
-  }

   LOG_INFO << "Resolved request for task_id:" << task_id;

   if (is_streamed) {
-    auto state = createState(task_id, this);
+    auto state = create_inference_state(task_id, this);

     auto chunked_content_provider =
         [this, state](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
       if (!pBuffer) {
         LOG_INFO << "Connection closed or buffer is null. Reset context";
         state->instance->llama.request_cancel(state->task_id);
-        single_queue_is_busy = false;
         return 0;
       }
       if (state->isStopped) {
-        single_queue_is_busy = false;
         return 0;
       }
@@ -357,10 +343,8 @@ void llamaCPP::chatCompletion(
       }
       return nRead;
     } else {
-      single_queue_is_busy = false;
       return 0;
     }
-    single_queue_is_busy = false;
     return 0;
   };
   auto resp = nitro_utils::nitroStreamResponse(chunked_content_provider,
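
For context on the streaming branch this commit touches, below is a minimal, self-contained sketch of the pattern it keeps: per-request state is held in a std::shared_ptr created by create_inference_state and captured by value in the chunked content provider, so the lambda can cancel the task when the connection drops, without any single_queue_is_busy flag. FakeServer, the hard-coded chunk, and main are hypothetical stand-ins for illustration only; they are not the llama.cpp server or Drogon APIs used in controllers/llamaCPP.cc.

// Illustrative sketch only. FakeServer is a hypothetical stand-in for the
// llama.cpp server context; only the shared_ptr-captured-state pattern
// mirrors the code in this commit.
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>

struct FakeServer {
  int request_completion() { return 42; }   // pretend task id
  void request_cancel(int task_id) {
    std::cout << "cancelled task " << task_id << "\n";
  }
};

struct inferenceState {
  bool isStopped = false;
  int task_id;
  FakeServer *instance;
  inferenceState(int tid, FakeServer *inst) : task_id(tid), instance(inst) {}
};

std::shared_ptr<inferenceState> create_inference_state(int task_id,
                                                       FakeServer *instance) {
  return std::make_shared<inferenceState>(task_id, instance);
}

int main() {
  FakeServer server;
  int task_id = server.request_completion();
  auto state = create_inference_state(task_id, &server);

  // Captured by value: the shared_ptr keeps the state alive for as long as
  // the response machinery keeps calling the provider.
  auto chunked_content_provider =
      [state](char *pBuffer, std::size_t nBuffSize) -> std::size_t {
    if (!pBuffer) {                      // connection closed: cancel the task
      state->instance->request_cancel(state->task_id);
      return 0;
    }
    if (state->isStopped) {              // nothing left to stream
      return 0;
    }
    const char chunk[] = "data: hello\n\n";
    std::size_t n = std::min(nBuffSize, sizeof(chunk) - 1);
    std::memcpy(pBuffer, chunk, n);
    state->isStopped = true;             // one chunk is enough for the sketch
    return n;
  };

  char buffer[64];
  std::size_t n = chunked_content_provider(buffer, sizeof(buffer));
  std::cout << std::string(buffer, n);
  chunked_content_provider(nullptr, 0);  // simulate a dropped connection
}

In the real chatCompletion the provider fills pBuffer from the model's streamed output (the nRead path visible in the last hunk); this sketch replaces that with a fixed chunk.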
