@@ -1,14 +1,6 @@
 #include "llamaCPP.h"
 #include "llama.h"
 #include "utils/nitro_utils.h"
-#include <chrono>
-#include <cstring>
-#include <drogon/HttpResponse.h>
-#include <drogon/HttpTypes.h>
-#include <regex>
-#include <string>
-#include <thread>
-#include <trantor/utils/Logger.h>
 
 using namespace inferences;
 using json = nlohmann::json;
@@ -135,7 +127,7 @@ void llamaCPP::warmupModel() { |
   pseudo["prompt"] = "Hello";
   pseudo["n_predict"] = 2;
   pseudo["stream"] = false;
-  const int task_id = llama.request_completion(pseudo, false, false);
+  const int task_id = llama.request_completion(pseudo, false, false, -1);
   std::string completion_text;
   task_result result = llama.next_result(task_id);
   if (!result.error && result.stop) {
@@ -292,7 +284,7 @@ void llamaCPP::chatCompletion( |
   LOG_INFO << "Current completion text";
   LOG_INFO << formatted_output;
 #endif
-  const int task_id = llama.request_completion(data, false, false);
+  const int task_id = llama.request_completion(data, false, false, -1);
   LOG_INFO << "Resolved request for task_id:" << task_id;
 
   if (is_streamed) {
@@ -383,7 +375,7 @@ void llamaCPP::embedding( |
     prompt = "";
   }
   const int task_id = llama.request_completion(
-      {{"prompt", prompt}, {"n_predict", 0}}, false, true);
+      {{"prompt", prompt}, {"n_predict", 0}}, false, true, -1);
   task_result result = llama.next_result(task_id);
   std::vector<float> embedding_result = result.result_json["embedding"];
   auto resp = nitro_utils::nitroHttpResponse();
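
Note on the signature change: every llama.request_completion(...) call in this diff now passes a fourth argument, -1. The sketch below shows one way the updated call can be used. It is a minimal sketch, not part of the diff: it assumes the new parameter is the multitask id used by the vendored llama.cpp server code (with -1 meaning a standalone, ungrouped request), that the llama member is a llama_server_context, and that the helper run_completion_blocking is hypothetical, introduced only for illustration.

// Minimal sketch, not part of the diff: a blocking completion built on the
// updated four-argument request_completion call.
#include <string>

#include "llamaCPP.h"  // assumed to pull in llama_server_context,
                       // task_result and nlohmann::json

// Hypothetical helper: runs one non-streamed completion to the end,
// mirroring the warmupModel() flow shown in the diff above.
static std::string run_completion_blocking(llama_server_context &llama,
                                           const nlohmann::json &data) {
  // false, false -> neither infill nor embedding;
  // -1 -> assumed "no multitask group" sentinel for the new fourth parameter.
  const int task_id = llama.request_completion(data, false, false, -1);
  task_result result = llama.next_result(task_id);
  if (!result.error && result.stop) {
    // result_json is an nlohmann::json object (cf. the embedding handler);
    // "content" is assumed to hold the generated text.
    return result.result_json.value("content", "");
  }
  return "";
}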