Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit cf7d3e8

Browse files
authored
Merge pull request #309 from janhq/version-pump-upgrade-bug-fixing-llava
Version pump upgrade bug fixing llava
2 parents 12e8068 + 797b251 commit cf7d3e8

File tree

3 files changed

+769
-126
lines changed

3 files changed

+769
-126
lines changed

controllers/llamaCPP.cc

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,6 @@
1 1
#include "llamaCPP.h"
2 2
#include "llama.h"
3 3
#include "utils/nitro_utils.h"
4-
#include <chrono>
5-
#include <cstring>
6-
#include <drogon/HttpResponse.h>
7-
#include <drogon/HttpTypes.h>
8-
#include <regex>
9-
#include <string>
10-
#include <thread>
11-
#include <trantor/utils/Logger.h>
12 4

13 5
using namespace inferences;
14 6
using json = nlohmann::json;
@@ -135,7 +127,7 @@ void llamaCPP::warmupModel() {
135 127
pseudo["prompt"] = "Hello";
136 128
pseudo["n_predict"] = 2;
137 129
pseudo["stream"] = false;
138-
const int task_id = llama.request_completion(pseudo, false, false);
130+
const int task_id = llama.request_completion(pseudo, false, false, -1);
139 131
std::string completion_text;
140 132
task_result result = llama.next_result(task_id);
141 133
if (!result.error && result.stop) {
@@ -292,7 +284,7 @@ void llamaCPP::chatCompletion(
292 284
LOG_INFO << "Current completion text";
293 285
LOG_INFO << formatted_output;
294 286
#endif
295-
const int task_id = llama.request_completion(data, false, false);
287+
const int task_id = llama.request_completion(data, false, false, -1);
296 288
LOG_INFO << "Resolved request for task_id:" << task_id;
297 289

298 290
if (is_streamed) {
@@ -383,7 +375,7 @@ void llamaCPP::embedding(
383 375
prompt = "";
384 376
}
385 377
const int task_id = llama.request_completion(
386-
{{"prompt", prompt}, {"n_predict", 0}}, false, true);
378+
{{"prompt", prompt}, {"n_predict", 0}}, false, true, -1);
387 379
task_result result = llama.next_result(task_id);
388 380
std::vector<float> embedding_result = result.result_json["embedding"];
389 381
auto resp = nitro_utils::nitroHttpResponse();

0 commit comments

Comments
 (0)