
Commit 2f179bc

load model with clean_cache_threshold

1 parent a4c0b8e

2 files changed: 13 additions, 1 deletion

controllers/llamaCPP.cc (11 additions, 0 deletions)
```diff
@@ -8,6 +8,7 @@
 #include <regex>
 #include <string>
 #include <thread>
+#include <trantor/utils/Logger.h>
 
 using namespace inferences;
 using json = nlohmann::json;
@@ -177,6 +178,14 @@ void llamaCPP::chatCompletion(
   // To set default value
 
   if (jsonBody) {
+    // Increase the number of chats received; clear the KV cache at the threshold
+    no_of_chats++;
+    if (no_of_chats % clean_cache_threshold == 0) {
+      LOG_INFO << "Clean cache threshold reached!";
+      llama.kv_cache_clear();
+      LOG_INFO << "Cache cleaned";
+    }
+
     // Default values to enable auto caching
     data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;
@@ -390,6 +399,8 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
           .asInt();
   params.cont_batching = jsonBody.get("cont_batching", false).asBool();
 
+  this->clean_cache_threshold =
+      jsonBody.get("clean_cache_threshold", 5).asInt();
   this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
   this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
```
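
Taken together, these hunks make the server clear llama.cpp's KV cache after every `clean_cache_threshold` chat completions, with the threshold read from the load-model request body (defaulting to 5). A minimal sketch of such a request body follows; the `llama_model_path` key is an assumed illustration, and only the keys appearing in the hunks above are confirmed by this commit:

```json
{
  "llama_model_path": "/path/to/model.gguf",
  "cont_batching": false,
  "caching_enabled": true,
  "clean_cache_threshold": 10,
  "user_prompt": "USER: ",
  "ai_prompt": "ASSISTANT: "
}
```

With this configuration, every tenth completion handled by chatCompletion would log the threshold message and call llama.kv_cache_clear().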

controllers/llamaCPP.h (2 additions, 1 deletion)
```diff
@@ -1909,6 +1909,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   std::string pre_prompt;
   int repeat_last_n;
   bool caching_enabled;
-  std::atomic<int> no_of_chats = 0;
+  std::atomic<int> no_of_chats = 0;
+  int clean_cache_threshold;
 };
 }; // namespace inferences
```
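
One subtlety in the chatCompletion hunk: `no_of_chats++` and the later read in the modulo test are two separate operations on the atomic counter, so two concurrent requests can interleave between them and double-trigger or skip a cleanup. Below is a minimal race-free sketch using the value returned by `fetch_add`; this is an observation about the pattern, not part of the commit:

```cpp
#include <atomic>

std::atomic<int> no_of_chats{0};
int clean_cache_threshold = 5;  // assumed > 0, matching the commit's default

// fetch_add returns the counter's value *before* the increment, so every
// request observes a unique count and exactly one request per threshold
// window decides to clean, even under concurrent chat completions.
bool should_clean_cache() {
  int count = no_of_chats.fetch_add(1) + 1;
  return count % clean_cache_threshold == 0;
}
```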
