2 files changed: +13 −1 lines changed

 #include <regex>
 #include <string>
 #include <thread>
+#include <trantor/utils/Logger.h>
 
 using namespace inferences;
 using json = nlohmann::json;
@@ -177,6 +178,14 @@ void llamaCPP::chatCompletion(
   // To set default value
 
   if (jsonBody) {
+    // Increase number of chats received and clean the prompt
+    no_of_chats++;
+    if (no_of_chats % clean_cache_threshold == 0) {
+      LOG_INFO << "Clean cache threshold reached!";
+      llama.kv_cache_clear();
+      LOG_INFO << "Cache cleaned";
+    }
+
     // Default values to enable auto caching
     data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;
@@ -390,6 +399,8 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
           .asInt();
   params.cont_batching = jsonBody.get("cont_batching", false).asBool();
 
+  this->clean_cache_threshold =
+      jsonBody.get("clean_cache_threshold", 5).asInt();
   this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
   this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
@@ -1909,6 +1909,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   std::string pre_prompt;
   int repeat_last_n;
   bool caching_enabled;
-  std::atomic<int> no_of_chats = 0;
+  std::atomic<int> no_of_chats = 0;
+  int clean_cache_threshold;
 };
 }; // namespace inferences
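
For illustration, below is a minimal, self-contained sketch of the pattern this change introduces: an atomic per-request counter plus a configurable threshold, with the KV cache cleared on every Nth chat. The KvCache struct, ChatServer class, and main() driver are hypothetical stand-ins used only for this example; the actual change lives in llamaCPP::chatCompletion() and calls llama.kv_cache_clear(), with the threshold read from the model-load JSON in loadModelImpl().

#include <atomic>
#include <iostream>

struct KvCache {
  // Stand-in for llama.kv_cache_clear() in the real server.
  void clear() { std::cout << "Cache cleaned\n"; }
};

class ChatServer {
 public:
  // In the real change the threshold comes from the model-load JSON,
  // defaulting to 5 ("clean_cache_threshold" in loadModelImpl above).
  explicit ChatServer(int clean_cache_threshold = 5)
      : clean_cache_threshold_(clean_cache_threshold) {}

  void chatCompletion() {
    // Count every chat and clear the KV cache on every Nth request so
    // cached prompt state does not accumulate indefinitely.
    if (++no_of_chats_ % clean_cache_threshold_ == 0) {
      std::cout << "Clean cache threshold reached!\n";
      cache_.clear();
    }
  }

 private:
  KvCache cache_;
  std::atomic<int> no_of_chats_{0};
  int clean_cache_threshold_;
};

int main() {
  ChatServer server(/*clean_cache_threshold=*/3);
  for (int i = 0; i < 7; ++i) {
    server.chatCompletion();  // clears after the 3rd and 6th chats
  }
}

The counter is atomic because chat completions can arrive on multiple Drogon worker threads; the modulo check keeps the clearing policy a single integer parameter rather than a timer or size-based heuristic.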