3 files changed: +13 −0

````diff
@@ -109,6 +109,7 @@ Table of parameters
 | `cpu_threads` | Integer | The number of threads to use for inferencing (CPU MODE ONLY) |
 | `n_batch` | Integer | The batch size for prompt eval step |
 | `caching_enabled` | Boolean | To enable prompt caching or not |
+| `clean_cache_threshold` | Integer | Number of chats that will trigger a cache-clean action |

 ***OPTIONAL***: You can run Nitro on a different port like 5000 instead of 3928 by running it manually in terminal
 ```zsh
````
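For reference, a minimal sketch of enabling the new parameter at model load time. It assumes Nitro's `loadmodel` route on the default port 3928 and the `llama_model_path` field; the model path and threshold value are purely illustrative (the parameter defaults to 5 when omitted, as set in `loadModelImpl` below):

```zsh
curl http://localhost:3928/inferences/llamacpp/loadmodel \
  -H 'Content-Type: application/json' \
  -d '{
        "llama_model_path": "/path/to/model.gguf",
        "caching_enabled": true,
        "clean_cache_threshold": 10
      }'
```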
```diff
@@ -177,6 +177,14 @@ void llamaCPP::chatCompletion(
   // To set default value

   if (jsonBody) {
+    // Count this chat and clear the KV cache once the threshold is reached
+    no_of_chats++;
+    if (no_of_chats % clean_cache_threshold == 0) {
+      LOG_INFO << "Clean cache threshold reached!";
+      llama.kv_cache_clear();
+      LOG_INFO << "Cache cleaned";
+    }
+
     // Default values to enable auto caching
     data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;
@@ -390,6 +398,8 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
             .asInt();
   params.cont_batching = jsonBody.get("cont_batching", false).asBool();

+  this->clean_cache_threshold =
+      jsonBody.get("clean_cache_threshold", 5).asInt();
   this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
   this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
```
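With these changes, each chat completion increments the atomic `no_of_chats` counter, and every `clean_cache_threshold`-th request clears llama.cpp's KV cache before the usual caching defaults are applied. A sketch of how the behavior would be exercised, assuming the controller's `chat_completion` route (the route name and request body are assumptions, not confirmed by this diff):

```zsh
# Each call increments no_of_chats; with clean_cache_threshold set to 10
# at load time, every 10th call triggers llama.kv_cache_clear().
curl http://localhost:3928/inferences/llamacpp/chat_completion \
  -H 'Content-Type: application/json' \
  -d '{
        "messages": [{"role": "user", "content": "Hello"}]
      }'
```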
```diff
@@ -1909,5 +1909,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   std::string pre_prompt;
   int repeat_last_n;
   bool caching_enabled;
+  std::atomic<int> no_of_chats = 0;
+  int clean_cache_threshold;
 };
 }; // namespace inferences
```