This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 58d9ba8

Merge pull request #252 from janhq/251-feat-periodically-clean-cache
251 feat periodically clean cache
2 parents: 73bd511 + 5aa10f9

3 files changed, +13 -0 lines changed

README.md

Lines changed: 1 addition & 0 deletions
````diff
@@ -109,6 +109,7 @@ Table of parameters
 | `cpu_threads` | Integer | The number of threads to use for inferencing (CPU MODE ONLY) |
 | `n_batch` | Integer | The batch size for prompt eval step |
 | `caching_enabled` | Boolean | To enable prompt caching or not |
+| `clean_cache_threshold` | Integer | Number of chats that will trigger clean cache action|
 
 ***OPTIONAL***: You can run Nitro on a different port like 5000 instead of 3928 by running it manually in terminal
 ```zsh
````
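As a sketch of how the new parameter is consumed (the values below are illustrative, not defaults): it travels in the same JSON body as `caching_enabled`, and the server falls back to 5 when it is absent, matching the `loadModelImpl` change further down.

```cpp
// Illustrative only: how the new parameter would appear in the load-model
// JSON body and how the server-side jsoncpp read with defaults behaves.
#include <iostream>
#include <json/json.h>

int main() {
  Json::Value jsonBody;
  jsonBody["caching_enabled"] = true;       // enable prompt caching
  jsonBody["clean_cache_threshold"] = 10;   // clear the KV cache every 10 chats

  // Same get()-with-default pattern as loadModelImpl (5 / false when absent).
  int threshold = jsonBody.get("clean_cache_threshold", 5).asInt();
  bool caching = jsonBody.get("caching_enabled", false).asBool();
  std::cout << "threshold=" << threshold << " caching=" << caching << "\n";
}
```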

controllers/llamaCPP.cc

Lines changed: 10 additions & 0 deletions
```diff
@@ -177,6 +177,14 @@ void llamaCPP::chatCompletion(
   // To set default value
 
   if (jsonBody) {
+    // Increase number of chats received and clean the prompt
+    no_of_chats++;
+    if (no_of_chats % clean_cache_threshold == 0) {
+      LOG_INFO << "Clean cache threshold reached!";
+      llama.kv_cache_clear();
+      LOG_INFO << "Cache cleaned";
+    }
+
     // Default values to enable auto caching
     data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;
@@ -390,6 +398,8 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
           .asInt();
   params.cont_batching = jsonBody.get("cont_batching", false).asBool();
 
+  this->clean_cache_threshold =
+      jsonBody.get("clean_cache_threshold", 5).asInt();
   this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
   this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
```
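Taken together, the change keeps a per-server chat counter and clears the llama.cpp KV cache once every `clean_cache_threshold` chats (default 5, as read in `loadModelImpl` above). A minimal standalone sketch of that pattern, with a hypothetical `clearKvCache()` standing in for `llama.kv_cache_clear()` and plain `std::cout` standing in for `LOG_INFO`:

```cpp
#include <atomic>
#include <iostream>

// Hypothetical stand-in for llama.kv_cache_clear() in the real handler.
void clearKvCache() { std::cout << "KV cache cleared\n"; }

std::atomic<int> no_of_chats{0};
int clean_cache_threshold = 5;  // default used when the JSON body omits it

// Called once per incoming chat-completion request.
void onChatCompletion() {
  // fetch_add returns the previous value, so +1 is this request's ordinal.
  int count = no_of_chats.fetch_add(1) + 1;
  if (count % clean_cache_threshold == 0) {
    std::cout << "Clean cache threshold reached!\n";
    clearKvCache();
  }
}

int main() {
  for (int i = 0; i < 12; ++i) onChatCompletion();  // clears after chats 5 and 10
}
```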

controllers/llamaCPP.h

Lines changed: 2 additions & 0 deletions
```diff
@@ -1909,5 +1909,7 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   std::string pre_prompt;
   int repeat_last_n;
   bool caching_enabled;
+  std::atomic<int> no_of_chats = 0;
+  int clean_cache_threshold;
 };
 }; // namespace inferences
```
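A side note, not part of the diff: `no_of_chats` is declared `std::atomic<int>` because Drogon may run `chatCompletion` handlers on several threads at once, so concurrent increments must not race. A minimal illustration (thread and chat counts are arbitrary):

```cpp
#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

int main() {
  std::atomic<int> no_of_chats{0};
  std::vector<std::thread> workers;
  // Simulate 4 request-handler threads, each receiving 1000 chats.
  for (int t = 0; t < 4; ++t)
    workers.emplace_back([&] {
      for (int i = 0; i < 1000; ++i) no_of_chats++;  // atomic increment, no data race
    });
  for (auto &w : workers) w.join();
  std::cout << no_of_chats << "\n";  // always 4000; a plain int could lose updates
}
```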
