This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit b547fdc

Merge pull request #248 from janhq/247-feat-should-allow-turning-off-caching
add caching enabled to hotfix cache issues
2 parents: d2e4ac5 + c67f62d

File tree

3 files changed: +4 −2 lines changed

controllers/llamaCPP.cc

Lines changed: 2 additions & 1 deletion
@@ -178,7 +178,7 @@ void llamaCPP::chatCompletion(

   if (jsonBody) {
     // Default values to enable auto caching
-    data["cache_prompt"] = true;
+    data["cache_prompt"] = caching_enabled;
     data["n_keep"] = -1;

     // Passing load value
@@ -390,6 +390,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
              .asInt();
   params.cont_batching = jsonBody.get("cont_batching", false).asBool();

+  this->caching_enabled = jsonBody.get("caching_enabled", false).asBool();
   this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
   this->system_prompt =

controllers/llamaCPP.h

Lines changed: 1 addition & 0 deletions
@@ -1908,5 +1908,6 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   std::string system_prompt;
   std::string pre_prompt;
   int repeat_last_n;
+  bool caching_enabled;
 };
 }; // namespace inferences

llama.cpp

Lines changed: 1 addition & 1 deletion
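
Taken together, the hunks above make prompt caching opt-in: loadModelImpl reads a caching_enabled flag from the load-model request body (defaulting to false), and chatCompletion forwards that flag as cache_prompt instead of hard-coding true. Below is a minimal standalone sketch of that flow, assuming jsoncpp (the library behind the Json::Value calls in the diff); the main() wrapper and the sample request body are illustrative only and not part of this commit.

// Minimal sketch (not part of this commit) of the new opt-in caching flow.
// Assumes jsoncpp; the request body below is hypothetical.
#include <iostream>
#include <sstream>
#include <string>

#include <json/json.h>

int main() {
  // Hypothetical load-model request body; "caching_enabled" is the new key.
  const std::string body = R"({"caching_enabled": true})";

  Json::Value jsonBody;
  Json::CharReaderBuilder builder;
  std::string errs;
  std::istringstream in(body);
  if (!Json::parseFromStream(builder, in, &jsonBody, &errs)) {
    std::cerr << "parse error: " << errs << '\n';
    return 1;
  }

  // Mirrors loadModelImpl: when the key is absent the flag defaults to false,
  // so prompt caching stays off unless the client opts in.
  const bool caching_enabled =
      jsonBody.get("caching_enabled", false).asBool();

  // Mirrors chatCompletion: the per-request llama.cpp field now follows the
  // flag instead of being hard-coded to true.
  Json::Value data;
  data["cache_prompt"] = caching_enabled;
  data["n_keep"] = -1;

  std::cout << "cache_prompt = " << std::boolalpha
            << data["cache_prompt"].asBool() << '\n';
  return 0;
}

With this default, clients that never send caching_enabled now get cache_prompt = false, which is the behavior the "hotfix cache issues" commit message is after; setting "caching_enabled": true in the load-model body restores the previous always-on caching.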

0 commit comments
