
Commit d47322d

hotfix: caching
1 parent a9a90a1 commit d47322d

File tree

1 file changed: +4 -1 lines changed


controllers/llamaCPP.cc

Lines changed: 4 additions & 1 deletion
@@ -157,14 +157,17 @@ void llamaCPP::chatCompletion(
   // To set default value

   if (jsonBody) {
+    // Default values to enable auto caching
+    data["cache_prompt"] = true;
+    data["n_keep"] = -1;
+
     data["stream"] = (*jsonBody).get("stream", false).asBool();
     data["n_predict"] = (*jsonBody).get("max_tokens", 500).asInt();
     data["top_p"] = (*jsonBody).get("top_p", 0.95).asFloat();
     data["temperature"] = (*jsonBody).get("temperature", 0.8).asFloat();
     data["frequency_penalty"] =
         (*jsonBody).get("frequency_penalty", 0).asFloat();
     data["presence_penalty"] = (*jsonBody).get("presence_penalty", 0).asFloat();
-    data["cache_prompt"] = true;
     const Json::Value &messages = (*jsonBody)["messages"];
     for (const auto &message : messages) {
       std::string input_role = message["role"].asString();
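
In effect, the commit enables llama.cpp prompt caching for every chat completion by default: cache_prompt asks the llama.cpp server to re-use the KV cache from a previous request with a matching prompt prefix, and n_keep = -1 retains all prompt tokens when the context size is exceeded. Below is a minimal standalone sketch of the resulting defaulting logic, using jsoncpp directly rather than Drogon's request object; buildLlamaData and the sample request body are illustrative, not part of the source.

// Illustrative sketch (not the actual nitro code path): the caching fields
// are set unconditionally before the client-supplied options are read.
#include <json/json.h>

#include <iostream>

Json::Value buildLlamaData(const Json::Value &jsonBody) {
  Json::Value data;
  // Default values to enable auto caching (the lines added by this commit)
  data["cache_prompt"] = true;
  data["n_keep"] = -1;

  data["stream"] = jsonBody.get("stream", false).asBool();
  data["n_predict"] = jsonBody.get("max_tokens", 500).asInt();
  data["top_p"] = jsonBody.get("top_p", 0.95).asFloat();
  data["temperature"] = jsonBody.get("temperature", 0.8).asFloat();
  data["frequency_penalty"] = jsonBody.get("frequency_penalty", 0).asFloat();
  data["presence_penalty"] = jsonBody.get("presence_penalty", 0).asFloat();
  return data;
}

int main() {
  // A request body that never mentions caching.
  Json::Value body;
  body["max_tokens"] = 100;

  // cache_prompt=true and n_keep=-1 appear in the output even though the
  // client omitted them.
  Json::StreamWriterBuilder writer;
  std::cout << Json::writeString(writer, buildLlamaData(body)) << std::endl;
  return 0;
}

Note that cache_prompt was already being forced to true before this commit, just after the other fields; the functionally new part is n_keep = -1, which tells the server to retain the entire prompt when the context window fills, keeping the cached prefix usable across requests.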
