This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 8bff906

Merge pull request #370 from janhq/364-feat-add-mmlock-option-when-load-model
add mlock to load model
2 parents: 0aa422d + 1bda704

File tree: 1 file changed (+5 -1 lines)


controllers/llamaCPP.cc

Lines changed: 5 additions & 1 deletion
@@ -359,7 +359,8 @@ void llamaCPP::chatCompletion(
     while (state->instance->single_queue_is_busy) {
       LOG_INFO << "Waiting for task to be released status:"
               << state->instance->single_queue_is_busy;
-      std::this_thread::sleep_for(std::chrono::milliseconds(500)); // Waiting in 500 miliseconds step
+      std::this_thread::sleep_for(std::chrono::milliseconds(
+          500)); // Waiting in 500 miliseconds step
     }
   }
   std::string str = "\n\n";
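
This hunk only rewraps an overlong line; the underlying logic is a simple polling wait: the request thread loops until the single-queue busy flag clears, sleeping 500 ms between checks. A minimal self-contained sketch of that pattern, assuming the flag is an std::atomic<bool> (the names here are illustrative stand-ins, not Nitro's actual types):

#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

// Illustrative stand-in for state->instance->single_queue_is_busy.
std::atomic<bool> single_queue_is_busy{true};

// Poll the busy flag, sleeping 500 ms between checks, as chatCompletion does.
void wait_for_task_release() {
  while (single_queue_is_busy.load()) {
    std::cout << "Waiting for task to be released status: "
              << single_queue_is_busy.load() << "\n";
    std::this_thread::sleep_for(std::chrono::milliseconds(500));
  }
}

int main() {
  // Simulate another thread releasing the queue after about one second.
  std::thread worker([] {
    std::this_thread::sleep_for(std::chrono::seconds(1));
    single_queue_is_busy.store(false);
  });
  wait_for_task_release();
  worker.join();
}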
@@ -476,6 +477,9 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
 
     params.grp_attn_w = jsonBody["grp_attn_w"].asInt();
   }
+  if (!jsonBody["mlock"].isNull()) {
+    params.use_mlock = jsonBody["mlock"].asBool();
+  }
   params.model = jsonBody["llama_model_path"].asString();
   params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
   params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
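
This hunk is the substantive change: when the load-model request body carries a boolean "mlock" field, it is forwarded to llama.cpp's use_mlock parameter, which asks the OS to lock the model's pages in RAM (mlock(2)) so the weights are not swapped out. A minimal sketch of a request body exercising the option, built with jsoncpp; the field names match the diff, and the model path is a placeholder:

#include <json/json.h>
#include <iostream>

int main() {
  // Hypothetical load-model request body; the path value is a placeholder.
  Json::Value jsonBody;
  jsonBody["llama_model_path"] = "/models/example.gguf";
  jsonBody["ngl"] = 100;      // matches the default in jsonBody.get("ngl", 100)
  jsonBody["ctx_len"] = 2048; // matches the default in jsonBody.get("ctx_len", 2048)
  jsonBody["mlock"] = true;   // new in this commit -> params.use_mlock

  std::cout << Json::writeString(Json::StreamWriterBuilder{}, jsonBody)
            << std::endl;
}

Setting mlock to true trades higher resident memory for avoiding page faults on the weights; on Linux the lock can fail or be capped by RLIMIT_MEMLOCK.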
