Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit f473b0b

Browse files
feat: model sources (#1777)
* feat: prioritize GPUs * fix: migrate db * fix: add priority * fix: db * fix: more * feat: model sources * feat: support delete API * feat: cli: support models sources add * feat: cli: model source delete * feat: cli: add model source list * feat: sync cortex.db * chore: cleanup * feat: add metadata for model * fix: migration * chore: unit tests: cleanup * fix: add metadata * fix: pull model * chore: unit tests: update * chore: add e2e tests for models sources * chore: add API docs * chore: rename --------- Co-authored-by: vansangpfiev <sang@jan.ai>
1 parent 8dde05c commit f473b0b

23 files changed

+1269
-271
lines changed

docs/static/openapi/cortex.json

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,105 @@
807807
"tags": ["Pulling Models"]
808808
}
809809
},
810+
"/v1/models/sources": {
811+
"post": {
812+
"summary": "Add a model source",
813+
"description": "Adds a Hugging Face organization or repository as a model source.",
814+
"requestBody": {
815+
"required": false,
816+
"content": {
817+
"application/json": {
818+
"schema": {
819+
"type": "object",
820+
"properties": {
821+
"source": {
822+
"type": "string",
823+
"description": "The URL of the model source to add",
824+
"example": "https://huggingface.co/cortexso/tinyllama"
825+
}
826+
}
827+
}
828+
}
829+
}
830+
},
831+
"responses": {
832+
"200": {
833+
"description": "Model source added successfully",
834+
"content": {
835+
"application/json": {
836+
"schema": {
837+
"type": "object",
838+
"properties": {
839+
"message": {
840+
"type": "string",
841+
"example": "Added model source"
842+
}
843+
}
844+
}
845+
}
846+
}
847+
}
848+
},
849+
"tags": ["Pulling Models"]
850+
},
851+
"delete": {
852+
"summary": "Remove a model source",
853+
"description": "Removes a previously added Hugging Face organization or repository from the model sources.",
854+
"requestBody": {
855+
"required": false,
856+
"content": {
857+
"application/json": {
858+
"schema": {
859+
"type": "object",
860+
"properties": {
861+
"source": {
862+
"type": "string",
863+
"description": "The URL of the model source to remove",
864+
"example": "https://huggingface.co/cortexso/tinyllama"
865+
}
866+
}
867+
}
868+
}
869+
}
870+
},
871+
"responses": {
872+
"200": {
873+
"description": "Model source removed successfully",
874+
"content": {
875+
"application/json": {
876+
"schema": {
877+
"type": "object",
878+
"properties": {
879+
"message": {
880+
"type": "string",
881+
"description": "Confirmation message returned after the model source is removed",
882+
"example": "Removed model source successfully!"
883+
}
884+
}
885+
}
886+
}
887+
}
888+
},
889+
"400": {
890+
"description": "Bad request",
891+
"content": {
892+
"application/json": {
893+
"schema": {
894+
"type": "object",
895+
"properties": {
896+
"error": {
897+
"type": "string",
898+
"description": "Error message describing the issue with the request"
899+
}
900+
}
901+
}
902+
}
903+
}
904+
}
905+
},
906+
"tags": ["Pulling Models"]
907+
}
908+
},
810909
"/v1/threads": {
811910
"post": {
812911
"operationId": "ThreadsController_create",

engine/cli/command_line_parser.cc

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
#include "commands/model_import_cmd.h"
2121
#include "commands/model_list_cmd.h"
2222
#include "commands/model_pull_cmd.h"
23+
#include "commands/model_source_add_cmd.h"
24+
#include "commands/model_source_del_cmd.h"
25+
#include "commands/model_source_list_cmd.h"
2326
#include "commands/model_start_cmd.h"
2427
#include "commands/model_stop_cmd.h"
2528
#include "commands/model_upd_cmd.h"
@@ -253,6 +256,8 @@ void CommandLineParser::SetupModelCommands() {
253256
"Display cpu mode");
254257
list_models_cmd->add_flag("--gpu_mode", cml_data_.display_gpu_mode,
255258
"Display gpu mode");
259+
list_models_cmd->add_flag("--available", cml_data_.display_available_model,
260+
"Display available models to download");
256261
list_models_cmd->group(kSubcommands);
257262
list_models_cmd->callback([this]() {
258263
if (std::exchange(executed_, true))
@@ -261,7 +266,8 @@ void CommandLineParser::SetupModelCommands() {
261266
cml_data_.config.apiServerHost,
262267
std::stoi(cml_data_.config.apiServerPort), cml_data_.filter,
263268
cml_data_.display_engine, cml_data_.display_version,
264-
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode);
269+
cml_data_.display_cpu_mode, cml_data_.display_gpu_mode,
270+
cml_data_.display_available_model);
265271
});
266272

267273
auto get_models_cmd =
@@ -329,6 +335,74 @@ void CommandLineParser::SetupModelCommands() {
329335
std::stoi(cml_data_.config.apiServerPort),
330336
cml_data_.model_id, cml_data_.model_path);
331337
});
338+
339+
auto model_source_cmd = models_cmd->add_subcommand(
340+
"sources", "Subcommands for managing model sources");
341+
model_source_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
342+
" models sources [options] [subcommand]");
343+
model_source_cmd->group(kSubcommands);
344+
345+
model_source_cmd->callback([this, model_source_cmd] {
346+
if (std::exchange(executed_, true))
347+
return;
348+
if (model_source_cmd->get_subcommands().empty()) {
349+
CLI_LOG(model_source_cmd->help());
350+
}
351+
});
352+
353+
auto model_src_add_cmd =
354+
model_source_cmd->add_subcommand("add", "Add a model source");
355+
model_src_add_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
356+
" models sources add [model_source]");
357+
model_src_add_cmd->group(kSubcommands);
358+
model_src_add_cmd->add_option("source", cml_data_.model_src, "");
359+
model_src_add_cmd->callback([&]() {
360+
if (std::exchange(executed_, true))
361+
return;
362+
if (cml_data_.model_src.empty()) {
363+
CLI_LOG("[model_source] is required\n");
364+
CLI_LOG(model_src_add_cmd->help());
365+
return;
366+
};
367+
368+
commands::ModelSourceAddCmd().Exec(
369+
cml_data_.config.apiServerHost,
370+
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
371+
});
372+
373+
auto model_src_del_cmd =
374+
model_source_cmd->add_subcommand("remove", "Remove a model source");
375+
model_src_del_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
376+
" models sources remove [model_source]");
377+
model_src_del_cmd->group(kSubcommands);
378+
model_src_del_cmd->add_option("source", cml_data_.model_src, "");
379+
model_src_del_cmd->callback([&]() {
380+
if (std::exchange(executed_, true))
381+
return;
382+
if (cml_data_.model_src.empty()) {
383+
CLI_LOG("[model_source] is required\n");
384+
CLI_LOG(model_src_del_cmd->help());
385+
return;
386+
};
387+
388+
commands::ModelSourceDelCmd().Exec(
389+
cml_data_.config.apiServerHost,
390+
std::stoi(cml_data_.config.apiServerPort), cml_data_.model_src);
391+
});
392+
393+
auto model_src_list_cmd =
394+
model_source_cmd->add_subcommand("list", "List all model sources");
395+
model_src_list_cmd->usage("Usage:\n" + commands::GetCortexBinary() +
396+
" models sources list");
397+
model_src_list_cmd->group(kSubcommands);
398+
model_src_list_cmd->callback([&]() {
399+
if (std::exchange(executed_, true))
400+
return;
401+
402+
commands::ModelSourceListCmd().Exec(
403+
cml_data_.config.apiServerHost,
404+
std::stoi(cml_data_.config.apiServerPort));
405+
});
332406
}
333407

334408
void CommandLineParser::SetupConfigsCommands() {

engine/cli/command_line_parser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ class CommandLineParser {
6666
bool display_version = false;
6767
bool display_cpu_mode = false;
6868
bool display_gpu_mode = false;
69+
bool display_available_model = false;
6970
std::string filter = "";
7071
std::string log_level = "INFO";
7172

@@ -74,6 +75,7 @@ class CommandLineParser {
7475
int port;
7576
config_yaml_utils::CortexConfig config;
7677
std::unordered_map<std::string, std::string> model_update_options;
78+
std::string model_src;
7779
};
7880
CmlData cml_data_;
7981
std::unordered_map<std::string, std::string> config_update_opts_;

engine/cli/commands/model_list_cmd.cc

Lines changed: 50 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ using Row_t =
2121
void ModelListCmd::Exec(const std::string& host, int port,
2222
const std::string& filter, bool display_engine,
2323
bool display_version, bool display_cpu_mode,
24-
bool display_gpu_mode) {
24+
bool display_gpu_mode, bool available) {
2525
// Start server if server is not started yet
2626
if (!commands::IsServerAlive(host, port)) {
2727
CLI_LOG("Starting server ...");
@@ -73,40 +73,62 @@ void ModelListCmd::Exec(const std::string& host, int port,
7373
continue;
7474
}
7575

76-
count += 1;
76+
if (available) {
77+
if (v["status"].asString() != "downloadable") {
78+
continue;
79+
}
7780

78-
std::vector<std::string> row = {std::to_string(count),
79-
v["model"].asString()};
80-
if (display_engine) {
81-
row.push_back(v["engine"].asString());
82-
}
83-
if (display_version) {
84-
row.push_back(v["version"].asString());
85-
}
81+
count += 1;
8682

87-
if (auto& r = v["recommendation"]; !r.isNull()) {
88-
if (display_cpu_mode) {
89-
if (!r["cpu_mode"].isNull()) {
90-
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
91-
}
83+
std::vector<std::string> row = {std::to_string(count),
84+
v["model"].asString()};
85+
if (display_engine) {
86+
row.push_back(v["engine"].asString());
87+
}
88+
if (display_version) {
89+
row.push_back(v["version"].asString());
90+
}
91+
table.add_row({row.begin(), row.end()});
92+
} else {
93+
if (v["status"].asString() == "downloadable") {
94+
continue;
95+
}
96+
97+
count += 1;
98+
99+
std::vector<std::string> row = {std::to_string(count),
100+
v["model"].asString()};
101+
if (display_engine) {
102+
row.push_back(v["engine"].asString());
103+
}
104+
if (display_version) {
105+
row.push_back(v["version"].asString());
92106
}
93107

94-
if (display_gpu_mode) {
95-
if (!r["gpu_mode"].isNull()) {
96-
std::string s;
97-
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
98-
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
99-
" - ";
100-
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
101-
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
102-
s += "recommended ngl: " +
103-
r["gpu_mode"][0]["recommend_ngl"].asString();
104-
row.push_back(s);
108+
if (auto& r = v["recommendation"]; !r.isNull()) {
109+
if (display_cpu_mode) {
110+
if (!r["cpu_mode"].isNull()) {
111+
row.push_back("RAM: " + r["cpu_mode"]["ram"].asString() + " MiB");
112+
}
113+
}
114+
115+
if (display_gpu_mode) {
116+
if (!r["gpu_mode"].isNull()) {
117+
std::string s;
118+
s += "ngl: " + r["gpu_mode"][0]["ngl"].asString() + " - ";
119+
s += "context: " + r["gpu_mode"][0]["context_length"].asString() +
120+
" - ";
121+
s += "RAM: " + r["gpu_mode"][0]["ram"].asString() + " MiB - ";
122+
s += "VRAM: " + r["gpu_mode"][0]["vram"].asString() + " MiB - ";
123+
s += "recommended ngl: " +
124+
r["gpu_mode"][0]["recommend_ngl"].asString();
125+
row.push_back(s);
126+
}
105127
}
106128
}
107-
}
108129

109-
table.add_row({row.begin(), row.end()});
130+
table.add_row({row.begin(), row.end()});
131+
}
110132
}
111133
}
112134

engine/cli/commands/model_list_cmd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ class ModelListCmd {
88
public:
99
void Exec(const std::string& host, int port, const std::string& filter,
1010
bool display_engine = false, bool display_version = false,
11-
bool display_cpu_mode = false, bool display_gpu_mode = false);
11+
bool display_cpu_mode = false, bool display_gpu_mode = false,
12+
bool available = false);
1213
};
1314
} // namespace commands
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#include "model_source_add_cmd.h"
2+
#include "server_start_cmd.h"
3+
#include "utils/json_helper.h"
4+
#include "utils/logging_utils.h"
5+
namespace commands {
6+
bool ModelSourceAddCmd::Exec(const std::string& host, int port, const std::string& model_source) {
7+
// Start server if server is not started yet
8+
if (!commands::IsServerAlive(host, port)) {
9+
CLI_LOG("Starting server ...");
10+
commands::ServerStartCmd ssc;
11+
if (!ssc.Exec(host, port)) {
12+
return false;
13+
}
14+
}
15+
16+
auto url = url_parser::Url{
17+
.protocol = "http",
18+
.host = host + ":" + std::to_string(port),
19+
.pathParams = {"v1", "models", "sources"},
20+
};
21+
22+
Json::Value json_data;
23+
json_data["source"] = model_source;
24+
25+
auto data_str = json_data.toStyledString();
26+
auto res = curl_utils::SimplePostJson(url.ToFullPath(), data_str);
27+
if (res.has_error()) {
28+
auto root = json_helper::ParseJsonString(res.error());
29+
CLI_LOG(root["message"].asString());
30+
return false;
31+
}
32+
33+
CLI_LOG("Added model source: " << model_source);
34+
return true;
35+
}
36+
37+
38+
}; // namespace commands
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#pragma once
2+
3+
#include <string>
4+
#include <unordered_map>
5+
6+
namespace commands {
7+
8+
class ModelSourceAddCmd {
9+
public:
10+
bool Exec(const std::string& host, int port, const std::string& model_source);
11+
};
12+
} // namespace commands

0 commit comments

Comments
 (0)