Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit ba13676

Browse files
authored
feat: cortex onboarding (#565)
1 parent 952d8a2 commit ba13676

File tree

9 files changed

+226
-28
lines changed

9 files changed

+226
-28
lines changed

cortex-js/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@
3333
"@nestjs/mapped-types": "*",
3434
"@nestjs/platform-express": "^10.0.0",
3535
"@nestjs/swagger": "^7.3.1",
36+
"@terascope/fetch-github-release": "^0.8.8",
3637
"axios": "^1.6.8",
3738
"class-transformer": "^0.5.1",
3839
"class-validator": "^0.14.1",
3940
"cli-progress": "^3.12.0",
41+
"decompress": "^4.2.1",
4042
"nest-commander": "^3.13.0",
4143
"readline": "^1.3.0",
4244
"reflect-metadata": "^0.2.0",
@@ -52,6 +54,7 @@
5254
"@nestjs/testing": "^10.0.0",
5355
"@nestjs/typeorm": "^10.0.2",
5456
"@types/cli-progress": "^3.11.5",
57+
"@types/decompress": "^4.2.7",
5558
"@types/express": "^4.17.17",
5659
"@types/jest": "^29.5.2",
5760
"@types/node": "^20.12.9",

cortex-js/src/command.module.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ import { ModelsCommand } from './infrastructure/commanders/models.command';
1111
import { StartCommand } from './infrastructure/commanders/start.command';
1212
import { ExtensionModule } from './infrastructure/repositories/extensions/extension.module';
1313
import { ChatModule } from './usecases/chat/chat.module';
14+
import { InitCommand } from './infrastructure/commanders/init.command';
15+
import { HttpModule } from '@nestjs/axios';
16+
import { CreateInitQuestions } from './infrastructure/commanders/inquirer/init.questions';
1417

1518
@Module({
1619
imports: [
@@ -24,6 +27,7 @@ import { ChatModule } from './usecases/chat/chat.module';
2427
CortexModule,
2528
ChatModule,
2629
ExtensionModule,
30+
HttpModule,
2731
],
2832
providers: [
2933
BasicCommand,
@@ -32,6 +36,8 @@ import { ChatModule } from './usecases/chat/chat.module';
3236
ServeCommand,
3337
InferenceCommand,
3438
StartCommand,
39+
InitCommand,
40+
CreateInitQuestions,
3541
],
3642
})
3743
export class CommandModule {}

cortex-js/src/infrastructure/commanders/basic-command.commander.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,16 @@ import { InferenceCommand } from './inference.command';
55
import { ModelsCommand } from './models.command';
66
import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
77
import { defaultCortexJsHost, defaultCortexJsPort } from 'constant';
8+
import { InitCommand } from './init.command';
89

910
@RootCommand({
10-
subCommands: [ModelsCommand, PullCommand, ServeCommand, InferenceCommand],
11+
subCommands: [
12+
ModelsCommand,
13+
PullCommand,
14+
ServeCommand,
15+
InferenceCommand,
16+
InitCommand,
17+
],
1118
})
1219
export class BasicCommand extends CommandRunner {
1320
constructor(private readonly cortexUsecases: CortexUsecases) {
cortex-js/src/infrastructure/commanders/init.command.ts

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
import { createWriteStream, existsSync, rmSync } from 'fs';
2+
import { CommandRunner, SubCommand, InquirerService } from 'nest-commander';
3+
import { resolve } from 'path';
4+
import { HttpService } from '@nestjs/axios';
5+
import { Presets, SingleBar } from 'cli-progress';
6+
import decompress from 'decompress';
7+
8+
/**
 * `init` sub-command (alias `setup`).
 *
 * Asks the `create-init-questions` question set about the user's hardware,
 * derives the matching engine archive name, downloads that asset from the
 * GitHub releases of janhq/cortex and unpacks it into the package root
 * directory (see {@link InitCommand.rootDir}).
 */
@SubCommand({ name: 'init', aliases: ['setup'] })
export class InitCommand extends CommandRunner {
  CORTEX_RELEASES_URL = 'https://api.github.com/repos/janhq/cortex/releases';

  constructor(
    private readonly httpService: HttpService,
    private readonly inquirerService: InquirerService,
  ) {
    super();
  }

  /**
   * Entry point invoked by nest-commander.
   *
   * @param input   Positional CLI arguments; `input[0]` is an optional release
   *                version, defaulting to `'latest'`.
   * @param options Pre-supplied answers, completed interactively by the
   *                `create-init-questions` question set.
   */
  async run(input: string[], options?: any): Promise<void> {
    options = await this.inquirerService.ask('create-init-questions', options);
    const version = input[0] ?? 'latest';

    await this.download(this.parseEngineFileName(options), version);
  }

  /**
   * Downloads the release asset whose name contains `engineFileName` and
   * extracts it into the root directory.
   *
   * The process is terminated via `process.exit`: code 1 when the release or
   * the asset cannot be found or extraction fails, code 0 after a successful
   * extraction. Any existing `cortex-cpp` directory under the root directory is
   * removed before the download starts.
   *
   * @param engineFileName Asset name fragment, as built by
   *                       {@link InitCommand.parseEngineFileName}.
   * @param version        `'latest'` or a release name/tag, with or without a
   *                       leading `v`.
   * @throws Error when the asset request yields no response, or when the
   *         download/write stream fails.
   */
  download = async (
    engineFileName: string,
    version: string = 'latest',
  ): Promise<any> => {
    const releasesUrl =
      this.CORTEX_RELEASES_URL + (version === 'latest' ? '/latest' : '');
    const res = await this.httpService
      .get(releasesUrl, {
        headers: {
          'X-GitHub-Api-Version': '2022-11-28',
          Accept: 'application/vnd.github+json',
        },
      })
      .toPromise();

    if (!res?.data) {
      console.log('Failed to fetch releases');
      process.exit(1);
    }

    // Only a *leading* "v" is a version prefix; non-string values never match.
    const stripV = (value: unknown): string | undefined =>
      typeof value === 'string' ? value.replace(/^v/, '') : undefined;
    const wantedVersion = stripV(version);

    // `/releases/latest` returns a single release object, `/releases` a list.
    // NOTE(review): the list endpoint is paginated, so an old version may not
    // be on the first page — confirm whether that matters here.
    const release = Array.isArray(res.data)
      ? res.data.find(
          (e: any) =>
            stripV(e?.name) === wantedVersion ||
            stripV(e?.tag_name) === wantedVersion,
        )
      : res.data;

    // Without this guard an unknown version crashed on `release.assets`.
    if (!release || !Array.isArray(release.assets)) {
      console.log(`Could not find release ${version}`);
      process.exit(1);
    }

    const toDownloadAsset = release.assets.find((s: any) =>
      s.name.includes(engineFileName),
    );

    if (!toDownloadAsset) {
      console.log(`Could not find engine file ${engineFileName}`);
      process.exit(1);
    }

    console.log(`Downloading engine file ${engineFileName}`);
    const rootDir = this.rootDir();
    const engineDir = resolve(rootDir, 'cortex-cpp');
    if (existsSync(engineDir)) rmSync(engineDir, { recursive: true });

    const download = await this.httpService
      .get(toDownloadAsset.browser_download_url, {
        responseType: 'stream',
      })
      .toPromise();
    if (!download) {
      throw new Error('Failed to download engine');
    }

    const destination = resolve(rootDir, toDownloadAsset.name);

    // The executor parameters are named so they do not shadow path.resolve.
    await new Promise<void>((resolveDownload, rejectDownload) => {
      const writer = createWriteStream(destination);
      const bar = new SingleBar({}, Presets.shades_classic);

      // The header arrives as a string and may be absent; only report a
      // percentage when a usable total is known (avoids feeding NaN to the bar).
      const totalBytes = Number(download.headers['content-length']);
      const hasTotal = Number.isFinite(totalBytes) && totalBytes > 0;
      let receivedBytes = 0;

      const fail = (error: unknown) => {
        bar.stop();
        writer.destroy();
        rejectDownload(error);
      };

      writer.on('finish', () => {
        bar.stop();
        resolveDownload();
      });
      writer.on('error', fail);
      // pipe() does not forward source errors to the destination, so a broken
      // connection would otherwise leave this promise pending forever.
      download.data.on('error', fail);

      bar.start(100, 0);

      download.data.on('data', (chunk: any) => {
        receivedBytes += chunk.length;
        if (hasTotal) {
          bar.update(Math.floor((receivedBytes / totalBytes) * 100));
        }
      });

      download.data.pipe(writer);
    });

    try {
      // `destination` is already an absolute path.
      await decompress(destination, rootDir);
    } catch (e) {
      console.log(e);
      process.exit(1);
    }
    process.exit(0);
  };

  /**
   * Builds the engine archive name for the current platform and the selected
   * options: `<platform>-<arch>[-<instructions>][-cuda-<ver>|-vulkan].tar.gz`.
   *
   * - platform: `windows` for win32, `mac` for darwin, otherwise
   *   `process.platform` unchanged.
   * - arch: `arm64` when running on arm64, otherwise `amd64`.
   * - instructions: lower-cased, only when provided.
   * - GPU suffix: `-cuda-11.7` / `-cuda-12.2` for Nvidia (12.2 unless
   *   `cudaVersion` is `'11'`), `-vulkan` for any other GPU type, nothing in
   *   CPU mode.
   */
  parseEngineFileName = (options: {
    runMode?: 'CPU' | 'GPU';
    gpuType?: 'Nvidia' | 'Others (Vulkan)';
    instructions?: 'AVX' | 'AVX2' | 'AVX-512' | undefined;
    cudaVersion?: '11' | '12';
  }) => {
    let platform: string = process.platform;
    if (platform === 'win32') {
      platform = 'windows';
    } else if (platform === 'darwin') {
      platform = 'mac';
    }

    const arch = process.arch === 'arm64' ? process.arch : 'amd64';

    let gpuSuffix = '';
    if (options.runMode === 'GPU') {
      gpuSuffix =
        options.gpuType === 'Nvidia'
          ? '-cuda-' + (options.cudaVersion === '11' ? '11.7' : '12.2')
          : '-vulkan';
    }

    const instructions = options.instructions ? `-${options.instructions}` : '';
    const engineName = `${platform}-${arch}${instructions.toLowerCase()}${gpuSuffix}`;
    return `${engineName}.tar.gz`;
  };

  /** Absolute path three directory levels above this compiled file. */
  rootDir = () => resolve(__dirname, `../../../`);
}
cortex-js/src/infrastructure/commanders/inquirer/init.questions.ts

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import { Question, QuestionSet } from 'nest-commander';
2+
3+
/** Predicate shared by the questions that are skipped on macOS (`darwin`). */
const isNotDarwin = (): boolean => process.platform !== 'darwin';

/**
 * Interactive question set registered under the name `create-init-questions`.
 *
 * Each decorated method receives the raw answer for its question and returns
 * it unchanged; the answers are stored under the `name` given in the
 * decorator (`runMode`, `gpuType`, `instructions`).
 */
@QuestionSet({ name: 'create-init-questions' })
export class CreateInitQuestions {
  /** CPU or GPU execution; not asked on macOS. */
  @Question({
    type: 'list',
    message: 'Select run mode',
    name: 'runMode',
    default: 'CPU',
    choices: ['CPU', 'GPU'],
    when: isNotDarwin,
  })
  parseRunMode(answer: string) {
    return answer;
  }

  /** GPU vendor; only asked when the run mode answer was `GPU`. */
  @Question({
    type: 'list',
    message: 'Select GPU type',
    name: 'gpuType',
    default: 'Nvidia',
    choices: ['Nvidia', 'Others (Vulkan)'],
    when: (previous: any) => previous.runMode === 'GPU',
  })
  parseGPUType(answer: string) {
    return answer;
  }

  /** CPU instruction set; not asked on macOS. */
  @Question({
    type: 'list',
    message: 'Select CPU instructions set',
    name: 'instructions',
    choices: ['AVX2', 'AVX', 'AVX-512'],
    when: isNotDarwin,
  })
  parseContent(answer: string) {
    return answer;
  }
}

cortex-js/src/infrastructure/commanders/start.command.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ import { CortexUsecases } from '@/usecases/cortex/cortex.usecases';
22
import { ModelsUsecases } from '@/usecases/models/models.usecases';
33
import { CommandRunner, SubCommand } from 'nest-commander';
44
import { LoadModelDto } from '../dtos/models/load-model.dto';
5+
import { resolve } from 'path';
6+
import { existsSync } from 'fs';
57

68
@SubCommand({ name: 'start', aliases: ['run'] })
79
export class StartCommand extends CommandRunner {
@@ -26,6 +28,10 @@ export class StartCommand extends CommandRunner {
2628
}
2729

2830
private async startCortex() {
31+
if (!existsSync(resolve(this.rootDir(), 'cortex-cpp'))) {
32+
console.log('Please init the cortex by running cortex init command!');
33+
process.exit(0);
34+
}
2935
const host = '127.0.0.1';
3036
const port = '3928';
3137
return this.cortexUsecases.startCortex(host, port);
@@ -45,4 +51,6 @@ export class StartCommand extends CommandRunner {
4551
const loadModelDto: LoadModelDto = { modelId, settings };
4652
return this.modelsUsecases.startModel(loadModelDto);
4753
}
54+
55+
rootDir = () => resolve(__dirname, `../../../`);
4856
}

cortex-js/src/infrastructure/providers/cortex/cortex.provider.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export default class CortexProvider extends OAIEngineExtension {
4646
const cpuThreadCount = 1; // TODO: NamH Math.max(1, nitroResourceProbe.numCpuPhysicalCore);
4747
const modelSettings = {
4848
// This is critical and requires real CPU physical core count (or performance core)
49+
model: model.id,
4950
cpu_threads: cpuThreadCount,
5051
...model.settings,
5152
llama_model_path: modelBinaryLocalPath,

cortex-js/src/usecases/cortex/cortex.usecases.ts

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,14 @@
1-
import { Injectable, InternalServerErrorException } from '@nestjs/common';
2-
import { ConfigService } from '@nestjs/config';
1+
import { Injectable } from '@nestjs/common';
32
import { ChildProcess, spawn } from 'child_process';
43
import { join } from 'path';
5-
import { existsSync } from 'fs';
64
import { CortexOperationSuccessfullyDto } from '@/infrastructure/dtos/cortex/cortex-operation-successfully.dto';
75
import { HttpService } from '@nestjs/axios';
86

97
@Injectable()
108
export class CortexUsecases {
119
private cortexProcess: ChildProcess | undefined;
1210

13-
constructor(
14-
private readonly configService: ConfigService,
15-
private readonly httpService: HttpService,
16-
) {}
11+
constructor(private readonly httpService: HttpService) {}
1712

1813
async startCortex(
1914
host: string,
@@ -26,29 +21,27 @@ export class CortexUsecases {
2621
};
2722
}
2823

29-
const binaryPath = this.configService.get<string>('CORTEX_BINARY_PATH');
30-
if (!binaryPath || !existsSync(binaryPath)) {
31-
throw new InternalServerErrorException('Cortex binary not found');
32-
}
33-
3424
const args: string[] = ['1', host, port];
3525
// go up one level to get the binary folder, have to also work on windows
36-
const binaryFolder = join(binaryPath, '..');
37-
38-
this.cortexProcess = spawn(binaryPath, args, {
39-
detached: false,
40-
cwd: binaryFolder,
41-
stdio: 'inherit',
42-
env: {
43-
...process.env,
44-
// TODO: NamH need to get below information
45-
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
46-
// // Vulkan - Support 1 device at a time for now
47-
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
48-
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
49-
// }),
26+
// const binaryFolder = join(binaryPath, '..');
27+
this.cortexProcess = spawn(
28+
join(__dirname, '../../../cortex-cpp/cortex-cpp'),
29+
args,
30+
{
31+
detached: false,
32+
cwd: join(__dirname, '../../../cortex-cpp'),
33+
stdio: 'inherit',
34+
env: {
35+
...process.env,
36+
// TODO: NamH need to get below information
37+
// CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
38+
// // Vulkan - Support 1 device at a time for now
39+
// ...(executableOptions.vkVisibleDevices?.length > 0 && {
40+
// GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
41+
// }),
42+
},
5043
},
51-
});
44+
);
5245

5346
this.registerCortexEvents();
5447

cortex-js/tsconfig.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"strictBindCallApply": true,
1919
"forceConsistentCasingInFileNames": true,
2020
"noFallthroughCasesInSwitch": true,
21+
"esModuleInterop": true,
2122
"paths": {
2223
"@/*": ["src/*"]
2324
}

0 commit comments

Comments
 (0)