Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit d820e06

Browse files
authored
feat: e2e embedding endpoint scripts (#511)
1 parent 2c33000 commit d820e06

File tree

3 files changed

+155
-32
lines changed

3 files changed

+155
-32
lines changed

.github/scripts/e2e-test-llama-linux-and-mac.sh

Lines changed: 80 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,16 @@
44
# ./linux-and-mac.sh './jan/plugins/@janhq/inference-plugin/dist/nitro/nitro_mac_arm64' https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
55

66
# Check for required arguments
7-
if [[ $# -ne 2 ]]; then
8-
echo "Usage: $0 <path_to_binary> <url_to_download>"
7+
if [[ $# -ne 3 ]]; then
8+
echo "Usage: $0 <path_to_binary> <url_to_download_llm> <url_to_download_embedding>"
99
exit 1
1010
fi
1111

12-
rm /tmp/response1.log /tmp/response2.log /tmp/nitro.log
12+
rm /tmp/load-llm-model-res.log /tmp/completion-res.log /tmp/unload-model-res.log /tmp/load-embedding-model-res.log /tmp/embedding-res.log /tmp/nitro.log
1313

1414
BINARY_PATH=$1
15-
DOWNLOAD_URL=$2
15+
DOWNLOAD_LLM_URL=$2
16+
DOWNLOAD_EMBEDDING_URL=$3
1617

1718
# Random port to ensure it's not used
1819
min=10000
@@ -37,11 +38,16 @@ sleep 5
3738

3839
# Check if /tmp/testllm exists, if not, download it
3940
if [[ ! -f "/tmp/testllm" ]]; then
40-
curl --connect-timeout 300 $DOWNLOAD_URL --output /tmp/testllm
41+
curl --connect-timeout 300 $DOWNLOAD_LLM_URL --output /tmp/testllm
42+
fi
43+
44+
# Check if /tmp/test-embedding exists, if not, download it
45+
if [[ ! -f "/tmp/test-embedding" ]]; then
46+
curl --connect-timeout 300 $DOWNLOAD_EMBEDDING_URL --output /tmp/test-embedding
4147
fi
4248

4349
# Run the curl commands
44-
response1=$(curl --connect-timeout 60 -o /tmp/response1.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
50+
response1=$(curl --connect-timeout 60 -o /tmp/load-llm-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
4551
--header 'Content-Type: application/json' \
4652
--data '{
4753
"llama_model_path": "/tmp/testllm",
@@ -57,7 +63,7 @@ if ! ps -p $pid >/dev/null; then
5763
fi
5864

5965
response2=$(
60-
curl --connect-timeout 60 -o /tmp/response2.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
66+
curl --connect-timeout 60 -o /tmp/completion-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/chat/completions" \
6167
--header 'Content-Type: application/json' \
6268
--header 'Accept: text/event-stream' \
6369
--header 'Access-Control-Allow-Origin: *' \
@@ -76,16 +82,65 @@ response2=$(
7682
}'
7783
)
7884

85+
# unload model
86+
response3=$(curl --connect-timeout 60 -o /tmp/unload-model-res.log --request GET -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/unloadModel" \
87+
--header 'Content-Type: application/json' \
88+
--data '{
89+
"llama_model_path": "/tmp/testllm"
90+
}')
91+
92+
# load embedding model
93+
response4=$(curl --connect-timeout 60 -o /tmp/load-embedding-model-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/inferences/llamacpp/loadModel" \
94+
--header 'Content-Type: application/json' \
95+
--data '{
96+
"llama_model_path": "/tmp/test-embedding",
97+
"ctx_len": 50,
98+
"ngl": 32,
99+
"embedding": true,
100+
"model_type": "embedding"
101+
}')
102+
103+
# request embedding
104+
response5=$(
105+
curl --connect-timeout 60 -o /tmp/embedding-res.log -s -w "%{http_code}" --location "http://127.0.0.1:$PORT/v1/embeddings" \
106+
--header 'Content-Type: application/json' \
107+
--header 'Accept: text/event-stream' \
108+
--header 'Access-Control-Allow-Origin: *' \
109+
--data '{
110+
"input": "Hello",
111+
"model": "test-embedding",
112+
"encoding_format": "float"
113+
}'
114+
)
115+
79116
error_occurred=0
80117
if [[ "$response1" -ne 200 ]]; then
81-
echo "The first curl command failed with status code: $response1"
82-
cat /tmp/response1.log
118+
echo "The load llm model curl command failed with status code: $response1"
119+
cat /tmp/load-llm-model-res.log
83120
error_occurred=1
84121
fi
85122

86123
if [[ "$response2" -ne 200 ]]; then
87-
echo "The second curl command failed with status code: $response2"
88-
cat /tmp/response2.log
124+
echo "The completion curl command failed with status code: $response2"
125+
cat /tmp/completion-res.log
126+
error_occurred=1
127+
fi
128+
129+
if [[ "$response3" -ne 200 ]]; then
130+
echo "The unload model curl command failed with status code: $response3"
131+
cat /tmp/unload-model-res.log
132+
error_occurred=1
133+
fi
134+
135+
if [[ "$response4" -ne 200 ]]; then
136+
echo "The load embedding model curl command failed with status code: $response4"
137+
cat /tmp/load-embedding-model-res.log
138+
error_occurred=1
139+
fi
140+
141+
if [[ "$response5" -ne 200 ]]; then
142+
echo "The embedding curl command failed with status code: $response5"
143+
cat /tmp/embedding-res.log
89144
error_occurred=1
90145
fi
91146

@@ -99,11 +154,23 @@ fi
99154

100155
echo "----------------------"
101156
echo "Log load model:"
102-
cat /tmp/response1.log
157+
cat /tmp/load-llm-model-res.log
158+
159+
echo "----------------------"
160+
echo "Log run test:"
161+
cat /tmp/completion-res.log
162+
163+
echo "----------------------"
164+
echo "Log unload model:"
165+
cat /tmp/unload-model-res.log
166+
167+
echo "----------------------"
168+
echo "Log load embedding model:"
169+
cat /tmp/load-embedding-model-res.log
103170

104171
echo "----------------------"
105172
echo "Log embedding test:"
106-
cat /tmp/response2.log
173+
cat /tmp/embedding-res.log
107174

108175
echo "Nitro test run successfully!"
109176

.github/scripts/e2e-test-llama-windows.bat

Lines changed: 70 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,28 @@
11
@echo off
22

33
set "TEMP=C:\Users\%UserName%\AppData\Local\Temp"
4-
set "MODEL_PATH=%TEMP%\testllm"
4+
set "MODEL_LLM_PATH=%TEMP%\testllm"
5+
set "MODEL_EMBEDDING_PATH=%TEMP%\test-embedding"
56

67
rem Check for required arguments
7-
if "%~2"=="" (
8-
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download^>
8+
if "%~3"=="" (
9+
echo Usage: %~0 ^<path_to_binary^> ^<url_to_download_llm^> ^<url_to_download_embedding^>
910
exit /b 1
1011
)
1112

1213
set "BINARY_PATH=%~1"
13-
set "DOWNLOAD_URL=%~2"
14+
set "DOWNLOAD_LLM_URL=%~2"
15+
set "DOWNLOAD_EMBEDDING_URL=%~3"
1416

1517
for %%i in ("%BINARY_PATH%") do set "BINARY_NAME=%%~nxi"
1618

1719
echo BINARY_NAME=%BINARY_NAME%
1820

1921
del %TEMP%\response1.log 2>nul
2022
del %TEMP%\response2.log 2>nul
23+
del %TEMP%\response3.log 2>nul
24+
del %TEMP%\response4.log 2>nul
25+
del %TEMP%\response5.log 2>nul
2126
del %TEMP%\nitro.log 2>nul
2227

2328
set /a min=9999
@@ -46,33 +51,53 @@ if not defined pid (
4651
rem Wait for a few seconds to let the server start
4752

4853
rem Check if %TEMP%\testmodel exists, if not, download it
49-
if not exist "%MODEL_PATH%" (
50-
curl.exe --connect-timeout 300 %DOWNLOAD_URL% --output "%MODEL_PATH%"
54+
if not exist "%MODEL_LLM_PATH%" (
55+
curl.exe --connect-timeout 300 %DOWNLOAD_LLM_URL% --output "%MODEL_LLM_PATH%"
5156
)
5257

53-
rem Define JSON strings for curl data
54-
call set "MODEL_PATH_STRING=%%MODEL_PATH:\=\\%%"
55-
set "curl_data1={\"llama_model_path\":\"%MODEL_PATH_STRING%\"}"
56-
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":true,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
58+
if not exist "%MODEL_EMBEDDING_PATH%" (
59+
curl.exe --connect-timeout 300 %DOWNLOAD_EMBEDDING_URL% --output "%MODEL_EMBEDDING_PATH%"
60+
)
5761

58-
rem Print the values of curl_data1 and curl_data2 for debugging
62+
rem Define JSON strings for curl data
63+
call set "MODEL_LLM_PATH_STRING=%%MODEL_LLM_PATH:\=\\%%"
64+
call set "MODEL_EMBEDDING_PATH_STRING=%%MODEL_EMBEDDING_PATH:\=\\%%"
65+
set "curl_data1={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
66+
set "curl_data2={\"messages\":[{\"content\":\"Hello there\",\"role\":\"assistant\"},{\"content\":\"Write a long and sad story for me\",\"role\":\"user\"}],\"stream\":false,\"model\":\"gpt-3.5-turbo\",\"max_tokens\":50,\"stop\":[\"hello\"],\"frequency_penalty\":0,\"presence_penalty\":0,\"temperature\":0.1}"
67+
set "curl_data3={\"llama_model_path\":\"%MODEL_LLM_PATH_STRING%\"}"
68+
set "curl_data4={\"llama_model_path\":\"%MODEL_EMBEDDING_PATH_STRING%\", \"embedding\": true, \"model_type\": \"embedding\"}"
69+
set "curl_data5={\"input\": \"Hello\", \"model\": \"test-embedding\", \"encoding_format\": \"float\"}"
70+
71+
rem Print the values of curl_data for debugging
5972
echo curl_data1=%curl_data1%
6073
echo curl_data2=%curl_data2%
74+
echo curl_data3=%curl_data3%
75+
echo curl_data4=%curl_data4%
76+
echo curl_data5=%curl_data5%
6177

6278
rem Run the curl commands and capture the status code
6379
curl.exe --connect-timeout 60 -o "%TEMP%\response1.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data1%" > %TEMP%\response1.log 2>&1
6480

6581
curl.exe --connect-timeout 60 -o "%TEMP%\response2.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/chat_completion" ^
6682
--header "Content-Type: application/json" ^
67-
--header "Accept: text/event-stream" ^
68-
--header "Access-Control-Allow-Origin: *" ^
6983
--data "%curl_data2%" > %TEMP%\response2.log 2>&1
7084

85+
curl.exe --connect-timeout 60 -o "%TEMP%\response3.log" --request GET -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/unloadModel" --header "Content-Type: application/json" --data "%curl_data3%" > %TEMP%\response3.log 2>&1
86+
87+
curl.exe --connect-timeout 60 -o "%TEMP%\response4.log" --request POST -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/inferences/llamacpp/loadModel" --header "Content-Type: application/json" --data "%curl_data4%" > %TEMP%\response4.log 2>&1
88+
89+
curl.exe --connect-timeout 60 -o "%TEMP%\response5.log" -s -w "%%{http_code}" --location "http://127.0.0.1:%PORT%/v1/embeddings" ^
90+
--header "Content-Type: application/json" ^
91+
--data "%curl_data5%" > %TEMP%\response5.log 2>&1
92+
7193
set "error_occurred=0"
7294

7395
rem Read the status codes from the log files
7496
for /f %%a in (%TEMP%\response1.log) do set "response1=%%a"
7597
for /f %%a in (%TEMP%\response2.log) do set "response2=%%a"
98+
for /f %%a in (%TEMP%\response3.log) do set "response3=%%a"
99+
for /f %%a in (%TEMP%\response4.log) do set "response4=%%a"
100+
for /f %%a in (%TEMP%\response5.log) do set "response5=%%a"
76101

77102
if "%response1%" neq "200" (
78103
echo The first curl command failed with status code: %response1%
@@ -86,6 +111,24 @@ if "%response2%" neq "200" (
86111
set "error_occurred=1"
87112
)
88113

114+
if "%response3%" neq "200" (
115+
echo The third curl command failed with status code: %response3%
116+
type %TEMP%\response3.log
117+
set "error_occurred=1"
118+
)
119+
120+
if "%response4%" neq "200" (
121+
echo The fourth curl command failed with status code: %response4%
122+
type %TEMP%\response4.log
123+
set "error_occurred=1"
124+
)
125+
126+
if "%response5%" neq "200" (
127+
echo The fifth curl command failed with status code: %response5%
128+
type %TEMP%\response5.log
129+
set "error_occurred=1"
130+
)
131+
89132
if "%error_occurred%"=="1" (
90133
echo Nitro test run failed!!!!!!!!!!!!!!!!!!!!!!
91134
echo Nitro Error Logs:
@@ -96,13 +139,25 @@ if "%error_occurred%"=="1" (
96139

97140

98141
echo ----------------------
99-
echo Log load model:
142+
echo Log load llm model:
100143
type %TEMP%\response1.log
101144

102145
echo ----------------------
103-
echo "Log run test:"
146+
echo Log run test:
104147
type %TEMP%\response2.log
105148

149+
echo ----------------------
150+
echo Log unload model:
151+
type %TEMP%\response3.log
152+
153+
echo ----------------------
154+
echo Log load embedding model:
155+
type %TEMP%\response4.log
156+
157+
echo ----------------------
158+
echo Log run embedding test:
159+
type %TEMP%\response5.log
160+
106161
echo Nitro test run successfully!
107162

108163
rem Kill the server process

.github/workflows/build.yml

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ env:
4949
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
5050
LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
5151
WHISPER_MODEL_URL: https://delta.jan.ai/ggml-tiny-q5_1.bin
52+
EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
5253

5354
jobs:
5455
create-draft-release:
@@ -186,7 +187,7 @@ jobs:
186187
run: |
187188
# run e2e testing
188189
cd nitro
189-
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
190+
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
190191
rm -rf uploads/
191192
192193
- name: Run e2e testing - Whisper.CPP
@@ -307,7 +308,7 @@ jobs:
307308
run: |
308309
# run e2e testing
309310
cd nitro/
310-
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
311+
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
311312
rm -rf uploads/
312313
313314
- name: Run e2e testing - Whisper.CPP
@@ -373,7 +374,7 @@ jobs:
373374
run: |
374375
# run e2e testing
375376
cd nitro
376-
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }}
377+
chmod +x ../.github/scripts/e2e-test-llama-linux-and-mac.sh && ../.github/scripts/e2e-test-llama-linux-and-mac.sh ./nitro ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
377378
rm -rf uploads/
378379
379380
- name: Run e2e testing - Whisper.CPP
@@ -519,7 +520,7 @@ jobs:
519520
if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
520521
run: |
521522
cd build\Release
522-
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }}
523+
..\..\.github\scripts\e2e-test-llama-windows.bat nitro.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
523524
rmdir /S /Q .\build\Release\uploads
524525
525526
- name: Run e2e testing - Whisper.cpp

0 commit comments

Comments
 (0)