Skip to content

Commit 00131d6

Browse files
authored
tests : update for LLAMA_SET_ROWS=1 (#14961)
* test-thread-safety : each context uses a single sequence
* embedding : handle --parallel argument (ggml-ci)
* save-load : handle -np 1 (ggml-ci)
* thread-safety : avoid overriding threads, reduce test case arg (ggml-ci)
1 parent 1e15bfd commit 00131d6

File tree

5 files changed

+19
-2
lines changed

5 files changed

+19
-2
lines changed

examples/embedding/embedding.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,14 @@ int main(int argc, char ** argv) {
8181

8282
params.embedding = true;
8383

84+
// if the number of prompts that would be encoded is known in advance, it's more efficient to specify the
85+
// --parallel argument accordingly. for convenience, if not specified, we fall back to unified KV cache
86+
// in order to support any number of prompts
87+
if (params.n_parallel == 1) {
88+
LOG_INF("%s: n_parallel == 1 -> unified KV cache is enabled\n", __func__);
89+
params.kv_unified = true;
90+
}
91+
8492
// utilize the full context
8593
if (params.n_batch < params.n_ctx) {
8694
LOG_WRN("%s: setting batch size to %d\n", __func__, params.n_ctx);

examples/save-load-state/save-load-state.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ int main(int argc, char ** argv) {
1515
return 1;
1616
}
1717

18+
if (params.n_parallel == 1) {
19+
// the example uses 2 sequences, so when n_parallel == 1, we need to enable unified kv cache
20+
printf("%s: n_parallel == 1, enabling unified kv cache\n", __func__);
21+
params.kv_unified = true;
22+
}
23+
1824
common_init();
1925

2026
if (params.n_predict < 0) {

src/llama-batch.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ bool llama_batch_allocr::init(
5959
for (int32_t i = 0; i < batch.n_tokens; ++i) {
6060
for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) {
6161
if (batch.seq_id && (batch.seq_id[i][s] < 0 || batch.seq_id[i][s] >= (llama_seq_id) n_seq_max)) {
62-
LLAMA_LOG_ERROR("%s: invalid seq_id[%d][%d] = %d > %d\n", __func__, i, s, batch.seq_id[i][s], (llama_seq_id) n_seq_max);
62+
LLAMA_LOG_ERROR("%s: invalid seq_id[%d][%d] = %d >= %d\n", __func__, i, s, batch.seq_id[i][s], (llama_seq_id) n_seq_max);
6363
return false;
6464
}
6565
}

tests/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ llama_build_and_test(test-json-partial.cpp)
185185
llama_build_and_test(test-log.cpp)
186186
llama_build_and_test(test-regex-partial.cpp)
187187

188-
llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4)
188+
llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
189189

190190
# this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135)
191191
if (NOT WIN32)

tests/test-thread-safety.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ int main(int argc, char ** argv) {
3434

3535
auto cparams = common_context_params_to_llama(params);
3636

37+
// each context has a single sequence
38+
cparams.n_seq_max = 1;
39+
3740
int dev_count = ggml_backend_dev_count();
3841
int gpu_dev_count = 0;
3942
for (int i = 0; i < dev_count; ++i) {

0 commit comments

Comments (0)