
Commit ba42794

graph : fix equal_seq() check (#14986)
ggml-ci
1 parent 2860d47 commit ba42794

3 files changed: 16 additions, 2 deletions


src/llama-context.cpp

Lines changed: 10 additions & 1 deletion
@@ -113,6 +113,15 @@ llama_context::llama_context(
         }
     }
 
+    {
+        const char * LLAMA_GRAPH_REUSE_DISABLE = getenv("LLAMA_GRAPH_REUSE_DISABLE");
+        graph_reuse_disable = LLAMA_GRAPH_REUSE_DISABLE ? (atoi(LLAMA_GRAPH_REUSE_DISABLE) != 0) : graph_reuse_disable;
+
+        if (graph_reuse_disable) {
+            LLAMA_LOG_WARN("%s: graph reuse disabled\n", __func__);
+        }
+    }
+
     const uint32_t n_ctx_per_seq = cparams.n_ctx / cparams.n_seq_max;
 
     LLAMA_LOG_INFO("%s: n_seq_max = %u\n", __func__, cparams.n_seq_max);
@@ -716,7 +725,7 @@ llm_graph_result * llama_context::process_ubatch(const llama_ubatch & ubatch, ll
     // in order to correctly reuse a graph, it's full topology has to be uniquely determined by these parameters
     const auto gparams = graph_params(res, ubatch, mctx, gtype);
 
-    if (res->can_reuse(gparams)) {
+    if (!graph_reuse_disable && res->can_reuse(gparams)) {
         //LLAMA_LOG_DEBUG("%s: reusing previous graph\n", __func__);
 
         n_reused++;
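
The new block follows a common environment-override pattern: read the variable and only flip the compiled-in default when it parses to a non-zero integer. The sketch below is a minimal standalone illustration of that pattern (a hypothetical example program mirroring the constructor logic, not code from the commit):

#include <cstdio>
#include <cstdlib>

int main() {
    // default mirrors llama_context: graph reuse stays enabled unless overridden
    bool graph_reuse_disable = false;

    // LLAMA_GRAPH_REUSE_DISABLE set to any non-zero integer disables reuse;
    // an unset or zero-valued variable leaves the default untouched
    const char * env = std::getenv("LLAMA_GRAPH_REUSE_DISABLE");
    graph_reuse_disable = env ? (std::atoi(env) != 0) : graph_reuse_disable;

    if (graph_reuse_disable) {
        std::printf("graph reuse disabled\n");
    }

    return 0;
}

In the actual change, the flag then gates the reuse path in process_ubatch: with the variable set, res->can_reuse() is never consulted and the graph is rebuilt for every ubatch, which can be handy when debugging suspected reuse problems.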

src/llama-context.h

Lines changed: 3 additions & 0 deletions
@@ -291,6 +291,9 @@ struct llama_context {
     // ref: https://github.com/ggml-org/llama.cpp/pull/14285
     bool supports_set_rows = false;
 
+    // env: LLAMA_GRAPH_REUSE_DISABLE
+    bool graph_reuse_disable = false;
+
     // perf
     mutable int64_t t_start_us = 0;
     mutable int64_t t_load_us = 0;

src/llama-graph.h

Lines changed: 3 additions & 1 deletion
@@ -423,7 +423,9 @@ struct llm_graph_params {
             (!ubatch.embd && !other.ubatch.embd)
         );
 
-        if (can_reuse_ubatch && !ubatch.equal_seqs()) {
+        // when we split the batch using "equal_seqs" we have to verify that the participating sequences are the same
+        // the reason is because the set of attention streams would be different for different sequences
+        if (can_reuse_ubatch && ubatch.equal_seqs()) {
             if (!ubatch.data) {
                 // if the old ubatch does not own it's data, then we cannot guarantee that it is still alive, and
                 // therefore we cannot perform the sequence id check. normally should never happen
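
The flipped condition is the actual fix: previously the sequence-id check ran only for ubatches that were not split with "equal_seqs", the opposite of what the new comment describes. The reasoning is that under an "equal_seqs" split each ubatch covers a specific set of sequences, and the attention streams baked into the graph depend on exactly which sequences those are, so a graph built for one sequence set cannot be reused for another. A simplified, self-contained sketch of that kind of check (using a made-up ubatch_view type for illustration, not the real llama_ubatch) could look like this:

#include <cstdio>
#include <set>
#include <vector>

// made-up stand-in for the fields relevant to the check (illustrative only)
struct ubatch_view {
    bool equal_seqs;            // was this ubatch produced by an "equal_seqs" split?
    std::vector<int> seq_ids;   // sequence ids participating in this ubatch
};

// graphs built for different sequence sets are not interchangeable, because
// the set of attention streams differs; reuse requires identical sequence sets
static bool seq_sets_match(const ubatch_view & cur, const ubatch_view & prev) {
    if (!cur.equal_seqs || !prev.equal_seqs) {
        return true; // the check only applies to "equal_seqs" splits
    }
    const std::set<int> a(cur.seq_ids.begin(), cur.seq_ids.end());
    const std::set<int> b(prev.seq_ids.begin(), prev.seq_ids.end());
    return a == b;
}

int main() {
    ubatch_view prev = { true, {0, 1} };
    ubatch_view cur  = { true, {0, 2} };
    // different sequence sets, so the previous graph would not be reusable
    std::printf("reusable: %s\n", seq_sets_match(cur, prev) ? "yes" : "no");
    return 0;
}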
