
Commit 1e15bfd

graph : fix stack-use-after-return (#14960)
ggml-ci
1 parent a118d80 commit 1e15bfd

File tree

1 file changed: +15 -12 lines changed


src/llama-graph.h

Lines changed: 15 additions & 12 deletions
@@ -144,7 +144,7 @@ class llm_graph_input_pos_bucket : public llm_graph_input_i {
 
     ggml_tensor * pos_bucket = nullptr; // I32 [n_batch, n_batch]
 
-    const llama_hparams & hparams;
+    const llama_hparams hparams;
 };
 
 class llm_graph_input_pos_bucket_kv : public llm_graph_input_i {
@@ -158,7 +158,7 @@ class llm_graph_input_pos_bucket_kv : public llm_graph_input_i {
 
     ggml_tensor * pos_bucket = nullptr; // I32 [n_kv, n_batch]
 
-    const llama_hparams & hparams;
+    const llama_hparams hparams;
 
     const llama_kv_cache_unified_context * mctx;
 };
@@ -177,8 +177,8 @@ class llm_graph_input_out_ids : public llm_graph_input_i {
 
     ggml_tensor * out_ids; // I32 [n_outputs]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 
     const uint32_t n_outputs;
 };
@@ -192,7 +192,7 @@ class llm_graph_input_mean : public llm_graph_input_i {
 
     ggml_tensor * mean; // F32 [n_batch, n_batch]
 
-    const llama_cparams & cparams;
+    const llama_cparams cparams;
 };
 
 class llm_graph_input_cls : public llm_graph_input_i {
@@ -204,7 +204,7 @@ class llm_graph_input_cls : public llm_graph_input_i {
 
     ggml_tensor * cls; // I32 [n_batch]
 
-    const llama_cparams & cparams;
+    const llama_cparams cparams;
 };
 
 class llm_graph_input_rs : public llm_graph_input_i {
@@ -247,8 +247,8 @@ class llm_graph_input_attn_no_cache : public llm_graph_input_i {
     ggml_tensor * kq_mask = nullptr;     // F32 [n_tokens, n_batch, 1, 1]
     ggml_tensor * kq_mask_cnv = nullptr; //     [n_tokens, n_batch, 1, 1]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 };
 
 class llm_graph_input_attn_kv_unified : public llm_graph_input_i {
@@ -278,8 +278,11 @@ class llm_graph_input_attn_kv_unified : public llm_graph_input_i {
     ggml_tensor * self_kq_mask = nullptr;     // F32 [n_kv, n_batch/n_stream, 1, n_stream]
     ggml_tensor * self_kq_mask_cnv = nullptr; //     [n_kv, n_batch/n_stream, 1, n_stream]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    // note: these have to be copies because in order to be able to reuse a graph, its inputs
+    //       need to carry these parameters with them. otherwise, they can point to freed
+    //       llm_graph_params from a previous batch, causing stack-use-after-return
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 
     const llama_kv_cache_unified_context * mctx;
 };
@@ -318,8 +321,8 @@ class llm_graph_input_attn_kv_unified_iswa : public llm_graph_input_i {
     ggml_tensor * self_kq_mask_swa = nullptr;     // F32 [n_kv, n_batch/n_stream, 1, n_stream]
    ggml_tensor * self_kq_mask_swa_cnv = nullptr;  //     [n_kv, n_batch/n_stream, 1, n_stream]
 
-    const llama_hparams & hparams;
-    const llama_cparams & cparams;
+    const llama_hparams hparams;
+    const llama_cparams cparams;
 
    const llama_kv_cache_unified_iswa_context * mctx;
 };
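The note added in llm_graph_input_attn_kv_unified explains the fix: graph inputs must own copies of the hyper/compute parameters so that a reused graph never dereferences an llm_graph_params object that has already gone out of scope. The sketch below is not part of the commit; it uses hypothetical names (params, input_ref, input_copy, build_for_batch, build_for_batch_fixed) rather than the real llama.cpp types to show the failure mode a reference member creates and why storing a copy avoids it.

// Minimal sketch of the dangling-reference pattern this commit removes.
// All names below are hypothetical illustrations, not llama.cpp types.

#include <cstdio>
#include <memory>

struct params {
    int n_batch;
};

// Buggy variant: the input only references the caller's params.
struct input_ref {
    explicit input_ref(const params & p) : p(p) {}
    const params & p; // dangles once the referenced object is destroyed
};

// Fixed variant (the approach taken in the diff): the input stores a copy,
// so it stays valid even after the original params go out of scope.
struct input_copy {
    explicit input_copy(const params & p) : p(p) {}
    const params p;
};

std::unique_ptr<input_ref> build_for_batch() {
    params local{512};                          // lives on this stack frame
    return std::make_unique<input_ref>(local);  // reference outlives 'local'
}

std::unique_ptr<input_copy> build_for_batch_fixed() {
    params local{512};
    return std::make_unique<input_copy>(local); // copy survives the return
}

int main() {
    auto bad  = build_for_batch();
    auto good = build_for_batch_fixed();

    // Reusing 'bad' for the next batch reads freed stack memory: undefined
    // behavior, which AddressSanitizer reports as stack-use-after-return.
    std::printf("%d\n", bad->p.n_batch);

    // 'good' carries its own copy, so this read is well-defined.
    std::printf("%d\n", good->p.n_batch);
}

The tradeoff is that each graph input now carries its own copy of the parameter structs instead of a reference, which is what allows a graph to be reused across batches without touching freed memory.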
