From 44d0d2eda2143e6b510685c0a90a04b015e29562 Mon Sep 17 00:00:00 2001 From: ZhuJiaqi Date: Tue, 14 Oct 2025 09:34:45 +0000 Subject: [PATCH 1/2] fix get max_q_len in page prefill plan --- flashinfer/prefill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashinfer/prefill.py b/flashinfer/prefill.py index ac73ca9871..85401d7316 100755 --- a/flashinfer/prefill.py +++ b/flashinfer/prefill.py @@ -1715,7 +1715,7 @@ def plan( self._max_q_len = max_token_per_sequence else: qo_indptr_host = qo_indptr.to("cpu") - self._max_q_len = max(qo_indptr_host).item() + self._max_q_len = max(qo_indptr_host[1:] - qo_indptr_host[:-1]).item() total_num_rows = int(qo_indptr_host[-1]) if max_sequence_kv is not None: From ff6e4bbf32723bc170d14cbcc528bb06b1ec316a Mon Sep 17 00:00:00 2001 From: Zihao Ye Date: Thu, 16 Oct 2025 18:12:15 -0400 Subject: [PATCH 2/2] pre-commits --- flashinfer/prefill.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flashinfer/prefill.py b/flashinfer/prefill.py index 85401d7316..7399bd4268 100755 --- a/flashinfer/prefill.py +++ b/flashinfer/prefill.py @@ -1715,7 +1715,7 @@ def plan( self._max_q_len = max_token_per_sequence else: qo_indptr_host = qo_indptr.to("cpu") - self._max_q_len = max(qo_indptr_host[1:] - qo_indptr_host[:-1]).item() + self._max_q_len = max(qo_indptr_host[1:] - qo_indptr_host[:-1]).item() total_num_rows = int(qo_indptr_host[-1]) if max_sequence_kv is not None: