Commit 608bb14

[Attention] Remove max cudagraph size limit of 992 (vllm-project#27840)
Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
1 parent 4a36681 commit 608bb14
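
This commit removes the init-time guard, inherited from FA3's internal heuristic, that rejected full-cuda-graph capture sizes above 992 in both the FlashAttention and FlashAttention MLA backends; larger cudagraph capture sizes now proceed to scheduler-metadata allocation instead of raising ValueError.

A minimal sketch of what this unblocks, assuming vLLM's CompilationConfig API (the model name is a placeholder, and how full cuda graph capture is enabled varies by vLLM version, e.g. a cudagraph_mode or full_cuda_graph knob, so treat this as illustrative rather than a verified recipe):

from vllm import LLM
from vllm.config import CompilationConfig

# Capture sizes above 992 were rejected before this commit whenever full
# cuda graph capture was combined with FA3 AOT scheduling; they are now
# accepted by both affected backends.
compilation_config = CompilationConfig(
    cudagraph_capture_sizes=[1, 8, 64, 512, 1024],  # 1024 > 992
)

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # hypothetical model choice
    compilation_config=compilation_config,
)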

Showing 2 changed files with 0 additions and 14 deletions.

vllm/v1/attention/backends/flash_attn.py

Lines changed: 0 additions & 7 deletions

@@ -244,13 +244,6 @@ def __init__(
         self.max_cudagraph_size = self.compilation_config.max_cudagraph_capture_size
 
         if self.use_full_cuda_graph and self.aot_schedule:
-            if self.max_cudagraph_size > 992:
-                # This condition derives from FA3's internal heuristic.
-                # TODO(woosuk): Support larger cudagraph sizes.
-                raise ValueError(
-                    "Capture size larger than 992 is not supported for full cuda graph."
-                )
-
             self.scheduler_metadata = torch.zeros(
                 vllm_config.scheduler_config.max_num_seqs + 1,
                 dtype=torch.int32,
vllm/v1/attention/backends/mla/flashattn_mla.py

Lines changed: 0 additions & 7 deletions

@@ -97,13 +97,6 @@ def __init__(
         self.max_cudagraph_size = self.compilation_config.max_cudagraph_capture_size
 
         if self.use_full_cuda_graph and self.fa_aot_schedule:
-            if self.max_cudagraph_size > 992:
-                # This condition derives from FA3's internal heuristic.
-                # TODO(woosuk): Support larger cudagraph sizes.
-                raise ValueError(
-                    "Capture size larger than 992 is not supported for full cuda graph."
-                )
-
             self.scheduler_metadata = torch.zeros(
                 vllm_config.scheduler_config.max_num_seqs + 1,
                 dtype=torch.int32,
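
For reference, a standalone reconstruction of the guard both backends drop here (the function wrapper and constant name are hypothetical; the check and error message are taken verbatim from the deleted lines):

# Hypothetical standalone form of the removed check, for illustration only.
MAX_FA3_CUDAGRAPH_SIZE = 992  # limit derived from FA3's internal heuristic

def check_capture_size(max_cudagraph_size: int,
                       use_full_cuda_graph: bool,
                       aot_schedule: bool) -> None:
    # Before this commit, both backends raised at __init__ time:
    if use_full_cuda_graph and aot_schedule:
        if max_cudagraph_size > MAX_FA3_CUDAGRAPH_SIZE:
            raise ValueError(
                "Capture size larger than 992 is not supported for full cuda graph."
            )

# After this commit the check is gone: capture sizes above 992 fall through
# to the scheduler_metadata preallocation shown in the diffs above.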
