From df97fd6930e081355e700058f81327617dfde9b4 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Fri, 7 Nov 2025 11:56:00 -0600
Subject: [PATCH 1/4] Fix import error and apply assert in appropriate case

Signed-off-by: Randall Smith
---
 .../v1/entrypoints/llm/test_struct_output_generate.py | 11 ++++++++---
 vllm/v1/attention/backends/rocm_aiter_fa.py           |  4 ++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 014e6eca2e02..e0ef6902f797 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -677,9 +677,14 @@ def test_structured_output_with_reasoning_matrices(
     reasoning_content, content = run_reasoning_extraction(reasoner, [generated_text])
     print(f"Prompt: {prompt!r}\nReasoning: {reasoning_content!r}\nContent: {content!r}")
 
-    assert content is not None and reasoning_content is not None
-    output_json = json.loads(content)
-    jsonschema.validate(instance=output_json, schema=reasoning_schema)
+    if "Qwen3" in model_name:
+        assert content is not None
+
+    assert reasoning_content is not None
+
+    if content is not None:
+        output_json = json.loads(content)
+        jsonschema.validate(instance=output_json, schema=reasoning_schema)
 
 
 @pytest.mark.skip_global_cleanup
diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index 30e5cafe0c84..f3711c9f69da 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -30,9 +30,9 @@
 _CP_TOKENS_PER_ITER_ROCM = 32 * 1024
 
 if current_platform.is_rocm():
-    import aiter
-    from aiter.ops.triton.utils.device_info import get_num_sms
+    from aiter.ops.triton.utils.arch_info import get_num_sms
+    import aiter
 
     from vllm.triton_utils import tl, triton
 
     def block_size(x, head_dim):

From 87bc309964926e126d4dc66b05e3703714e83e24 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Fri, 7 Nov 2025 12:09:18 -0600
Subject: [PATCH 2/4] precommit check

Signed-off-by: Randall Smith
---
 vllm/v1/attention/backends/rocm_aiter_fa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index f3711c9f69da..a83316250fcd 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -30,9 +30,9 @@
 _CP_TOKENS_PER_ITER_ROCM = 32 * 1024
 
 if current_platform.is_rocm():
+    import aiter
     from aiter.ops.triton.utils.arch_info import get_num_sms
-    import aiter
 
     from vllm.triton_utils import tl, triton
 
     def block_size(x, head_dim):

From 0fee5aadbbbb7d9b10e3b7d9abd7237b0ab77d59 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Tue, 11 Nov 2025 16:38:08 -0600
Subject: [PATCH 3/4] fix typo

Signed-off-by: Randall Smith
---
 tests/v1/entrypoints/llm/test_struct_output_generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index a7a5624d92cb..a7d769c8542a 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -680,7 +680,7 @@ def test_structured_output_with_reasoning_matrices(
     if "Qwen3" in model_name:
         assert content is not None
 
-    assert reasoning_content is not None
+    assert reasoning is not None
     if content is not None:
         output_json = json.loads(content)
         jsonschema.validate(instance=output_json, schema=reasoning_schema)

From 338df55e8565a0cda3e63c72641ebc7e6a85bb04 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Wed, 12 Nov 2025 23:37:57 -0600
Subject: [PATCH 4/4] use get_cu_count

Signed-off-by: Randall Smith
---
 vllm/v1/attention/backends/rocm_aiter_fa.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index 23cda57039b3..d4969b3c6691 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -18,6 +18,7 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv
+from vllm.utils.platform_utils import get_cu_count
 from vllm.v1.attention.backends.utils import (
     AttentionCGSupport,
     AttentionMetadataBuilder,
@@ -31,7 +32,6 @@
 
 if current_platform.is_rocm():
     import aiter
-    from aiter.ops.triton.utils.arch_info import get_num_sms
 
     from vllm.triton_utils import tl, triton
 
@@ -39,7 +39,7 @@ def block_size(x, head_dim):
         return min(65536 // x.element_size(), triton.next_power_of_2(head_dim))
 
     def num_programs(head_dim):
-        return min(head_dim, get_num_sms())
+        return min(head_dim, get_cu_count())
 
     @triton.jit
     def cp_mha_gather_cache_kernel(
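
Note: a minimal standalone sketch of the grid-sizing rule that PATCH 4/4 ends up with. The helper below is illustrative only; it takes the compute-unit count as a parameter instead of calling vLLM's get_cu_count() (whose import path, vllm.utils.platform_utils, is taken from the diff above), and the example CU count is just a sample value (e.g. 304 CUs on MI300X).

    # Illustrative sketch, not vLLM source: cap the number of Triton programs
    # at the GPU's compute-unit (CU) count, mirroring
    #     return min(head_dim, get_cu_count())
    def num_programs(head_dim: int, cu_count: int) -> int:
        # Never launch more programs than there are head_dim elements to
        # process, and never more than the device has compute units.
        return min(head_dim, cu_count)

    print(num_programs(head_dim=128, cu_count=304))  # 128: head_dim is the limit
    print(num_programs(head_dim=512, cu_count=304))  # 304: the CU count caps the grid

Swapping get_num_sms() for vLLM's own get_cu_count() also drops the import from aiter.ops.triton.utils, whose module path changed from device_info to arch_info across aiter versions and appears to be the import error that PATCH 1/4's subject line refers to.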