From df97fd6930e081355e700058f81327617dfde9b4 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Fri, 7 Nov 2025 11:56:00 -0600
Subject: [PATCH 1/4] Fix import error and apply assert in appropriate case

Signed-off-by: Randall Smith
---
 .../v1/entrypoints/llm/test_struct_output_generate.py | 11 ++++++++---
 vllm/v1/attention/backends/rocm_aiter_fa.py           |  4 ++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index 014e6eca2e02..e0ef6902f797 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -677,9 +677,14 @@ def test_structured_output_with_reasoning_matrices(
     reasoning_content, content = run_reasoning_extraction(reasoner, [generated_text])
     print(f"Prompt: {prompt!r}\nReasoning: {reasoning_content!r}\nContent: {content!r}")
 
-    assert content is not None and reasoning_content is not None
-    output_json = json.loads(content)
-    jsonschema.validate(instance=output_json, schema=reasoning_schema)
+    if "Qwen3" in model_name:
+        assert content is not None
+
+    assert reasoning_content is not None
+
+    if content is not None:
+        output_json = json.loads(content)
+        jsonschema.validate(instance=output_json, schema=reasoning_schema)
 
 
 @pytest.mark.skip_global_cleanup
diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index 30e5cafe0c84..f3711c9f69da 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -30,9 +30,9 @@
 _CP_TOKENS_PER_ITER_ROCM = 32 * 1024
 
 if current_platform.is_rocm():
-    import aiter
-    from aiter.ops.triton.utils.device_info import get_num_sms
+    from aiter.ops.triton.utils.arch_info import get_num_sms
+    import aiter
 
     from vllm.triton_utils import tl, triton
 
     def block_size(x, head_dim):

From 87bc309964926e126d4dc66b05e3703714e83e24 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Fri, 7 Nov 2025 12:09:18 -0600
Subject: [PATCH 2/4] precommit check

Signed-off-by: Randall Smith
---
 vllm/v1/attention/backends/rocm_aiter_fa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index f3711c9f69da..a83316250fcd 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -30,9 +30,9 @@
 _CP_TOKENS_PER_ITER_ROCM = 32 * 1024
 
 if current_platform.is_rocm():
+    import aiter
     from aiter.ops.triton.utils.arch_info import get_num_sms
-    import aiter
 
     from vllm.triton_utils import tl, triton
 
     def block_size(x, head_dim):

From 0fee5aadbbbb7d9b10e3b7d9abd7237b0ab77d59 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Tue, 11 Nov 2025 16:38:08 -0600
Subject: [PATCH 3/4] fix typo

Signed-off-by: Randall Smith
---
 tests/v1/entrypoints/llm/test_struct_output_generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index a7a5624d92cb..a7d769c8542a 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -680,7 +680,7 @@ def test_structured_output_with_reasoning_matrices(
     if "Qwen3" in model_name:
         assert content is not None
 
-    assert reasoning_content is not None
+    assert reasoning is not None
     if content is not None:
         output_json = json.loads(content)
         jsonschema.validate(instance=output_json, schema=reasoning_schema)

From 338df55e8565a0cda3e63c72641ebc7e6a85bb04 Mon Sep 17 00:00:00 2001
From: Randall Smith
Date: Wed, 12 Nov 2025 23:37:57 -0600
Subject: [PATCH 4/4] use get_cu_count

Signed-off-by: Randall Smith
---
 vllm/v1/attention/backends/rocm_aiter_fa.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py
index 23cda57039b3..d4969b3c6691 100644
--- a/vllm/v1/attention/backends/rocm_aiter_fa.py
+++ b/vllm/v1/attention/backends/rocm_aiter_fa.py
@@ -18,6 +18,7 @@
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
 from vllm.utils.math_utils import cdiv
+from vllm.utils.platform_utils import get_cu_count
 from vllm.v1.attention.backends.utils import (
     AttentionCGSupport,
     AttentionMetadataBuilder,
@@ -31,7 +32,6 @@
 
 if current_platform.is_rocm():
     import aiter
-    from aiter.ops.triton.utils.arch_info import get_num_sms
 
     from vllm.triton_utils import tl, triton
 
@@ -39,7 +39,7 @@ def block_size(x, head_dim):
         return min(65536 // x.element_size(), triton.next_power_of_2(head_dim))
 
     def num_programs(head_dim):
-        return min(head_dim, get_num_sms())
+        return min(head_dim, get_cu_count())
 
     @triton.jit
     def cp_mha_gather_cache_kernel(
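
Note: a minimal standalone sketch of the grid-sizing rule that PATCH 4/4 ends up with. The helper below is illustrative only; it takes the compute-unit count as a parameter instead of calling vLLM's get_cu_count() (whose import path, vllm.utils.platform_utils, is taken from the diff above), and the example CU count is just a sample value (e.g. 304 CUs on MI300X).

    # Illustrative sketch, not vLLM source: cap the number of Triton programs
    # at the GPU's compute-unit (CU) count, mirroring
    #     return min(head_dim, get_cu_count())
    def num_programs(head_dim: int, cu_count: int) -> int:
        # Never launch more programs than there are head_dim elements to
        # process, and never more than the device has compute units.
        return min(head_dim, cu_count)

    print(num_programs(head_dim=128, cu_count=304))  # 128: head_dim is the limit
    print(num_programs(head_dim=512, cu_count=304))  # 304: the CU count caps the grid

Swapping get_num_sms() for vLLM's own get_cu_count() also drops the import from aiter.ops.triton.utils, whose module path changed from device_info to arch_info across aiter versions and appears to be the import error that PATCH 1/4's subject line refers to.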