Fix qwen3 deployment bugs (#155)

11zhouxuan · web-flow · commit 58f3658d61b8 · 2025-07-17T09:36:51.000+08:00
* merge

* merge

* add Mistral-Small-3.1-24B-Instruct-2503

* modify qwq-32b deploy

* add txgemma model

* modify model list command

* fix typo

* add some ecs parameters

* add glm4-z1 models

* modify vllm backend

* add qwen3

* fix cli bugs

* fix

* add deepseek r1/Qwen3-235B-A22B

* fix local deploy account bug

* add qwen3 awq models

* fix serialize_utils bugs

* modify qwen3 deployment

* modify docs

* modify qwen3 engine; add strands client test
diff --git a/src/emd/models/llms/qwen.py b/src/emd/models/llms/qwen.py
@@ -9,7 +9,8 @@
     vllm_qwen2d5_72b_engine064,
     vllm_qwq_engine073,
     vllm_qwq_engine082,
-    vllm_qwen3_engin084
+    vllm_qwen3_engin084,
+    vllm_qwen3_engin091
 )
 from ..services import (
     sagemaker_service,
@@ -504,7 +505,7 @@
 Model.register(
     dict(
         model_id = "Qwen3-8B",
-        supported_engines=[vllm_qwen3_engin084],
+        supported_engines=[vllm_qwen3_engin091],
         supported_instances=[
             g5d2xlarge_instance,
             g5d4xlarge_instance,
diff --git a/tests/sdk_tests/client_tests/strands_agents_test.py b/tests/sdk_tests/client_tests/strands_agents_test.py
@@ -0,0 +1,21 @@
+from strands import Agent
+from strands.models.openai import OpenAIModel
+from strands_tools import calculator, current_time
+import logging  # NOTE(review): not referenced anywhere in this script
+
+model = OpenAIModel(  # model client pointed at a local /v1/ endpoint
+    client_args={
+        "api_key": "xxx",  # placeholder value; presumably the local server ignores it (confirm)
+        "base_url": "http://localhost:8080/v1/",  # expects a server reachable on localhost:8080
+    },
+    # NOTE(review): leftover "**model_config" placeholder; no such dict is defined in this script
+    model_id="Qwen3-8B",
+    params={  # passes enable_thinking=False as a chat-template kwarg (presumably disables thinking; confirm)
+        "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
+    }
+)
+
+# Manual smoke run: send the prompt "现在几点" ("What time is it now?") with both tools available.
+agent = Agent(model=model, tools=[calculator, current_time])
+response = agent("现在几点")
+print(response)