From 6869210d9e2c54b5bc5979163d49610b32957ee4 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Thu, 20 Mar 2025 18:28:42 -0700
Subject: [PATCH 01/14] initial commit with new function calling base test
 class

---
 test/srt/test_function_calling.py | 221 +++++++++++++++++++-----------
 1 file changed, 142 insertions(+), 79 deletions(-)

diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py
index 24f341a5e47..1f5856acde4 100644
--- a/test/srt/test_function_calling.py
+++ b/test/srt/test_function_calling.py
@@ -1,6 +1,7 @@
 import json
 import time
 import unittest
+from typing import Optional
 
 import openai
 
@@ -14,11 +15,13 @@
 )
 
 
-class TestOpenAIServerFunctionCalling(unittest.TestCase):
+class OpenAIServerFunctionCallingBase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
         cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+        cls.tool_call_parser = "llama3"
+        cls.tp = 1        
         cls.base_url = DEFAULT_URL_FOR_TEST
         cls.api_key = "sk-123456"
 
@@ -31,7 +34,9 @@ def setUpClass(cls):
             other_args=[
                 # If your server needs extra parameters to test function calling, please add them here.
                 "--tool-call-parser",
-                "llama3",
+                cls.tool_call_parser,
+                "--tp",
+                str(cls.tp),
             ],
         )
         cls.base_url += "/v1"
@@ -41,36 +46,14 @@ def setUpClass(cls):
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)
 
-    def test_function_calling_format(self):
+    def test_function_calling_format_with_no_tool_choice_specified(self):
         """
         Test: Whether the function call format returned by the AI is correct.
         When returning a tool call, message.content should be None, and tool_calls should be a list.
         """
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        tools = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "add",
-                    "description": "Compute the sum of two numbers",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "a": {
-                                "type": "int",
-                                "description": "A number",
-                            },
-                            "b": {
-                                "type": "int",
-                                "description": "A number",
-                            },
-                        },
-                        "required": ["a", "b"],
-                    },
-                },
-            }
-        ]
+        tools = [self.get_add_tool()]
 
         messages = [{"role": "user", "content": "Compute (3+5)"}]
         response = client.chat.completions.create(
@@ -82,19 +65,73 @@ def test_function_calling_format(self):
             tools=tools,
         )
 
-        content = response.choices[0].message.content
-        tool_calls = response.choices[0].message.tool_calls
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+    
+    def test_function_calling_named_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when using named function tool choice.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        assert content is None, (
-            "When function call is successful, message.content should be None, "
-            f"but got: {content}"
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+            tool_choice={"type": "function", "function": {"name": "add"}}
         )
-        assert (
-            isinstance(tool_calls, list) and len(tool_calls) > 0
-        ), "tool_calls should be a non-empty list"
 
-        function_name = tool_calls[0].function.name
-        assert function_name == "add", "Function name should be 'add'"
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+
+    def test_function_calling_required_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when using required function tool choice.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+            tool_choice={"type": "required"}
+        )
+
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+
+    def test_function_calling_auto_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when using auto function tool choice.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+            tool_choice={"type": "auto"}
+        )
+
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
 
     def test_function_calling_streaming_simple(self):
         """
@@ -105,28 +142,7 @@ def test_function_calling_streaming_simple(self):
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
         tools = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "get_current_weather",
-                    "description": "Get the current weather in a given location",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "city": {
-                                "type": "string",
-                                "description": "The city to find the weather for",
-                            },
-                            "unit": {
-                                "type": "string",
-                                "description": "Weather unit (celsius or fahrenheit)",
-                                "enum": ["celsius", "fahrenheit"],
-                            },
-                        },
-                        "required": ["city", "unit"],
-                    },
-                },
-            }
+            self.get_weather_tool()
         ]
 
         messages = [{"role": "user", "content": "What is the temperature in Paris?"}]
@@ -172,27 +188,7 @@ def test_function_calling_streaming_args_parsing(self):
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
         tools = [
-            {
-                "type": "function",
-                "function": {
-                    "name": "add",
-                    "description": "Compute the sum of two integers",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "a": {
-                                "type": "int",
-                                "description": "First integer",
-                            },
-                            "b": {
-                                "type": "int",
-                                "description": "Second integer",
-                            },
-                        },
-                        "required": ["a", "b"],
-                    },
-                },
-            }
+            self.get_add_tool()
         ]
 
         messages = [
@@ -245,5 +241,72 @@ def test_function_calling_streaming_args_parsing(self):
         self.assertEqual(args_obj["b"], 7, "Parameter b should be 7")
 
 
+    def assert_tool_call_format(self, response, expected_function_name : Optional[str] = None):
+        content = response.choices[0].message.content
+        tool_calls = response.choices[0].message.tool_calls
+
+        assert content is None, (
+            "When function call is successful, message.content should be None, "
+            f"but got: {content}"
+        )
+        assert (
+            isinstance(tool_calls, list) and len(tool_calls) > 0
+        ), "tool_calls should be a non-empty list"
+
+        function_name = tool_calls[0].function.name
+        if expected_function_name is not None:
+            assert function_name == expected_function_name, f"Function name should be '{expected_function_name}'"
+
+    def get_add_tool(self):
+        return {
+            "type": "function",
+            "function": {
+                "name": "add",
+                "description": "Compute the sum of two numbers",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "a": {
+                            "type": "int",
+                            "description": "A number",
+                        },
+                        "b": {
+                            "type": "int",
+                            "description": "A number",
+                        },
+                    },
+                    "required": ["a", "b"],
+                },
+            },
+        }
+
+    def get_weather_tool(self):
+        return {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {
+                            "type": "string",
+                            "description": "The city to find the weather for",
+                        },
+                        "unit": {
+                            "type": "string",
+                            "description": "Weather unit (celsius or fahrenheit)",
+                            "enum": ["celsius", "fahrenheit"],
+                        },
+                        "required": ["city", "unit"],
+                    },
+                },
+            }
+        }
+
+
+
+
+
 if __name__ == "__main__":
     unittest.main()

From 23fb88da849a0840e40296884b4e92025a28c7dd Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 10:54:01 -0700
Subject: [PATCH 02/14] added feature compatibility for some models in CI

---
 .github/workflows/nightly-test.yml            |   6 +
 .../test_function_calling.py                  | 298 ++++++++++++++++++
 .../feature_compatibility/test_json_schema.py | 170 ++++++++++
 test/srt/run_suite.py                         |   4 +
 test/srt/test_function_calling.py             |  56 ++--
 5 files changed, 512 insertions(+), 22 deletions(-)
 create mode 100644 test/srt/feature_compatibility/test_function_calling.py
 create mode 100644 test/srt/feature_compatibility/test_json_schema.py

diff --git a/.github/workflows/nightly-test.yml b/.github/workflows/nightly-test.yml
index 23b8bb44d83..dccc91f6b52 100644
--- a/.github/workflows/nightly-test.yml
+++ b/.github/workflows/nightly-test.yml
@@ -32,3 +32,9 @@ jobs:
         run: |
           cd test/srt
           python3 run_suite.py --suite nightly --timeout-per-file 3600
+
+      - name: Feature Compatibility Regression Test
+        timeout-minutes: 120
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite feature_compatibility_regression_test --timeout-per-file 3600
diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py
new file mode 100644
index 00000000000..e1ef05f6e28
--- /dev/null
+++ b/test/srt/feature_compatibility/test_function_calling.py
@@ -0,0 +1,298 @@
+import json
+import time
+import unittest
+from typing import Optional
+
+import openai
+
+from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils import kill_process_tree
+from sglang.test.test_utils import (
+    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    popen_launch_server,
+)
+
+
+def setup_class(cls, tool_call_parser: str, grammar_backend: str, tp: int, model: str = DEFAULT_SMALL_MODEL_NAME_FOR_TEST):
+    """Launch a server for `model` on `cls` with the given tool-call parser, grammar backend and `--tp` value."""
+    # NOTE(review): the per-model subclasses in this file do not pass `model`, so they all launch the default model.
+    cls.model = model
+    cls.tool_call_parser = tool_call_parser
+    cls.tp = tp
+    cls.base_url = DEFAULT_URL_FOR_TEST
+    cls.api_key = "sk-123456"
+    cls.grammar_backend = grammar_backend
+    cls.process = popen_launch_server(
+        cls.model,
+        cls.base_url,
+        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+        api_key=cls.api_key,
+        other_args=[
+            "--tool-call-parser",
+            cls.tool_call_parser,
+            "--tp",
+            str(cls.tp),
+            "--grammar-backend",
+            cls.grammar_backend,
+        ],
+    )
+    cls.base_url += "/v1"
+    cls.tokenizer = get_tokenizer(cls.model)
+
+
+class OpenAIServerFunctionCallingBase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_function_calling_format_no_tool_choice_specified(self):
+        """
+        Test: Whether the function call format returned by the AI is correct.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+        )
+
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+    
+    def test_function_calling_named_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when using named function tool choice.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+            tool_choice={"type": "function", "function": {"name": "add"}}
+        )
+
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+
+    def test_function_calling_required_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when tool_choice is "required".
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+            tool_choice="required",  # OpenAI API: "required" is a plain string, not a {"type": ...} object
+        )
+
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+
+    def test_function_calling_auto_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when tool_choice is "auto".
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+            tool_choice="auto",  # OpenAI API: "auto" is a plain string, not a {"type": ...} object
+        )
+
+        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+
+    def test_function_calling_streaming_args_parsing(self):
+        """
+        Test: Whether the function call arguments returned in streaming mode can be correctly concatenated into valid JSON.
+        - The user request requires multiple parameters.
+        - AI may return the arguments in chunks that need to be concatenated.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [
+            self.get_add_tool()
+        ]
+
+        messages = [
+            {"role": "user", "content": "Please sum 5 and 7, just call the function."}
+        ]
+
+        response_stream = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.9,
+            top_p=0.9,
+            stream=True,
+            tools=tools,
+        )
+
+        argument_fragments = []
+        function_name = None
+        for chunk in response_stream:
+            choice = chunk.choices[0]
+            if choice.delta.tool_calls:
+                tool_call = choice.delta.tool_calls[0]
+                # Record the function name on first occurrence
+                function_name = tool_call.function.name or function_name
+                # In case of multiple chunks, JSON fragments may need to be concatenated
+                if tool_call.function.arguments:
+                    argument_fragments.append(tool_call.function.arguments)
+
+        self.assertEqual(function_name, "add", "Function name should be 'add'")
+        joined_args = "".join(argument_fragments)
+        self.assertTrue(
+            len(joined_args) > 0,
+            "No parameter fragments were returned in the function call",
+        )
+
+        # Check whether the concatenated JSON is valid
+        try:
+            args_obj = json.loads(joined_args)
+        except json.JSONDecodeError:
+            self.fail(
+                "The concatenated tool call arguments are not valid JSON, parsing failed"
+            )
+
+        self.assertIn("a", args_obj, "Missing parameter 'a'")
+        self.assertIn("b", args_obj, "Missing parameter 'b'")
+        self.assertEqual(
+            args_obj["a"],
+            5,
+            "Parameter a should be 5",
+        )
+        self.assertEqual(args_obj["b"], 7, "Parameter b should be 7")
+
+
+    def assert_tool_call_format(self, response, expected_function_name: Optional[str] = None, expected_function_arguments: Optional[list] = None):
+        """Assert the first choice is a pure tool call; optionally check its name and that its JSON arguments contain the expected keys."""
+        content = response.choices[0].message.content
+        tool_calls = response.choices[0].message.tool_calls
+        assert content is None, (
+            "When function call is successful, message.content should be None, "
+            f"but got: {content}"
+        )
+        assert isinstance(tool_calls, list) and len(tool_calls) > 0, "tool_calls should be a non-empty list"
+        function = tool_calls[0].function  # only the first tool call is inspected
+        if expected_function_name is not None:
+            assert function.name == expected_function_name, f"Function name should be '{expected_function_name}'"
+        if expected_function_arguments is not None:
+            missing = set(expected_function_arguments) - set(json.loads(function.arguments))
+            assert not missing, f"Missing function arguments: {sorted(missing)}"
+
+    def get_add_tool(self):
+        """
+        Return the OpenAI tool definition for `add`.
+
+        It takes two required integer parameters, `a` and `b`.
+        JSON Schema has no "int" type, so they are declared as "integer".
+        """
+        return {
+            "type": "function",
+            "function": {
+                "name": "add",
+                "description": "Compute the sum of two numbers",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "a": {"type": "integer", "description": "A number"},
+                        "b": {"type": "integer", "description": "A number"},
+                    },
+                    "required": ["a", "b"],
+                },
+            },
+        }
+
+    def get_weather_tool(self):
+        """Return the OpenAI tool definition for `get_current_weather` with required `city` and `unit` parameters."""
+        return {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {"type": "string", "description": "The city to find the weather for"},
+                        "unit": {
+                            "type": "string",
+                            "description": "Weather unit (celsius or fahrenheit)",
+                            "enum": ["celsius", "fahrenheit"],
+                        },
+                    },
+                    # "required" must be a sibling of "properties"; nested inside it,
+                    # it would be read as a parameter named "required".
+                    "required": ["city", "unit"],
+                },
+            },
+        }
+
+
+class MetaLlama_3_1_8BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+
+
+class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2)
+
+
+class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+
+
+class MetaLlama_3_3_70BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2)
+
+
+class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, tool_call_parser="mistral", grammar_backend="outlines", tp=1)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py
new file mode 100644
index 00000000000..e80e5b2dfab
--- /dev/null
+++ b/test/srt/feature_compatibility/test_json_schema.py
@@ -0,0 +1,170 @@
+import json
+import unittest
+from concurrent.futures import ThreadPoolExecutor
+
+import openai
+import requests
+
+from sglang.srt.utils import kill_process_tree
+from sglang.test.test_utils import (
+    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    popen_launch_server,
+)
+
+
+def setup_class(cls, backend: str, model: str, tp: int):
+    cls.model = model
+    cls.base_url = DEFAULT_URL_FOR_TEST
+    cls.json_schema = json.dumps(
+        {
+            "type": "object",
+            "properties": {
+                "name": {"type": "string"},
+                "population": {"type": "integer"},
+            },
+            "required": ["name", "population"],
+            "additionalProperties": False,
+        }
+    )
+
+    other_args = [
+        "--grammar-backend",
+        backend,
+        "--tp",
+        str(tp),
+    ]
+
+    cls.process = popen_launch_server(
+        cls.model,
+        cls.base_url,
+        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+        other_args=other_args,
+    )
+
+
+class TestJSONSchemaBase(unittest.TestCase):
+    """JSON-schema response_format test; subclasses override setUpClass to pick backend, model and tp."""
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST, tp=1)  # tp is a required argument
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_json_openai(self):
+        """Request a json_schema-formatted reply over the OpenAI chat API and check the field types."""
+        client = openai.Client(api_key="EMPTY", base_url=f"{self.base_url}/v1")
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": "You are a helpful AI assistant"},
+                {"role": "user", "content": "Introduce the capital of France."},
+            ],
+            temperature=0,
+            max_tokens=128,
+            response_format={
+                "type": "json_schema",
+                "json_schema": {"name": "foo", "schema": json.loads(self.json_schema)},
+            },
+        )
+        text = response.choices[0].message.content
+        try:
+            js_obj = json.loads(text)
+        except (TypeError, json.decoder.JSONDecodeError):
+            print("JSONDecodeError", text)
+            raise
+
+        self.assertIsInstance(js_obj["name"], str)
+        self.assertIsInstance(js_obj["population"], int)
+
+
+# MetaLlama_3_1_8BInstruct
+
+class MetaLlama_3_1_8BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1)
+
+class MetaLlama_3_1_8BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1)  
+
+class MetaLlama_3_1_8BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1)      
+
+# MetaLlama_3_1_70BInstruct
+
+class MetaLlama_3_1_70BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2)
+
+class MetaLlama_3_1_70BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2)
+
+class MetaLlama_3_1_70BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2)
+
+# MetaLlama_3_2_11BVisionInstruct
+
+class MetaLlama_3_2_11BVisionInstruct(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2)
+
+class MetaLlama_3_2_11BVisionInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2)  
+
+class MetaLlama_3_2_11BVisionInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2)                
+
+# MetaLlama_3_3_70BInstruct
+
+class MetaLlama_3_3_70BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2)
+
+class MetaLlama_3_3_70BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2)        
+
+class MetaLlama_3_3_70BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2)                
+
+# MistralNemo12BInstruct
+
+class MistralNemo12BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1)
+
+class MistralNemo12BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1)        
+
+class MistralNemo12BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1)                
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index e4e5f32526a..2689d14034b 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -80,6 +80,10 @@ class TestFile:
     "nightly": [
         TestFile("test_nightly_gsm8k_eval.py"),
     ],
+    "feature_compatibility_regression_test": [
+        TestFile("feature_compatibility/test_function_calling.py"),
+        TestFile("feature_compatibility/test_json_schema.py"),
+    ],
 }
 
 
diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py
index 1f5856acde4..5bdf92fd4ed 100644
--- a/test/srt/test_function_calling.py
+++ b/test/srt/test_function_calling.py
@@ -21,7 +21,7 @@ def setUpClass(cls):
         # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
         cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
         cls.tool_call_parser = "llama3"
-        cls.tp = 1        
+        cls.tp = 1
         cls.base_url = DEFAULT_URL_FOR_TEST
         cls.api_key = "sk-123456"
 
@@ -65,8 +65,12 @@ def test_function_calling_format_with_no_tool_choice_specified(self):
             tools=tools,
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
-    
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
+
     def test_function_calling_named_tool_choice(self):
         """
         Test: Whether the function call format returned by the AI is correct when using named function tool choice.
@@ -84,10 +88,14 @@ def test_function_calling_named_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "function", "function": {"name": "add"}}
+            tool_choice={"type": "function", "function": {"name": "add"}},
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
 
     def test_function_calling_required_tool_choice(self):
         """
@@ -106,10 +114,14 @@ def test_function_calling_required_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "required"}
+            tool_choice={"type": "required"},
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
 
     def test_function_calling_auto_tool_choice(self):
         """
@@ -128,10 +140,14 @@ def test_function_calling_auto_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "auto"}
+            tool_choice={"type": "auto"},
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
 
     def test_function_calling_streaming_simple(self):
         """
@@ -141,9 +157,7 @@ def test_function_calling_streaming_simple(self):
         """
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        tools = [
-            self.get_weather_tool()
-        ]
+        tools = [self.get_weather_tool()]
 
         messages = [{"role": "user", "content": "What is the temperature in Paris?"}]
 
@@ -187,9 +201,7 @@ def test_function_calling_streaming_args_parsing(self):
         """
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        tools = [
-            self.get_add_tool()
-        ]
+        tools = [self.get_add_tool()]
 
         messages = [
             {"role": "user", "content": "Please sum 5 and 7, just call the function."}
@@ -240,8 +252,9 @@ def test_function_calling_streaming_args_parsing(self):
         )
         self.assertEqual(args_obj["b"], 7, "Parameter b should be 7")
 
-
-    def assert_tool_call_format(self, response, expected_function_name : Optional[str] = None):
+    def assert_tool_call_format(
+        self, response, expected_function_name: Optional[str] = None
+    ):
         content = response.choices[0].message.content
         tool_calls = response.choices[0].message.tool_calls
 
@@ -255,7 +268,9 @@ def assert_tool_call_format(self, response, expected_function_name : Optional[st
 
         function_name = tool_calls[0].function.name
         if expected_function_name is not None:
-            assert function_name == expected_function_name, f"Function name should be '{expected_function_name}'"
+            assert (
+                function_name == expected_function_name
+            ), f"Function name should be '{expected_function_name}'"
 
     def get_add_tool(self):
         return {
@@ -301,12 +316,9 @@ def get_weather_tool(self):
                         "required": ["city", "unit"],
                     },
                 },
-            }
+            },
         }
 
 
-
-
-
 if __name__ == "__main__":
     unittest.main()

From 7b565aed730a600f0f7cc3f2046ac9cc0dba5645 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 12:48:13 -0700
Subject: [PATCH 03/14] reverted contents of test_function_calling to contents
 of base repo

---
 test/srt/test_function_calling.py | 243 +++++++++++-------------------
 1 file changed, 84 insertions(+), 159 deletions(-)

diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py
index 5bdf92fd4ed..d73db349689 100644
--- a/test/srt/test_function_calling.py
+++ b/test/srt/test_function_calling.py
@@ -1,7 +1,6 @@
 import json
 import time
 import unittest
-from typing import Optional
 
 import openai
 
@@ -15,13 +14,11 @@
 )
 
 
-class OpenAIServerFunctionCallingBase(unittest.TestCase):
+class TestOpenAIServerFunctionCalling(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST
         cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
-        cls.tool_call_parser = "llama3"
-        cls.tp = 1
         cls.base_url = DEFAULT_URL_FOR_TEST
         cls.api_key = "sk-123456"
 
@@ -34,9 +31,7 @@ def setUpClass(cls):
             other_args=[
                 # If your server needs extra parameters to test function calling, please add them here.
                 "--tool-call-parser",
-                cls.tool_call_parser,
-                "--tp",
-                str(cls.tp),
+                "llama3",
             ],
         )
         cls.base_url += "/v1"
@@ -46,65 +41,36 @@ def setUpClass(cls):
     def tearDownClass(cls):
         kill_process_tree(cls.process.pid)
 
-    def test_function_calling_format_with_no_tool_choice_specified(self):
+    def test_function_calling_format(self):
         """
         Test: Whether the function call format returned by the AI is correct.
         When returning a tool call, message.content should be None, and tool_calls should be a list.
         """
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        tools = [self.get_add_tool()]
-
-        messages = [{"role": "user", "content": "Compute (3+5)"}]
-        response = client.chat.completions.create(
-            model=self.model,
-            messages=messages,
-            temperature=0.8,
-            top_p=0.8,
-            stream=False,
-            tools=tools,
-        )
-
-        self.assert_tool_call_format(
-            response,
-            expected_function_name="add",
-            expected_function_arguments=["a", "b"],
-        )
-
-    def test_function_calling_named_tool_choice(self):
-        """
-        Test: Whether the function call format returned by the AI is correct when using named function tool choice.
-        When returning a tool call, message.content should be None, and tool_calls should be a list.
-        """
-        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
-
-        tools = [self.get_add_tool()]
-
-        messages = [{"role": "user", "content": "Compute (3+5)"}]
-        response = client.chat.completions.create(
-            model=self.model,
-            messages=messages,
-            temperature=0.8,
-            top_p=0.8,
-            stream=False,
-            tools=tools,
-            tool_choice={"type": "function", "function": {"name": "add"}},
-        )
-
-        self.assert_tool_call_format(
-            response,
-            expected_function_name="add",
-            expected_function_arguments=["a", "b"],
-        )
-
-    def test_function_calling_required_tool_choice(self):
-        """
-        Test: Whether the function call format returned by the AI is correct when using required function tool choice.
-        When returning a tool call, message.content should be None, and tool_calls should be a list.
-        """
-        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
-
-        tools = [self.get_add_tool()]
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "add",
+                    "description": "Compute the sum of two numbers",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "a": {
+                                "type": "int",
+                                "description": "A number",
+                            },
+                            "b": {
+                                "type": "int",
+                                "description": "A number",
+                            },
+                        },
+                        "required": ["a", "b"],
+                    },
+                },
+            }
+        ]
 
         messages = [{"role": "user", "content": "Compute (3+5)"}]
         response = client.chat.completions.create(
@@ -114,40 +80,21 @@ def test_function_calling_required_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "required"},
         )
 
-        self.assert_tool_call_format(
-            response,
-            expected_function_name="add",
-            expected_function_arguments=["a", "b"],
-        )
-
-    def test_function_calling_auto_tool_choice(self):
-        """
-        Test: Whether the function call format returned by the AI is correct when using auto function tool choice.
-        When returning a tool call, message.content should be None, and tool_calls should be a list.
-        """
-        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
-
-        tools = [self.get_add_tool()]
+        content = response.choices[0].message.content
+        tool_calls = response.choices[0].message.tool_calls
 
-        messages = [{"role": "user", "content": "Compute (3+5)"}]
-        response = client.chat.completions.create(
-            model=self.model,
-            messages=messages,
-            temperature=0.8,
-            top_p=0.8,
-            stream=False,
-            tools=tools,
-            tool_choice={"type": "auto"},
+        assert content is None, (
+            "When function call is successful, message.content should be None, "
+            f"but got: {content}"
         )
+        assert (
+            isinstance(tool_calls, list) and len(tool_calls) > 0
+        ), "tool_calls should be a non-empty list"
 
-        self.assert_tool_call_format(
-            response,
-            expected_function_name="add",
-            expected_function_arguments=["a", "b"],
-        )
+        function_name = tool_calls[0].function.name
+        assert function_name == "add", "Function name should be 'add'"
 
     def test_function_calling_streaming_simple(self):
         """
@@ -157,7 +104,30 @@ def test_function_calling_streaming_simple(self):
         """
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        tools = [self.get_weather_tool()]
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_current_weather",
+                    "description": "Get the current weather in a given location",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "city": {
+                                "type": "string",
+                                "description": "The city to find the weather for",
+                            },
+                            "unit": {
+                                "type": "string",
+                                "description": "Weather unit (celsius or fahrenheit)",
+                                "enum": ["celsius", "fahrenheit"],
+                            },
+                        },
+                        "required": ["city", "unit"],
+                    },
+                },
+            }
+        ]
 
         messages = [{"role": "user", "content": "What is the temperature in Paris?"}]
 
@@ -201,7 +171,29 @@ def test_function_calling_streaming_args_parsing(self):
         """
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        tools = [self.get_add_tool()]
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "add",
+                    "description": "Compute the sum of two integers",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "a": {
+                                "type": "int",
+                                "description": "First integer",
+                            },
+                            "b": {
+                                "type": "int",
+                                "description": "Second integer",
+                            },
+                        },
+                        "required": ["a", "b"],
+                    },
+                },
+            }
+        ]
 
         messages = [
             {"role": "user", "content": "Please sum 5 and 7, just call the function."}
@@ -252,73 +244,6 @@ def test_function_calling_streaming_args_parsing(self):
         )
         self.assertEqual(args_obj["b"], 7, "Parameter b should be 7")
 
-    def assert_tool_call_format(
-        self, response, expected_function_name: Optional[str] = None
-    ):
-        content = response.choices[0].message.content
-        tool_calls = response.choices[0].message.tool_calls
-
-        assert content is None, (
-            "When function call is successful, message.content should be None, "
-            f"but got: {content}"
-        )
-        assert (
-            isinstance(tool_calls, list) and len(tool_calls) > 0
-        ), "tool_calls should be a non-empty list"
-
-        function_name = tool_calls[0].function.name
-        if expected_function_name is not None:
-            assert (
-                function_name == expected_function_name
-            ), f"Function name should be '{expected_function_name}'"
-
-    def get_add_tool(self):
-        return {
-            "type": "function",
-            "function": {
-                "name": "add",
-                "description": "Compute the sum of two numbers",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "a": {
-                            "type": "int",
-                            "description": "A number",
-                        },
-                        "b": {
-                            "type": "int",
-                            "description": "A number",
-                        },
-                    },
-                    "required": ["a", "b"],
-                },
-            },
-        }
-
-    def get_weather_tool(self):
-        return {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "city": {
-                            "type": "string",
-                            "description": "The city to find the weather for",
-                        },
-                        "unit": {
-                            "type": "string",
-                            "description": "Weather unit (celsius or fahrenheit)",
-                            "enum": ["celsius", "fahrenheit"],
-                        },
-                        "required": ["city", "unit"],
-                    },
-                },
-            },
-        }
-
 
 if __name__ == "__main__":
-    unittest.main()
+    unittest.main()
\ No newline at end of file

From 6b62164d23ded1a84ce1f702aa1df71430f5d684 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 14:45:08 -0700
Subject: [PATCH 04/14] possible fix to Llama32 function call parsing

---
 python/sglang/srt/function_call_parser.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/function_call_parser.py b/python/sglang/srt/function_call_parser.py
index 4ae8d0a0d01..2aac81e7559 100644
--- a/python/sglang/srt/function_call_parser.py
+++ b/python/sglang/srt/function_call_parser.py
@@ -427,8 +427,10 @@ def detect_and_parse(self, text: str, tools: List[Function]) -> List[ToolCallIte
             return StreamingParseResult(normal_text=text, calls=[])
 
         if "<|python_tag|>" in text:
-            _, action_text = text.split("<|python_tag|>")
+            normal_text, action_text = text.split("<|python_tag|>")
+            normal_text = normal_text.strip()
         else:
+            normal_text = ""
             action_text = text
 
         # Split by semicolon and process each part

From 3622d87aa39bd3a741470ecbd8b83ac04b29da0a Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 16:46:26 -0700
Subject: [PATCH 05/14] added qwen25 and fixed model specification for function
 calling

---
 .../test_function_calling.py                  | 29 ++++++++++++-------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py
index e1ef05f6e28..1f8b4c0ed25 100644
--- a/test/srt/feature_compatibility/test_function_calling.py
+++ b/test/srt/feature_compatibility/test_function_calling.py
@@ -15,13 +15,13 @@
 )
 
 
-def setup_class(cls, tool_call_parser: str, grammar_backend: str, tp: int):
-    cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST
+def setup_class(cls, model : str, tool_call_parser: str, grammar_backend: str, tp: int):
+    cls.model = model
     cls.tool_call_parser = tool_call_parser
+    cls.grammar_backend = grammar_backend
     cls.tp = tp
     cls.base_url = DEFAULT_URL_FOR_TEST
     cls.api_key = "sk-123456"
-    cls.grammar_backend = grammar_backend
 
     # Start the local OpenAI Server
     cls.process = popen_launch_server(
@@ -45,7 +45,7 @@ def setup_class(cls, tool_call_parser: str, grammar_backend: str, tp: int):
 class OpenAIServerFunctionCallingBase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+        setup_class(cls, model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
 
     @classmethod
     def tearDownClass(cls):
@@ -267,32 +267,41 @@ def get_weather_tool(self):
 class MetaLlama_3_1_8BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+        setup_class(cls, model="meta-llama/Llama-3.1-8B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1)
 
 
-class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase):
+@unittest.skip("Tool call parsing is broken for Llama 3.2 models")
+class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2)
+        setup_class(cls, model="meta-llama/Llama-3.2-1B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1)
 
+class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, model="meta-llama/Llama-3.1-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2)
 
 class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+        setup_class(cls, model="meta-llama/Llama-3.2-11B-Vision-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1)
 
 
 class MetaLlama_3_3_70BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2)
+        setup_class(cls, model="meta-llama/Llama-3.3-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2)
 
 
 class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, tool_call_parser="mistral", grammar_backend="outlines", tp=1)
+        setup_class(cls, model="nvidia/Mistral-NeMo-12B-Instruct", tool_call_parser="mistral", grammar_backend="outlines", tp=1)
 
+class Qwen_2_5_7BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, model="Qwen/Qwen2.5-7B-Instruct", tool_call_parser="qwen25", grammar_backend="outlines", tp=1)
 
 if __name__ == "__main__":
     unittest.main()

From cb378d583b6fe250a5cbf96fb4ffe6f2cc5cb713 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 17:58:14 -0700
Subject: [PATCH 06/14] fixed test cases in feature compatibility for function
 calling

---
 .../test_function_calling.py                  | 158 +++++++++++++++---
 .../feature_compatibility/test_json_schema.py |  89 ++++++++--
 test/srt/test_function_calling.py             |   2 +-
 3 files changed, 209 insertions(+), 40 deletions(-)

diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py
index 1f8b4c0ed25..5e0562ee1a0 100644
--- a/test/srt/feature_compatibility/test_function_calling.py
+++ b/test/srt/feature_compatibility/test_function_calling.py
@@ -1,3 +1,35 @@
+"""
+
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_required_tool_choice
+
+"""
+
 import json
 import time
 import unittest
@@ -15,7 +47,7 @@
 )
 
 
-def setup_class(cls, model : str, tool_call_parser: str, grammar_backend: str, tp: int):
+def setup_class(cls, model: str, tool_call_parser: str, grammar_backend: str, tp: int):
     cls.model = model
     cls.tool_call_parser = tool_call_parser
     cls.grammar_backend = grammar_backend
@@ -45,7 +77,13 @@ def setup_class(cls, model : str, tool_call_parser: str, grammar_backend: str, t
 class OpenAIServerFunctionCallingBase(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST, tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+        setup_class(
+            cls,
+            model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -70,8 +108,12 @@ def test_function_calling_format_no_tool_choice_specified(self):
             tools=tools,
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
-    
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
+
     def test_function_calling_named_tool_choice(self):
         """
         Test: Whether the function call format returned by the AI is correct when using named function tool choice.
@@ -89,10 +131,14 @@ def test_function_calling_named_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "function", "function": {"name": "add"}}
+            tool_choice={"type": "function", "function": {"name": "add"}},
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
 
     def test_function_calling_required_tool_choice(self):
         """
@@ -111,10 +157,14 @@ def test_function_calling_required_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "required"}
+            tool_choice={"type": "required"},
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
 
     def test_function_calling_auto_tool_choice(self):
         """
@@ -133,10 +183,14 @@ def test_function_calling_auto_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "auto"}
+            tool_choice={"type": "auto"},
         )
 
-        self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"])
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
 
     def test_function_calling_streaming_args_parsing(self):
         """
@@ -146,9 +200,7 @@ def test_function_calling_streaming_args_parsing(self):
         """
         client = openai.Client(api_key=self.api_key, base_url=self.base_url)
 
-        tools = [
-            self.get_add_tool()
-        ]
+        tools = [self.get_add_tool()]
 
         messages = [
             {"role": "user", "content": "Please sum 5 and 7, just call the function."}
@@ -199,8 +251,12 @@ def test_function_calling_streaming_args_parsing(self):
         )
         self.assertEqual(args_obj["b"], 7, "Parameter b should be 7")
 
-
-    def assert_tool_call_format(self, response, expected_function_name : Optional[str] = None):
+    def assert_tool_call_format(
+        self,
+        response,
+        expected_function_name: Optional[str] = None,
+        expected_function_arguments: Optional[list] = None,
+    ):
         content = response.choices[0].message.content
         tool_calls = response.choices[0].message.tool_calls
 
@@ -214,7 +270,15 @@ def assert_tool_call_format(self, response, expected_function_name : Optional[st
 
         function_name = tool_calls[0].function.name
         if expected_function_name is not None:
-            assert function_name == expected_function_name, f"Function name should be '{expected_function_name}'"
+            assert (
+                function_name == expected_function_name
+            ), f"Function name should be '{expected_function_name}'"
+
+        if expected_function_arguments is not None:
+            actual_function_arguments = json.loads(tool_calls[0].function.arguments)
+            assert set(actual_function_arguments) == set(
+                expected_function_arguments
+            ), f"Function argument names should be {expected_function_arguments}, arguments were {actual_function_arguments}"
 
     def get_add_tool(self):
         return {
@@ -260,48 +324,94 @@ def get_weather_tool(self):
                         "required": ["city", "unit"],
                     },
                 },
-            }
+            },
         }
 
 
 class MetaLlama_3_1_8BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model="meta-llama/Llama-3.1-8B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.1-8B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
 
 
 @unittest.skip("Tool call parsing is broken for Llama 3.2 models")
 class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model="meta-llama/Llama-3.2-1B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
 
 class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model="meta-llama/Llama-3.1-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2)
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.1-70B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=2,
+        )
+
 
 class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model="meta-llama/Llama-3.2-11B-Vision-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1)
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
 
 
 class MetaLlama_3_3_70BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model="meta-llama/Llama-3.3-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2)
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.3-70B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=2,
+        )
 
 
 class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model="nvidia/Mistral-NeMo-12B-Instruct", tool_call_parser="mistral", grammar_backend="outlines", tp=1)
+        setup_class(
+            cls,
+            model="nvidia/Mistral-NeMo-12B-Instruct",
+            tool_call_parser="mistral",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
 
 class Qwen_2_5_7BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, model="Qwen/Qwen2.5-7B-Instruct", tool_call_parser="qwen25", grammar_backend="outlines", tp=1)
+        setup_class(
+            cls,
+            model="Qwen/Qwen2.5-7B-Instruct",
+            tool_call_parser="qwen25",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py
index e80e5b2dfab..e50e270ae10 100644
--- a/test/srt/feature_compatibility/test_json_schema.py
+++ b/test/srt/feature_compatibility/test_json_schema.py
@@ -83,88 +83,147 @@ def test_json_openai(self):
 
 # MetaLlama_3_1_8BInstruct
 
+
 class MetaLlama_3_1_8BInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1)
+        setup_class(
+            cls, backend="outlines", model="meta-llama/Llama-3.1-8B-Instruct", tp=1
+        )
+
 
 class MetaLlama_3_1_8BInstructXGrammar(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1)  
+        setup_class(
+            cls, backend="xgrammar", model="meta-llama/Llama-3.1-8B-Instruct", tp=1
+        )
+
 
 class MetaLlama_3_1_8BInstructLLGuidance(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1)      
+        setup_class(
+            cls, backend="llguidance", model="meta-llama/Llama-3.1-8B-Instruct", tp=1
+        )
+
 
 # MetaLlama_3_1_70BInstruct
 
+
 class MetaLlama_3_1_70BInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2)
+        setup_class(
+            cls, backend="outlines", model="meta-llama/Llama-3.1-70B-Instruct", tp=2
+        )
+
 
 class MetaLlama_3_1_70BInstructXGrammar(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2)
+        setup_class(
+            cls, backend="xgrammar", model="meta-llama/Llama-3.1-70B-Instruct", tp=2
+        )
+
 
 class MetaLlama_3_1_70BInstructLLGuidance(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2)
+        setup_class(
+            cls, backend="llguidance", model="meta-llama/Llama-3.1-70B-Instruct", tp=2
+        )
+
 
 # MetaLlama_3_2_11BVisionInstruct
 
+
 class MetaLlama_3_2_11BVisionInstruct(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2)
+        setup_class(
+            cls,
+            backend="outlines",
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tp=2,
+        )
+
 
 class MetaLlama_3_2_11BVisionInstructXGrammar(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2)  
+        setup_class(
+            cls,
+            backend="xgrammar",
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tp=2,
+        )
+
 
 class MetaLlama_3_2_11BVisionInstructLLGuidance(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2)                
+        setup_class(
+            cls,
+            backend="llguidance",
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tp=2,
+        )
+
 
 # MetaLlama_3_3_70BInstruct
 
+
 class MetaLlama_3_3_70BInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2)
+        setup_class(
+            cls, backend="outlines", model="meta-llama/Llama-3.3-70B-Instruct", tp=2
+        )
+
 
 class MetaLlama_3_3_70BInstructXGrammar(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2)        
+        setup_class(
+            cls, backend="xgrammar", model="meta-llama/Llama-3.3-70B-Instruct", tp=2
+        )
+
 
 class MetaLlama_3_3_70BInstructLLGuidance(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2)                
+        setup_class(
+            cls, backend="llguidance", model="meta-llama/Llama-3.3-70B-Instruct", tp=2
+        )
+
 
 # MistralNemo12BInstruct
 
+
 class MistralNemo12BInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="outlines", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1)
+        setup_class(
+            cls, backend="outlines", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1
+        )
+
 
 class MistralNemo12BInstructXGrammar(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="xgrammar", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1)        
+        setup_class(
+            cls, backend="xgrammar", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1
+        )
+
 
 class MistralNemo12BInstructLLGuidance(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="llguidance", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1)                
+        setup_class(
+            cls, backend="llguidance", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py
index d73db349689..24f341a5e47 100644
--- a/test/srt/test_function_calling.py
+++ b/test/srt/test_function_calling.py
@@ -246,4 +246,4 @@ def test_function_calling_streaming_args_parsing(self):
 
 
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()

From 0907dc2ef2bd0cfe8355ced3f87930ef813cf6b4 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 18:06:26 -0700
Subject: [PATCH 07/14] fixed tool choice spec in tests for function calling
 feature compatibility

---
 test/srt/feature_compatibility/test_function_calling.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py
index 5e0562ee1a0..c3ebdd958ed 100644
--- a/test/srt/feature_compatibility/test_function_calling.py
+++ b/test/srt/feature_compatibility/test_function_calling.py
@@ -157,7 +157,7 @@ def test_function_calling_required_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "required"},
+            tool_choice="required",
         )
 
         self.assert_tool_call_format(
@@ -183,7 +183,7 @@ def test_function_calling_auto_tool_choice(self):
             top_p=0.8,
             stream=False,
             tools=tools,
-            tool_choice={"type": "auto"},
+            tool_choice="auto",
         )
 
         self.assert_tool_call_format(

From 5b3414bc1001366c5d69476d4d87d08c08181a5e Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 18:34:35 -0700
Subject: [PATCH 08/14] fixed test comments, reordered model test classes,
 and skipped Llama 3.2 11B Vision function calling tests

---
 .../test_function_calling.py                  | 43 ++++++++++---------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py
index c3ebdd958ed..a0c59040670 100644
--- a/test/srt/feature_compatibility/test_function_calling.py
+++ b/test/srt/feature_compatibility/test_function_calling.py
@@ -4,29 +4,29 @@
 python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice
 python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice
 
-python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified
-python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice
-python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice
-
-python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified
-python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice
-python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice
-
-python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_format_no_tool_choice_specified
-python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice
-python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_required_tool_choice
 
 python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified
 python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice
 python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice
 
+python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_required_tool_choice
+
 python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified
 python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice
 python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice
 
-python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_format_no_tool_choice_specified
-python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_named_tool_choice
-python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_required_tool_choice
+python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_required_tool_choice
 
 """
 
@@ -340,31 +340,32 @@ def setUpClass(cls):
         )
 
 
-@unittest.skip("Tool call parsing is broken for Llama 3.2 models")
-class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase):
+class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
         setup_class(
             cls,
-            model="meta-llama/Llama-3.2-1B-Instruct",
+            model="meta-llama/Llama-3.1-70B-Instruct",
             tool_call_parser="llama3",
             grammar_backend="outlines",
-            tp=1,
+            tp=2,
         )
 
 
-class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase):
+@unittest.skip("Tool call parsing is broken for Llama 3.2 models")
+class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):
         setup_class(
             cls,
-            model="meta-llama/Llama-3.1-70B-Instruct",
+            model="meta-llama/Llama-3.2-1B-Instruct",
             tool_call_parser="llama3",
             grammar_backend="outlines",
-            tp=2,
+            tp=1,
         )
 
 
+@unittest.skip("Tool call parsing is broken for Llama 3.2 models")
 class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase):
     @classmethod
     def setUpClass(cls):

From 29f0d7f33676cf6d3d8af11fdb2a557781e4db33 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 18:40:15 -0700
Subject: [PATCH 09/14] fixed model name for mistral nemo 12b in feature
 compatibility function calling test

---
 test/srt/feature_compatibility/test_function_calling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py
index a0c59040670..4d53c27cc60 100644
--- a/test/srt/feature_compatibility/test_function_calling.py
+++ b/test/srt/feature_compatibility/test_function_calling.py
@@ -395,7 +395,7 @@ class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase):
     def setUpClass(cls):
         setup_class(
             cls,
-            model="nvidia/Mistral-NeMo-12B-Instruct",
+            model="mistralai/Mistral-Nemo-Instruct-2407",
             tool_call_parser="mistral",
             grammar_backend="outlines",
             tp=1,

From 93b463424fe8a02a5fec92cc55cfc5f6f3b86c66 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 19:28:39 -0700
Subject: [PATCH 10/14] fixed some test cases and added some test cases in
 test_json_schema in feature_compatibility

---
 .../feature_compatibility/test_json_schema.py | 100 +++++++++++++++++-
 1 file changed, 95 insertions(+), 5 deletions(-)

diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py
index e50e270ae10..37fcb92a415 100644
--- a/test/srt/feature_compatibility/test_json_schema.py
+++ b/test/srt/feature_compatibility/test_json_schema.py
@@ -1,9 +1,39 @@
+"""
+
+python -m unittest test_json_schema.MetaLlama_3_1_8BInstructOutlines.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_1_8BInstructXGrammar.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_1_8BInstructLLGuidance.test_json_openai
+
+python -m unittest test_json_schema.MetaLlama_3_1_70BInstructOutlines.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_1_70BInstructXGrammar.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_1_70BInstructLLGuidance.test_json_openai
+
+python -m unittest test_json_schema.MetaLlama_3_2_1BInstructOutlines.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_2_1BInstructXGrammar.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_2_1BInstructLLGuidance.test_json_openai
+
+python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructOutlines.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructXGrammar.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructLLGuidance.test_json_openai
+
+python -m unittest test_json_schema.MetaLlama_3_3_70BInstructOutlines.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_3_70BInstructXGrammar.test_json_openai
+python -m unittest test_json_schema.MetaLlama_3_3_70BInstructLLGuidance.test_json_openai
+
+python -m unittest test_json_schema.MistralNemo12BInstructOutlines.test_json_openai
+python -m unittest test_json_schema.MistralNemo12BInstructXGrammar.test_json_openai
+python -m unittest test_json_schema.MistralNemo12BInstructLLGuidance.test_json_openai
+
+python -m unittest test_json_schema.Qwen_2_5_7BInstructOutlines.test_json_openai
+python -m unittest test_json_schema.Qwen_2_5_7BInstructXGrammar.test_json_openai
+python -m unittest test_json_schema.Qwen_2_5_7BInstructLLGuidance.test_json_openai
+
+"""
+
 import json
 import unittest
-from concurrent.futures import ThreadPoolExecutor
 
 import openai
-import requests
 
 from sglang.srt.utils import kill_process_tree
 from sglang.test.test_utils import (
@@ -135,6 +165,42 @@ def setUpClass(cls):
         )
 
 
+# MetaLlama_3_2_1BInstruct
+
+
+class MetaLlama_3_2_1BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="outlines",
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tp=1,
+        )
+
+
+class MetaLlama_3_2_1BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="xgrammar",
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tp=1,
+        )
+
+
+class MetaLlama_3_2_1BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="llguidance",
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tp=1,
+        )
+
+
 # MetaLlama_3_2_11BVisionInstruct
 
 
@@ -205,7 +271,7 @@ class MistralNemo12BInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
         setup_class(
-            cls, backend="outlines", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1
+            cls, backend="outlines", model="mistralai/Mistral-Nemo-Instruct-2407", tp=1
         )
 
 
@@ -213,7 +279,7 @@ class MistralNemo12BInstructXGrammar(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
         setup_class(
-            cls, backend="xgrammar", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1
+            cls, backend="xgrammar", model="mistralai/Mistral-Nemo-Instruct-2407", tp=1
         )
 
 
@@ -221,9 +287,33 @@ class MistralNemo12BInstructLLGuidance(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
         setup_class(
-            cls, backend="llguidance", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1
+            cls,
+            backend="llguidance",
+            model="mistralai/Mistral-Nemo-Instruct-2407",
+            tp=1,
         )
 
 
+# Qwen_2_5_7BInstruct
+
+
+class Qwen_2_5_7BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model="qwen/Qwen-2.5-7B-Instruct", tp=1)
+
+
+class Qwen_2_5_7BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar", model="Qwen/Qwen2.5-7B-Instruct", tp=1)
+
+
+class Qwen_2_5_7BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance", model="Qwen/Qwen2.5-7B-Instruct", tp=1)
+
+
 if __name__ == "__main__":
     unittest.main()

From 68457d4c638ca3fddc8715936c0792d102662998 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 20:19:43 -0700
Subject: [PATCH 11/14] changed tp to 1 for 11b vision tests and renamed the
 outlines test class in test_json_schema in feature_compatibility

---
 test/srt/feature_compatibility/test_json_schema.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py
index 37fcb92a415..42e425c670f 100644
--- a/test/srt/feature_compatibility/test_json_schema.py
+++ b/test/srt/feature_compatibility/test_json_schema.py
@@ -204,14 +204,14 @@ def setUpClass(cls):
 # MetaLlama_3_2_11BVisionInstruct
 
 
-class MetaLlama_3_2_11BVisionInstruct(TestJSONSchemaBase):
+class MetaLlama_3_2_11BVisionInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
         setup_class(
             cls,
             backend="outlines",
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-            tp=2,
+            tp=1,
         )
 
 
@@ -222,7 +222,7 @@ def setUpClass(cls):
             cls,
             backend="xgrammar",
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-            tp=2,
+            tp=1,
         )
 
 
@@ -233,7 +233,7 @@ def setUpClass(cls):
             cls,
             backend="llguidance",
             model="meta-llama/Llama-3.2-11B-Vision-Instruct",
-            tp=2,
+            tp=1,
         )
 
 

From 673cd613444c4cf7bca8c8c43bc1a5a25bf37dd7 Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 20:44:22 -0700
Subject: [PATCH 12/14] fixed typo in model name in feature_compatibility for
 json schema

---
 test/srt/feature_compatibility/test_json_schema.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py
index 42e425c670f..98347a42b5b 100644
--- a/test/srt/feature_compatibility/test_json_schema.py
+++ b/test/srt/feature_compatibility/test_json_schema.py
@@ -12,6 +12,7 @@
 python -m unittest test_json_schema.MetaLlama_3_2_1BInstructXGrammar.test_json_openai
 python -m unittest test_json_schema.MetaLlama_3_2_1BInstructLLGuidance.test_json_openai
 
+
 python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructOutlines.test_json_openai
 python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructXGrammar.test_json_openai
 python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructLLGuidance.test_json_openai
@@ -300,7 +301,7 @@ def setUpClass(cls):
 class Qwen_2_5_7BInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="outlines", model="qwen/Qwen-2.5-7B-Instruct", tp=1)
+        setup_class(cls, backend="outlines", model="Qwen/Qwen-2.5-7B-Instruct", tp=1)
 
 
 class Qwen_2_5_7BInstructXGrammar(TestJSONSchemaBase):

From ab20bf2fa57d27ea11c005fd145cc2a4e6f0e15c Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 20:46:32 -0700
Subject: [PATCH 13/14] fixed Qwen2.5 model name typo in test_json_schema

---
 test/srt/feature_compatibility/test_json_schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py
index 98347a42b5b..fef352f2de1 100644
--- a/test/srt/feature_compatibility/test_json_schema.py
+++ b/test/srt/feature_compatibility/test_json_schema.py
@@ -301,7 +301,7 @@ def setUpClass(cls):
 class Qwen_2_5_7BInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):
-        setup_class(cls, backend="outlines", model="Qwen/Qwen-2.5-7B-Instruct", tp=1)
+        setup_class(cls, backend="outlines", model="Qwen/Qwen2.5-7B-Instruct", tp=1)
 
 
 class Qwen_2_5_7BInstructXGrammar(TestJSONSchemaBase):

From 4f7f118ffbf30b1cc98e88b29ca4dda04562220a Mon Sep 17 00:00:00 2001
From: Kyle Pena <kylepena@kyles-macbook-pro.turkey-marlin.ts.net>
Date: Fri, 21 Mar 2025 21:57:50 -0700
Subject: [PATCH 14/14] skipped a test where the engine fails to start in
 feature_compatibility

---
 test/srt/feature_compatibility/test_json_schema.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py
index fef352f2de1..9c20e0c3696 100644
--- a/test/srt/feature_compatibility/test_json_schema.py
+++ b/test/srt/feature_compatibility/test_json_schema.py
@@ -205,6 +205,7 @@ def setUpClass(cls):
 # MetaLlama_3_2_11BVisionInstruct
 
 
+@unittest.skip("Engine fails to start with 3.2-11b-vision-instruct and outlines")
 class MetaLlama_3_2_11BVisionInstructOutlines(TestJSONSchemaBase):
     @classmethod
     def setUpClass(cls):