From 6869210d9e2c54b5bc5979163d49610b32957ee4 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Thu, 20 Mar 2025 18:28:42 -0700 Subject: [PATCH 01/14] initial commit with new function calling base test class --- test/srt/test_function_calling.py | 221 +++++++++++++++++++----------- 1 file changed, 142 insertions(+), 79 deletions(-) diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py index 24f341a5e47..1f5856acde4 100644 --- a/test/srt/test_function_calling.py +++ b/test/srt/test_function_calling.py @@ -1,6 +1,7 @@ import json import time import unittest +from typing import Optional import openai @@ -14,11 +15,13 @@ ) -class TestOpenAIServerFunctionCalling(unittest.TestCase): +class OpenAIServerFunctionCallingBase(unittest.TestCase): @classmethod def setUpClass(cls): # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST + cls.tool_call_parser = "llama3" + cls.tp = 1 cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" @@ -31,7 +34,9 @@ def setUpClass(cls): other_args=[ # If your server needs extra parameters to test function calling, please add them here. "--tool-call-parser", - "llama3", + cls.tool_call_parser, + "--tp", + str(cls.tp), ], ) cls.base_url += "/v1" @@ -41,36 +46,14 @@ def setUpClass(cls): def tearDownClass(cls): kill_process_tree(cls.process.pid) - def test_function_calling_format(self): + def test_function_calling_format_with_no_tool_choice_specified(self): """ Test: Whether the function call format returned by the AI is correct. When returning a tool call, message.content should be None, and tool_calls should be a list. """ client = openai.Client(api_key=self.api_key, base_url=self.base_url) - tools = [ - { - "type": "function", - "function": { - "name": "add", - "description": "Compute the sum of two numbers", - "parameters": { - "type": "object", - "properties": { - "a": { - "type": "int", - "description": "A number", - }, - "b": { - "type": "int", - "description": "A number", - }, - }, - "required": ["a", "b"], - }, - }, - } - ] + tools = [self.get_add_tool()] messages = [{"role": "user", "content": "Compute (3+5)"}] response = client.chat.completions.create( @@ -82,19 +65,73 @@ def test_function_calling_format(self): tools=tools, ) - content = response.choices[0].message.content - tool_calls = response.choices[0].message.tool_calls + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + + def test_function_calling_named_tool_choice(self): + """ + Test: Whether the function call format returned by the AI is correct when using named function tool choice. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) - assert content is None, ( - "When function call is successful, message.content should be None, " - f"but got: {content}" + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice={"type": "function", "function": {"name": "add"}} ) - assert ( - isinstance(tool_calls, list) and len(tool_calls) > 0 - ), "tool_calls should be a non-empty list" - function_name = tool_calls[0].function.name - assert function_name == "add", "Function name should be 'add'" + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + + def test_function_calling_required_tool_choice(self): + """ + Test: Whether the function call format returned by the AI is correct when using required function tool choice. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice={"type": "required"} + ) + + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + + def test_function_calling_auto_tool_choice(self): + """ + Test: Whether the function call format returned by the AI is correct when using auto function tool choice. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice={"type": "auto"} + ) + + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) def test_function_calling_streaming_simple(self): """ @@ -105,28 +142,7 @@ def test_function_calling_streaming_simple(self): client = openai.Client(api_key=self.api_key, base_url=self.base_url) tools = [ - { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "The city to find the weather for", - }, - "unit": { - "type": "string", - "description": "Weather unit (celsius or fahrenheit)", - "enum": ["celsius", "fahrenheit"], - }, - }, - "required": ["city", "unit"], - }, - }, - } + self.get_weather_tool() ] messages = [{"role": "user", "content": "What is the temperature in Paris?"}] @@ -172,27 +188,7 @@ def test_function_calling_streaming_args_parsing(self): client = openai.Client(api_key=self.api_key, base_url=self.base_url) tools = [ - { - "type": "function", - "function": { - "name": "add", - "description": "Compute the sum of two integers", - "parameters": { - "type": "object", - "properties": { - "a": { - "type": "int", - "description": "First integer", - }, - "b": { - "type": "int", - "description": "Second integer", - }, - }, - "required": ["a", "b"], - }, - }, - } + self.get_add_tool() ] messages = [ @@ -245,5 +241,72 @@ def test_function_calling_streaming_args_parsing(self): self.assertEqual(args_obj["b"], 7, "Parameter b should be 7") + def assert_tool_call_format(self, response, expected_function_name : Optional[str] = None): + content = response.choices[0].message.content + tool_calls = response.choices[0].message.tool_calls + + assert content is None, ( + "When function call is successful, message.content should be None, " + f"but got: {content}" + ) + assert ( + isinstance(tool_calls, list) and len(tool_calls) > 0 + ), "tool_calls should be a non-empty list" + + function_name = tool_calls[0].function.name + if expected_function_name is not None: + assert function_name == expected_function_name, f"Function name should be '{expected_function_name}'" + + def get_add_tool(self): + return { + "type": "function", + "function": { + "name": "add", + "description": "Compute the sum of two numbers", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "int", + "description": "A number", + }, + "b": { + "type": "int", + "description": "A number", + }, + }, + "required": ["a", "b"], + }, + }, + } + + def get_weather_tool(self): + return { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city to find the weather for", + }, + "unit": { + "type": "string", + "description": "Weather unit (celsius or fahrenheit)", + "enum": ["celsius", "fahrenheit"], + }, + "required": ["city", "unit"], + }, + }, + } + } + + + + + if __name__ == "__main__": unittest.main() From 23fb88da849a0840e40296884b4e92025a28c7dd Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 10:54:01 -0700 Subject: [PATCH 02/14] added feature compatibility for some models in CI --- .github/workflows/nightly-test.yml | 6 + .../test_function_calling.py | 298 ++++++++++++++++++ .../feature_compatibility/test_json_schema.py | 170 ++++++++++ test/srt/run_suite.py | 4 + test/srt/test_function_calling.py | 56 ++-- 5 files changed, 512 insertions(+), 22 deletions(-) create mode 100644 test/srt/feature_compatibility/test_function_calling.py create mode 100644 test/srt/feature_compatibility/test_json_schema.py diff --git a/.github/workflows/nightly-test.yml b/.github/workflows/nightly-test.yml index 23b8bb44d83..dccc91f6b52 100644 --- a/.github/workflows/nightly-test.yml +++ b/.github/workflows/nightly-test.yml @@ -32,3 +32,9 @@ jobs: run: | cd test/srt python3 run_suite.py --suite nightly --timeout-per-file 3600 + + - name: Feature Compatibility Regression Test + timeout-minutes: 120 + run: | + cd test/srt + python3 run_suite.py --suite feature_compatibility_regression_test --timeout-per-file 3600 diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py new file mode 100644 index 00000000000..e1ef05f6e28 --- /dev/null +++ b/test/srt/feature_compatibility/test_function_calling.py @@ -0,0 +1,298 @@ +import json +import time +import unittest +from typing import Optional + +import openai + +from sglang.srt.hf_transformers_utils import get_tokenizer +from sglang.srt.utils import kill_process_tree +from sglang.test.test_utils import ( + DEFAULT_SMALL_MODEL_NAME_FOR_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) + + +def setup_class(cls, tool_call_parser: str, grammar_backend: str, tp: int): + cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST + cls.tool_call_parser = tool_call_parser + cls.tp = tp + cls.base_url = DEFAULT_URL_FOR_TEST + cls.api_key = "sk-123456" + cls.grammar_backend = grammar_backend + + # Start the local OpenAI Server + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + api_key=cls.api_key, + other_args=[ + "--tool-call-parser", + cls.tool_call_parser, + "--tp", + str(cls.tp), + "--grammar-backend", + cls.grammar_backend, + ], + ) + cls.base_url += "/v1" + cls.tokenizer = get_tokenizer(cls.model) + + +class OpenAIServerFunctionCallingBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_function_calling_format_no_tool_choice_specified(self): + """ + Test: Whether the function call format returned by the AI is correct. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + ) + + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + + def test_function_calling_named_tool_choice(self): + """ + Test: Whether the function call format returned by the AI is correct when using named function tool choice. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice={"type": "function", "function": {"name": "add"}} + ) + + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + + def test_function_calling_required_tool_choice(self): + """ + Test: Whether the function call format returned by the AI is correct when using required function tool choice. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice={"type": "required"} + ) + + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + + def test_function_calling_auto_tool_choice(self): + """ + Test: Whether the function call format returned by the AI is correct when using auto function tool choice. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice={"type": "auto"} + ) + + self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + + def test_function_calling_streaming_args_parsing(self): + """ + Test: Whether the function call arguments returned in streaming mode can be correctly concatenated into valid JSON. + - The user request requires multiple parameters. + - AI may return the arguments in chunks that need to be concatenated. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [ + self.get_add_tool() + ] + + messages = [ + {"role": "user", "content": "Please sum 5 and 7, just call the function."} + ] + + response_stream = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.9, + top_p=0.9, + stream=True, + tools=tools, + ) + + argument_fragments = [] + function_name = None + for chunk in response_stream: + choice = chunk.choices[0] + if choice.delta.tool_calls: + tool_call = choice.delta.tool_calls[0] + # Record the function name on first occurrence + function_name = tool_call.function.name or function_name + # In case of multiple chunks, JSON fragments may need to be concatenated + if tool_call.function.arguments: + argument_fragments.append(tool_call.function.arguments) + + self.assertEqual(function_name, "add", "Function name should be 'add'") + joined_args = "".join(argument_fragments) + self.assertTrue( + len(joined_args) > 0, + "No parameter fragments were returned in the function call", + ) + + # Check whether the concatenated JSON is valid + try: + args_obj = json.loads(joined_args) + except json.JSONDecodeError: + self.fail( + "The concatenated tool call arguments are not valid JSON, parsing failed" + ) + + self.assertIn("a", args_obj, "Missing parameter 'a'") + self.assertIn("b", args_obj, "Missing parameter 'b'") + self.assertEqual( + args_obj["a"], + 5, + "Parameter a should be 5", + ) + self.assertEqual(args_obj["b"], 7, "Parameter b should be 7") + + + def assert_tool_call_format(self, response, expected_function_name : Optional[str] = None): + content = response.choices[0].message.content + tool_calls = response.choices[0].message.tool_calls + + assert content is None, ( + "When function call is successful, message.content should be None, " + f"but got: {content}" + ) + assert ( + isinstance(tool_calls, list) and len(tool_calls) > 0 + ), "tool_calls should be a non-empty list" + + function_name = tool_calls[0].function.name + if expected_function_name is not None: + assert function_name == expected_function_name, f"Function name should be '{expected_function_name}'" + + def get_add_tool(self): + return { + "type": "function", + "function": { + "name": "add", + "description": "Compute the sum of two numbers", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "int", + "description": "A number", + }, + "b": { + "type": "int", + "description": "A number", + }, + }, + "required": ["a", "b"], + }, + }, + } + + def get_weather_tool(self): + return { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city to find the weather for", + }, + "unit": { + "type": "string", + "description": "Weather unit (celsius or fahrenheit)", + "enum": ["celsius", "fahrenheit"], + }, + "required": ["city", "unit"], + }, + }, + } + } + + +class MetaLlama_3_1_8BInstruct(OpenAIServerFunctionCallingBase): + @classmethod + def setUpClass(cls): + setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1) + + +class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase): + @classmethod + def setUpClass(cls): + setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2) + + +class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase): + @classmethod + def setUpClass(cls): + setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1) + + +class MetaLlama_3_3_70BInstruct(OpenAIServerFunctionCallingBase): + @classmethod + def setUpClass(cls): + setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2) + + +class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase): + @classmethod + def setUpClass(cls): + setup_class(cls, tool_call_parser="mistral", grammar_backend="outlines", tp=1) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py new file mode 100644 index 00000000000..e80e5b2dfab --- /dev/null +++ b/test/srt/feature_compatibility/test_json_schema.py @@ -0,0 +1,170 @@ +import json +import unittest +from concurrent.futures import ThreadPoolExecutor + +import openai +import requests + +from sglang.srt.utils import kill_process_tree +from sglang.test.test_utils import ( + DEFAULT_SMALL_MODEL_NAME_FOR_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) + + +def setup_class(cls, backend: str, model: str, tp: int): + cls.model = model + cls.base_url = DEFAULT_URL_FOR_TEST + cls.json_schema = json.dumps( + { + "type": "object", + "properties": { + "name": {"type": "string"}, + "population": {"type": "integer"}, + }, + "required": ["name", "population"], + "additionalProperties": False, + } + ) + + other_args = [ + "--grammar-backend", + backend, + "--tp", + str(tp), + ] + + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + +class TestJSONSchemaBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_json_openai(self): + client = openai.Client(api_key="EMPTY", base_url=f"{self.base_url}/v1") + + response = client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": "You are a helpful AI assistant"}, + {"role": "user", "content": "Introduce the capital of France."}, + ], + temperature=0, + max_tokens=128, + response_format={ + "type": "json_schema", + "json_schema": {"name": "foo", "schema": json.loads(self.json_schema)}, + }, + ) + text = response.choices[0].message.content + + try: + js_obj = json.loads(text) + except (TypeError, json.decoder.JSONDecodeError): + print("JSONDecodeError", text) + raise + + self.assertIsInstance(js_obj["name"], str) + self.assertIsInstance(js_obj["population"], int) + + +# MetaLlama_3_1_8BInstruct + +class MetaLlama_3_1_8BInstructOutlines(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1) + +class MetaLlama_3_1_8BInstructXGrammar(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1) + +class MetaLlama_3_1_8BInstructLLGuidance(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1) + +# MetaLlama_3_1_70BInstruct + +class MetaLlama_3_1_70BInstructOutlines(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2) + +class MetaLlama_3_1_70BInstructXGrammar(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2) + +class MetaLlama_3_1_70BInstructLLGuidance(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2) + +# MetaLlama_3_2_11BVisionInstruct + +class MetaLlama_3_2_11BVisionInstruct(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2) + +class MetaLlama_3_2_11BVisionInstructXGrammar(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2) + +class MetaLlama_3_2_11BVisionInstructLLGuidance(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2) + +# MetaLlama_3_3_70BInstruct + +class MetaLlama_3_3_70BInstructOutlines(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2) + +class MetaLlama_3_3_70BInstructXGrammar(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2) + +class MetaLlama_3_3_70BInstructLLGuidance(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2) + +# MistralNemo12BInstruct + +class MistralNemo12BInstructOutlines(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1) + +class MistralNemo12BInstructXGrammar(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="xgrammar", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1) + +class MistralNemo12BInstructLLGuidance(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="llguidance", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1) + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index e4e5f32526a..2689d14034b 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -80,6 +80,10 @@ class TestFile: "nightly": [ TestFile("test_nightly_gsm8k_eval.py"), ], + "feature_compatibility_regression_test": [ + TestFile("feature_compatibility/test_function_calling.py"), + TestFile("feature_compatibility/test_json_schema.py"), + ], } diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py index 1f5856acde4..5bdf92fd4ed 100644 --- a/test/srt/test_function_calling.py +++ b/test/srt/test_function_calling.py @@ -21,7 +21,7 @@ def setUpClass(cls): # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.tool_call_parser = "llama3" - cls.tp = 1 + cls.tp = 1 cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" @@ -65,8 +65,12 @@ def test_function_calling_format_with_no_tool_choice_specified(self): tools=tools, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) - + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) + def test_function_calling_named_tool_choice(self): """ Test: Whether the function call format returned by the AI is correct when using named function tool choice. @@ -84,10 +88,14 @@ def test_function_calling_named_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "function", "function": {"name": "add"}} + tool_choice={"type": "function", "function": {"name": "add"}}, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) def test_function_calling_required_tool_choice(self): """ @@ -106,10 +114,14 @@ def test_function_calling_required_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "required"} + tool_choice={"type": "required"}, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) def test_function_calling_auto_tool_choice(self): """ @@ -128,10 +140,14 @@ def test_function_calling_auto_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "auto"} + tool_choice={"type": "auto"}, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) def test_function_calling_streaming_simple(self): """ @@ -141,9 +157,7 @@ def test_function_calling_streaming_simple(self): """ client = openai.Client(api_key=self.api_key, base_url=self.base_url) - tools = [ - self.get_weather_tool() - ] + tools = [self.get_weather_tool()] messages = [{"role": "user", "content": "What is the temperature in Paris?"}] @@ -187,9 +201,7 @@ def test_function_calling_streaming_args_parsing(self): """ client = openai.Client(api_key=self.api_key, base_url=self.base_url) - tools = [ - self.get_add_tool() - ] + tools = [self.get_add_tool()] messages = [ {"role": "user", "content": "Please sum 5 and 7, just call the function."} @@ -240,8 +252,9 @@ def test_function_calling_streaming_args_parsing(self): ) self.assertEqual(args_obj["b"], 7, "Parameter b should be 7") - - def assert_tool_call_format(self, response, expected_function_name : Optional[str] = None): + def assert_tool_call_format( + self, response, expected_function_name: Optional[str] = None + ): content = response.choices[0].message.content tool_calls = response.choices[0].message.tool_calls @@ -255,7 +268,9 @@ def assert_tool_call_format(self, response, expected_function_name : Optional[st function_name = tool_calls[0].function.name if expected_function_name is not None: - assert function_name == expected_function_name, f"Function name should be '{expected_function_name}'" + assert ( + function_name == expected_function_name + ), f"Function name should be '{expected_function_name}'" def get_add_tool(self): return { @@ -301,12 +316,9 @@ def get_weather_tool(self): "required": ["city", "unit"], }, }, - } + }, } - - - if __name__ == "__main__": unittest.main() From 7b565aed730a600f0f7cc3f2046ac9cc0dba5645 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 12:48:13 -0700 Subject: [PATCH 03/14] reverted contents of test_function_calling to contents of base repo --- test/srt/test_function_calling.py | 243 +++++++++++------------------- 1 file changed, 84 insertions(+), 159 deletions(-) diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py index 5bdf92fd4ed..d73db349689 100644 --- a/test/srt/test_function_calling.py +++ b/test/srt/test_function_calling.py @@ -1,7 +1,6 @@ import json import time import unittest -from typing import Optional import openai @@ -15,13 +14,11 @@ ) -class OpenAIServerFunctionCallingBase(unittest.TestCase): +class TestOpenAIServerFunctionCalling(unittest.TestCase): @classmethod def setUpClass(cls): # Replace with the model name needed for testing; if not required, reuse DEFAULT_SMALL_MODEL_NAME_FOR_TEST cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST - cls.tool_call_parser = "llama3" - cls.tp = 1 cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" @@ -34,9 +31,7 @@ def setUpClass(cls): other_args=[ # If your server needs extra parameters to test function calling, please add them here. "--tool-call-parser", - cls.tool_call_parser, - "--tp", - str(cls.tp), + "llama3", ], ) cls.base_url += "/v1" @@ -46,65 +41,36 @@ def setUpClass(cls): def tearDownClass(cls): kill_process_tree(cls.process.pid) - def test_function_calling_format_with_no_tool_choice_specified(self): + def test_function_calling_format(self): """ Test: Whether the function call format returned by the AI is correct. When returning a tool call, message.content should be None, and tool_calls should be a list. """ client = openai.Client(api_key=self.api_key, base_url=self.base_url) - tools = [self.get_add_tool()] - - messages = [{"role": "user", "content": "Compute (3+5)"}] - response = client.chat.completions.create( - model=self.model, - messages=messages, - temperature=0.8, - top_p=0.8, - stream=False, - tools=tools, - ) - - self.assert_tool_call_format( - response, - expected_function_name="add", - expected_function_arguments=["a", "b"], - ) - - def test_function_calling_named_tool_choice(self): - """ - Test: Whether the function call format returned by the AI is correct when using named function tool choice. - When returning a tool call, message.content should be None, and tool_calls should be a list. - """ - client = openai.Client(api_key=self.api_key, base_url=self.base_url) - - tools = [self.get_add_tool()] - - messages = [{"role": "user", "content": "Compute (3+5)"}] - response = client.chat.completions.create( - model=self.model, - messages=messages, - temperature=0.8, - top_p=0.8, - stream=False, - tools=tools, - tool_choice={"type": "function", "function": {"name": "add"}}, - ) - - self.assert_tool_call_format( - response, - expected_function_name="add", - expected_function_arguments=["a", "b"], - ) - - def test_function_calling_required_tool_choice(self): - """ - Test: Whether the function call format returned by the AI is correct when using required function tool choice. - When returning a tool call, message.content should be None, and tool_calls should be a list. - """ - client = openai.Client(api_key=self.api_key, base_url=self.base_url) - - tools = [self.get_add_tool()] + tools = [ + { + "type": "function", + "function": { + "name": "add", + "description": "Compute the sum of two numbers", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "int", + "description": "A number", + }, + "b": { + "type": "int", + "description": "A number", + }, + }, + "required": ["a", "b"], + }, + }, + } + ] messages = [{"role": "user", "content": "Compute (3+5)"}] response = client.chat.completions.create( @@ -114,40 +80,21 @@ def test_function_calling_required_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "required"}, ) - self.assert_tool_call_format( - response, - expected_function_name="add", - expected_function_arguments=["a", "b"], - ) - - def test_function_calling_auto_tool_choice(self): - """ - Test: Whether the function call format returned by the AI is correct when using auto function tool choice. - When returning a tool call, message.content should be None, and tool_calls should be a list. - """ - client = openai.Client(api_key=self.api_key, base_url=self.base_url) - - tools = [self.get_add_tool()] + content = response.choices[0].message.content + tool_calls = response.choices[0].message.tool_calls - messages = [{"role": "user", "content": "Compute (3+5)"}] - response = client.chat.completions.create( - model=self.model, - messages=messages, - temperature=0.8, - top_p=0.8, - stream=False, - tools=tools, - tool_choice={"type": "auto"}, + assert content is None, ( + "When function call is successful, message.content should be None, " + f"but got: {content}" ) + assert ( + isinstance(tool_calls, list) and len(tool_calls) > 0 + ), "tool_calls should be a non-empty list" - self.assert_tool_call_format( - response, - expected_function_name="add", - expected_function_arguments=["a", "b"], - ) + function_name = tool_calls[0].function.name + assert function_name == "add", "Function name should be 'add'" def test_function_calling_streaming_simple(self): """ @@ -157,7 +104,30 @@ def test_function_calling_streaming_simple(self): """ client = openai.Client(api_key=self.api_key, base_url=self.base_url) - tools = [self.get_weather_tool()] + tools = [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "The city to find the weather for", + }, + "unit": { + "type": "string", + "description": "Weather unit (celsius or fahrenheit)", + "enum": ["celsius", "fahrenheit"], + }, + }, + "required": ["city", "unit"], + }, + }, + } + ] messages = [{"role": "user", "content": "What is the temperature in Paris?"}] @@ -201,7 +171,29 @@ def test_function_calling_streaming_args_parsing(self): """ client = openai.Client(api_key=self.api_key, base_url=self.base_url) - tools = [self.get_add_tool()] + tools = [ + { + "type": "function", + "function": { + "name": "add", + "description": "Compute the sum of two integers", + "parameters": { + "type": "object", + "properties": { + "a": { + "type": "int", + "description": "First integer", + }, + "b": { + "type": "int", + "description": "Second integer", + }, + }, + "required": ["a", "b"], + }, + }, + } + ] messages = [ {"role": "user", "content": "Please sum 5 and 7, just call the function."} @@ -252,73 +244,6 @@ def test_function_calling_streaming_args_parsing(self): ) self.assertEqual(args_obj["b"], 7, "Parameter b should be 7") - def assert_tool_call_format( - self, response, expected_function_name: Optional[str] = None - ): - content = response.choices[0].message.content - tool_calls = response.choices[0].message.tool_calls - - assert content is None, ( - "When function call is successful, message.content should be None, " - f"but got: {content}" - ) - assert ( - isinstance(tool_calls, list) and len(tool_calls) > 0 - ), "tool_calls should be a non-empty list" - - function_name = tool_calls[0].function.name - if expected_function_name is not None: - assert ( - function_name == expected_function_name - ), f"Function name should be '{expected_function_name}'" - - def get_add_tool(self): - return { - "type": "function", - "function": { - "name": "add", - "description": "Compute the sum of two numbers", - "parameters": { - "type": "object", - "properties": { - "a": { - "type": "int", - "description": "A number", - }, - "b": { - "type": "int", - "description": "A number", - }, - }, - "required": ["a", "b"], - }, - }, - } - - def get_weather_tool(self): - return { - "type": "function", - "function": { - "name": "get_current_weather", - "description": "Get the current weather in a given location", - "parameters": { - "type": "object", - "properties": { - "city": { - "type": "string", - "description": "The city to find the weather for", - }, - "unit": { - "type": "string", - "description": "Weather unit (celsius or fahrenheit)", - "enum": ["celsius", "fahrenheit"], - }, - "required": ["city", "unit"], - }, - }, - }, - } - if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file From 6b62164d23ded1a84ce1f702aa1df71430f5d684 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 14:45:08 -0700 Subject: [PATCH 04/14] possible fix to Llama32 function call parsing --- python/sglang/srt/function_call_parser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/function_call_parser.py b/python/sglang/srt/function_call_parser.py index 4ae8d0a0d01..2aac81e7559 100644 --- a/python/sglang/srt/function_call_parser.py +++ b/python/sglang/srt/function_call_parser.py @@ -427,8 +427,10 @@ def detect_and_parse(self, text: str, tools: List[Function]) -> List[ToolCallIte return StreamingParseResult(normal_text=text, calls=[]) if "<|python_tag|>" in text: - _, action_text = text.split("<|python_tag|>") + normal_text, action_text = text.split("<|python_tag|>") + normal_text = normal_text.strip() else: + normal_text = "" action_text = text # Split by semicolon and process each part From 3622d87aa39bd3a741470ecbd8b83ac04b29da0a Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 16:46:26 -0700 Subject: [PATCH 05/14] added qwen25 and fixed model specification for function calling --- .../test_function_calling.py | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py index e1ef05f6e28..1f8b4c0ed25 100644 --- a/test/srt/feature_compatibility/test_function_calling.py +++ b/test/srt/feature_compatibility/test_function_calling.py @@ -15,13 +15,13 @@ ) -def setup_class(cls, tool_call_parser: str, grammar_backend: str, tp: int): - cls.model = DEFAULT_SMALL_MODEL_NAME_FOR_TEST +def setup_class(cls, model : str, tool_call_parser: str, grammar_backend: str, tp: int): + cls.model = model cls.tool_call_parser = tool_call_parser + cls.grammar_backend = grammar_backend cls.tp = tp cls.base_url = DEFAULT_URL_FOR_TEST cls.api_key = "sk-123456" - cls.grammar_backend = grammar_backend # Start the local OpenAI Server cls.process = popen_launch_server( @@ -45,7 +45,7 @@ def setup_class(cls, tool_call_parser: str, grammar_backend: str, tp: int): class OpenAIServerFunctionCallingBase(unittest.TestCase): @classmethod def setUpClass(cls): - setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1) + setup_class(cls, model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST, tool_call_parser="llama3", grammar_backend="outlines", tp=1) @classmethod def tearDownClass(cls): @@ -267,32 +267,41 @@ def get_weather_tool(self): class MetaLlama_3_1_8BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1) + setup_class(cls, model="meta-llama/Llama-3.1-8B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1) -class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase): +@unittest.skip("Tool call parsing is broken for Llama 3.2 models") +class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2) + setup_class(cls, model="meta-llama/Llama-3.2-1B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1) +class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase): + @classmethod + def setUpClass(cls): + setup_class(cls, model="meta-llama/Llama-3.1-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2) class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=1) + setup_class(cls, model="meta-llama/Llama-3.2-11B-Vision-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1) class MetaLlama_3_3_70BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, tool_call_parser="llama3", grammar_backend="outlines", tp=2) + setup_class(cls, model="meta-llama/Llama-3.3-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2) class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, tool_call_parser="mistral", grammar_backend="outlines", tp=1) + setup_class(cls, model="nvidia/Mistral-NeMo-12B-Instruct", tool_call_parser="mistral", grammar_backend="outlines", tp=1) +class Qwen_2_5_7BInstruct(OpenAIServerFunctionCallingBase): + @classmethod + def setUpClass(cls): + setup_class(cls, model="Qwen/Qwen2.5-7B-Instruct", tool_call_parser="qwen25", grammar_backend="outlines", tp=1) if __name__ == "__main__": unittest.main() From cb378d583b6fe250a5cbf96fb4ffe6f2cc5cb713 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 17:58:14 -0700 Subject: [PATCH 06/14] fixed test cases in feature compatibility for function calling --- .../test_function_calling.py | 158 +++++++++++++++--- .../feature_compatibility/test_json_schema.py | 89 ++++++++-- test/srt/test_function_calling.py | 2 +- 3 files changed, 209 insertions(+), 40 deletions(-) diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py index 1f8b4c0ed25..5e0562ee1a0 100644 --- a/test/srt/feature_compatibility/test_function_calling.py +++ b/test/srt/feature_compatibility/test_function_calling.py @@ -1,3 +1,35 @@ +""" + +python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice + +python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice + +python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice + +python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice + +python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice + +python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice + +python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_required_tool_choice + +""" + import json import time import unittest @@ -15,7 +47,7 @@ ) -def setup_class(cls, model : str, tool_call_parser: str, grammar_backend: str, tp: int): +def setup_class(cls, model: str, tool_call_parser: str, grammar_backend: str, tp: int): cls.model = model cls.tool_call_parser = tool_call_parser cls.grammar_backend = grammar_backend @@ -45,7 +77,13 @@ def setup_class(cls, model : str, tool_call_parser: str, grammar_backend: str, t class OpenAIServerFunctionCallingBase(unittest.TestCase): @classmethod def setUpClass(cls): - setup_class(cls, model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST, tool_call_parser="llama3", grammar_backend="outlines", tp=1) + setup_class( + cls, + model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST, + tool_call_parser="llama3", + grammar_backend="outlines", + tp=1, + ) @classmethod def tearDownClass(cls): @@ -70,8 +108,12 @@ def test_function_calling_format_no_tool_choice_specified(self): tools=tools, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) - + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) + def test_function_calling_named_tool_choice(self): """ Test: Whether the function call format returned by the AI is correct when using named function tool choice. @@ -89,10 +131,14 @@ def test_function_calling_named_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "function", "function": {"name": "add"}} + tool_choice={"type": "function", "function": {"name": "add"}}, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) def test_function_calling_required_tool_choice(self): """ @@ -111,10 +157,14 @@ def test_function_calling_required_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "required"} + tool_choice={"type": "required"}, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) def test_function_calling_auto_tool_choice(self): """ @@ -133,10 +183,14 @@ def test_function_calling_auto_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "auto"} + tool_choice={"type": "auto"}, ) - self.assert_tool_call_format(response, expected_function_name="add", expected_function_arguments=["a", "b"]) + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) def test_function_calling_streaming_args_parsing(self): """ @@ -146,9 +200,7 @@ def test_function_calling_streaming_args_parsing(self): """ client = openai.Client(api_key=self.api_key, base_url=self.base_url) - tools = [ - self.get_add_tool() - ] + tools = [self.get_add_tool()] messages = [ {"role": "user", "content": "Please sum 5 and 7, just call the function."} @@ -199,8 +251,12 @@ def test_function_calling_streaming_args_parsing(self): ) self.assertEqual(args_obj["b"], 7, "Parameter b should be 7") - - def assert_tool_call_format(self, response, expected_function_name : Optional[str] = None): + def assert_tool_call_format( + self, + response, + expected_function_name: Optional[str] = None, + expected_function_arguments: Optional[list] = None, + ): content = response.choices[0].message.content tool_calls = response.choices[0].message.tool_calls @@ -214,7 +270,15 @@ def assert_tool_call_format(self, response, expected_function_name : Optional[st function_name = tool_calls[0].function.name if expected_function_name is not None: - assert function_name == expected_function_name, f"Function name should be '{expected_function_name}'" + assert ( + function_name == expected_function_name + ), f"Function name should be '{expected_function_name}'" + + if expected_function_arguments is not None: + actual_function_arguments = json.loads(tool_calls[0].function.arguments) + assert set(actual_function_arguments) == set( + expected_function_arguments + ), f"Function argument names should be {expected_function_arguments}, arguments were {actual_function_arguments}" def get_add_tool(self): return { @@ -260,48 +324,94 @@ def get_weather_tool(self): "required": ["city", "unit"], }, }, - } + }, } class MetaLlama_3_1_8BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, model="meta-llama/Llama-3.1-8B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1) + setup_class( + cls, + model="meta-llama/Llama-3.1-8B-Instruct", + tool_call_parser="llama3", + grammar_backend="outlines", + tp=1, + ) @unittest.skip("Tool call parsing is broken for Llama 3.2 models") class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, model="meta-llama/Llama-3.2-1B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1) + setup_class( + cls, + model="meta-llama/Llama-3.2-1B-Instruct", + tool_call_parser="llama3", + grammar_backend="outlines", + tp=1, + ) + class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, model="meta-llama/Llama-3.1-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2) + setup_class( + cls, + model="meta-llama/Llama-3.1-70B-Instruct", + tool_call_parser="llama3", + grammar_backend="outlines", + tp=2, + ) + class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, model="meta-llama/Llama-3.2-11B-Vision-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=1) + setup_class( + cls, + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + tool_call_parser="llama3", + grammar_backend="outlines", + tp=1, + ) class MetaLlama_3_3_70BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, model="meta-llama/Llama-3.3-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", tp=2) + setup_class( + cls, + model="meta-llama/Llama-3.3-70B-Instruct", + tool_call_parser="llama3", + grammar_backend="outlines", + tp=2, + ) class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, model="nvidia/Mistral-NeMo-12B-Instruct", tool_call_parser="mistral", grammar_backend="outlines", tp=1) + setup_class( + cls, + model="nvidia/Mistral-NeMo-12B-Instruct", + tool_call_parser="mistral", + grammar_backend="outlines", + tp=1, + ) + class Qwen_2_5_7BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): - setup_class(cls, model="Qwen/Qwen2.5-7B-Instruct", tool_call_parser="qwen25", grammar_backend="outlines", tp=1) + setup_class( + cls, + model="Qwen/Qwen2.5-7B-Instruct", + tool_call_parser="qwen25", + grammar_backend="outlines", + tp=1, + ) + if __name__ == "__main__": unittest.main() diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py index e80e5b2dfab..e50e270ae10 100644 --- a/test/srt/feature_compatibility/test_json_schema.py +++ b/test/srt/feature_compatibility/test_json_schema.py @@ -83,88 +83,147 @@ def test_json_openai(self): # MetaLlama_3_1_8BInstruct + class MetaLlama_3_1_8BInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1) + setup_class( + cls, backend="outlines", model="meta-llama/Llama-3.1-8B-Instruct", tp=1 + ) + class MetaLlama_3_1_8BInstructXGrammar(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1) + setup_class( + cls, backend="xgrammar", model="meta-llama/Llama-3.1-8B-Instruct", tp=1 + ) + class MetaLlama_3_1_8BInstructLLGuidance(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-8B-Instruct", tp = 1) + setup_class( + cls, backend="llguidance", model="meta-llama/Llama-3.1-8B-Instruct", tp=1 + ) + # MetaLlama_3_1_70BInstruct + class MetaLlama_3_1_70BInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2) + setup_class( + cls, backend="outlines", model="meta-llama/Llama-3.1-70B-Instruct", tp=2 + ) + class MetaLlama_3_1_70BInstructXGrammar(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2) + setup_class( + cls, backend="xgrammar", model="meta-llama/Llama-3.1-70B-Instruct", tp=2 + ) + class MetaLlama_3_1_70BInstructLLGuidance(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.1-70B-Instruct", tp = 2) + setup_class( + cls, backend="llguidance", model="meta-llama/Llama-3.1-70B-Instruct", tp=2 + ) + # MetaLlama_3_2_11BVisionInstruct + class MetaLlama_3_2_11BVisionInstruct(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2) + setup_class( + cls, + backend="outlines", + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + tp=2, + ) + class MetaLlama_3_2_11BVisionInstructXGrammar(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2) + setup_class( + cls, + backend="xgrammar", + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + tp=2, + ) + class MetaLlama_3_2_11BVisionInstructLLGuidance(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.2-11B-Vision-Instruct", tp = 2) + setup_class( + cls, + backend="llguidance", + model="meta-llama/Llama-3.2-11B-Vision-Instruct", + tp=2, + ) + # MetaLlama_3_3_70BInstruct + class MetaLlama_3_3_70BInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="outlines", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2) + setup_class( + cls, backend="outlines", model="meta-llama/Llama-3.3-70B-Instruct", tp=2 + ) + class MetaLlama_3_3_70BInstructXGrammar(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="xgrammar", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2) + setup_class( + cls, backend="xgrammar", model="meta-llama/Llama-3.3-70B-Instruct", tp=2 + ) + class MetaLlama_3_3_70BInstructLLGuidance(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="llguidance", model = "meta-llama/Llama-3.3-70B-Instruct", tp = 2) + setup_class( + cls, backend="llguidance", model="meta-llama/Llama-3.3-70B-Instruct", tp=2 + ) + # MistralNemo12BInstruct + class MistralNemo12BInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="outlines", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1) + setup_class( + cls, backend="outlines", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1 + ) + class MistralNemo12BInstructXGrammar(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="xgrammar", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1) + setup_class( + cls, backend="xgrammar", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1 + ) + class MistralNemo12BInstructLLGuidance(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="llguidance", model = "nvidia/Mistral-NeMo-12B-Instruct", tp = 1) + setup_class( + cls, backend="llguidance", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1 + ) + if __name__ == "__main__": unittest.main() diff --git a/test/srt/test_function_calling.py b/test/srt/test_function_calling.py index d73db349689..24f341a5e47 100644 --- a/test/srt/test_function_calling.py +++ b/test/srt/test_function_calling.py @@ -246,4 +246,4 @@ def test_function_calling_streaming_args_parsing(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() From 0907dc2ef2bd0cfe8355ced3f87930ef813cf6b4 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 18:06:26 -0700 Subject: [PATCH 07/14] fixed tool choice spec in tests for function calling feature compatibility --- test/srt/feature_compatibility/test_function_calling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py index 5e0562ee1a0..c3ebdd958ed 100644 --- a/test/srt/feature_compatibility/test_function_calling.py +++ b/test/srt/feature_compatibility/test_function_calling.py @@ -157,7 +157,7 @@ def test_function_calling_required_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "required"}, + tool_choice="required", ) self.assert_tool_call_format( @@ -183,7 +183,7 @@ def test_function_calling_auto_tool_choice(self): top_p=0.8, stream=False, tools=tools, - tool_choice={"type": "auto"}, + tool_choice="auto", ) self.assert_tool_call_format( From 5b3414bc1001366c5d69476d4d87d08c08181a5e Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 18:34:35 -0700 Subject: [PATCH 08/14] fixed some test comments --- .../test_function_calling.py | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py index c3ebdd958ed..a0c59040670 100644 --- a/test/srt/feature_compatibility/test_function_calling.py +++ b/test/srt/feature_compatibility/test_function_calling.py @@ -4,29 +4,29 @@ python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice -python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified -python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice -python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice - -python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified -python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice -python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice - -python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_format_no_tool_choice_specified -python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice -python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice +python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_required_tool_choice python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice +python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_required_tool_choice + python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice -python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_format_no_tool_choice_specified -python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_named_tool_choice -python -m unittest test_function_calling.Qwen25BInstruct.test_function_calling_required_tool_choice +python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_required_tool_choice + +python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_format_no_tool_choice_specified +python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_named_tool_choice +python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_required_tool_choice """ @@ -340,31 +340,32 @@ def setUpClass(cls): ) -@unittest.skip("Tool call parsing is broken for Llama 3.2 models") -class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase): +class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): setup_class( cls, - model="meta-llama/Llama-3.2-1B-Instruct", + model="meta-llama/Llama-3.1-70B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", - tp=1, + tp=2, ) -class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase): +@unittest.skip("Tool call parsing is broken for Llama 3.2 models") +class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): setup_class( cls, - model="meta-llama/Llama-3.1-70B-Instruct", + model="meta-llama/Llama-3.2-1B-Instruct", tool_call_parser="llama3", grammar_backend="outlines", - tp=2, + tp=1, ) +@unittest.skip("Tool call parsing is broken for Llama 3.2 models") class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase): @classmethod def setUpClass(cls): From 29f0d7f33676cf6d3d8af11fdb2a557781e4db33 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 18:40:15 -0700 Subject: [PATCH 09/14] fixed model name for mistral nemo 12b in feature compatibility function calling test --- test/srt/feature_compatibility/test_function_calling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py index a0c59040670..4d53c27cc60 100644 --- a/test/srt/feature_compatibility/test_function_calling.py +++ b/test/srt/feature_compatibility/test_function_calling.py @@ -395,7 +395,7 @@ class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase): def setUpClass(cls): setup_class( cls, - model="nvidia/Mistral-NeMo-12B-Instruct", + model="mistralai/Mistral-Nemo-Instruct-2407", tool_call_parser="mistral", grammar_backend="outlines", tp=1, From 93b463424fe8a02a5fec92cc55cfc5f6f3b86c66 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 19:28:39 -0700 Subject: [PATCH 10/14] fixed some test cases and added some test cases in test_json_schema in feature_compatibility --- .../feature_compatibility/test_json_schema.py | 100 +++++++++++++++++- 1 file changed, 95 insertions(+), 5 deletions(-) diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py index e50e270ae10..37fcb92a415 100644 --- a/test/srt/feature_compatibility/test_json_schema.py +++ b/test/srt/feature_compatibility/test_json_schema.py @@ -1,9 +1,39 @@ +""" + +python -m unittest test_json_schema.MetaLlama_3_1_8BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_8BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_8BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MetaLlama_3_1_70BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_70BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_70BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MetaLlama_3_2_1BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_1BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_1BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MetaLlama_3_3_70BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_3_70BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_3_70BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MistralNemo12BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MistralNemo12BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MistralNemo12BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.Qwen_2_5_7BInstructOutlines.test_json_openai +python -m unittest test_json_schema.Qwen_2_5_7BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.Qwen_2_5_7BInstructLLGuidance.test_json_openai + +""" + import json import unittest -from concurrent.futures import ThreadPoolExecutor import openai -import requests from sglang.srt.utils import kill_process_tree from sglang.test.test_utils import ( @@ -135,6 +165,42 @@ def setUpClass(cls): ) +# MetaLlama_3_2_1BInstruct + + +class MetaLlama_3_2_1BInstructOutlines(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class( + cls, + backend="outlines", + model="meta-llama/Llama-3.2-1B-Instruct", + tp=1, + ) + + +class MetaLlama_3_2_1BInstructXGrammar(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class( + cls, + backend="xgrammar", + model="meta-llama/Llama-3.2-1B-Instruct", + tp=1, + ) + + +class MetaLlama_3_2_1BInstructLLGuidance(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class( + cls, + backend="llguidance", + model="meta-llama/Llama-3.2-1B-Instruct", + tp=1, + ) + + # MetaLlama_3_2_11BVisionInstruct @@ -205,7 +271,7 @@ class MistralNemo12BInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): setup_class( - cls, backend="outlines", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1 + cls, backend="outlines", model="mistralai/Mistral-Nemo-Instruct-2407", tp=1 ) @@ -213,7 +279,7 @@ class MistralNemo12BInstructXGrammar(TestJSONSchemaBase): @classmethod def setUpClass(cls): setup_class( - cls, backend="xgrammar", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1 + cls, backend="xgrammar", model="mistralai/Mistral-Nemo-Instruct-2407", tp=1 ) @@ -221,9 +287,33 @@ class MistralNemo12BInstructLLGuidance(TestJSONSchemaBase): @classmethod def setUpClass(cls): setup_class( - cls, backend="llguidance", model="nvidia/Mistral-NeMo-12B-Instruct", tp=1 + cls, + backend="llguidance", + model="mistralai/Mistral-Nemo-Instruct-2407", + tp=1, ) +# Qwen_2_5_7BInstruct + + +class Qwen_2_5_7BInstructOutlines(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model="qwen/Qwen-2.5-7B-Instruct", tp=1) + + +class Qwen_2_5_7BInstructXGrammar(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="xgrammar", model="Qwen/Qwen2.5-7B-Instruct", tp=1) + + +class Qwen_2_5_7BInstructLLGuidance(TestJSONSchemaBase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="llguidance", model="Qwen/Qwen2.5-7B-Instruct", tp=1) + + if __name__ == "__main__": unittest.main() From 68457d4c638ca3fddc8715936c0792d102662998 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 20:19:43 -0700 Subject: [PATCH 11/14] changed tp for 11b vision test in test_json_schema in feature_compatibility --- test/srt/feature_compatibility/test_json_schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py index 37fcb92a415..42e425c670f 100644 --- a/test/srt/feature_compatibility/test_json_schema.py +++ b/test/srt/feature_compatibility/test_json_schema.py @@ -204,14 +204,14 @@ def setUpClass(cls): # MetaLlama_3_2_11BVisionInstruct -class MetaLlama_3_2_11BVisionInstruct(TestJSONSchemaBase): +class MetaLlama_3_2_11BVisionInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): setup_class( cls, backend="outlines", model="meta-llama/Llama-3.2-11B-Vision-Instruct", - tp=2, + tp=1, ) @@ -222,7 +222,7 @@ def setUpClass(cls): cls, backend="xgrammar", model="meta-llama/Llama-3.2-11B-Vision-Instruct", - tp=2, + tp=1, ) @@ -233,7 +233,7 @@ def setUpClass(cls): cls, backend="llguidance", model="meta-llama/Llama-3.2-11B-Vision-Instruct", - tp=2, + tp=1, ) From 673cd613444c4cf7bca8c8c43bc1a5a25bf37dd7 Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 20:44:22 -0700 Subject: [PATCH 12/14] fixed typo in model name in feature_compatibility for json schema --- test/srt/feature_compatibility/test_json_schema.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py index 42e425c670f..98347a42b5b 100644 --- a/test/srt/feature_compatibility/test_json_schema.py +++ b/test/srt/feature_compatibility/test_json_schema.py @@ -12,6 +12,7 @@ python -m unittest test_json_schema.MetaLlama_3_2_1BInstructXGrammar.test_json_openai python -m unittest test_json_schema.MetaLlama_3_2_1BInstructLLGuidance.test_json_openai + python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructOutlines.test_json_openai python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructXGrammar.test_json_openai python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructLLGuidance.test_json_openai @@ -300,7 +301,7 @@ def setUpClass(cls): class Qwen_2_5_7BInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="outlines", model="qwen/Qwen-2.5-7B-Instruct", tp=1) + setup_class(cls, backend="outlines", model="Qwen/Qwen-2.5-7B-Instruct", tp=1) class Qwen_2_5_7BInstructXGrammar(TestJSONSchemaBase): From ab20bf2fa57d27ea11c005fd145cc2a4e6f0e15c Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 20:46:32 -0700 Subject: [PATCH 13/14] fixed another typo --- test/srt/feature_compatibility/test_json_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py index 98347a42b5b..fef352f2de1 100644 --- a/test/srt/feature_compatibility/test_json_schema.py +++ b/test/srt/feature_compatibility/test_json_schema.py @@ -301,7 +301,7 @@ def setUpClass(cls): class Qwen_2_5_7BInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls): - setup_class(cls, backend="outlines", model="Qwen/Qwen-2.5-7B-Instruct", tp=1) + setup_class(cls, backend="outlines", model="Qwen/Qwen2.5-7B-Instruct", tp=1) class Qwen_2_5_7BInstructXGrammar(TestJSONSchemaBase): From 4f7f118ffbf30b1cc98e88b29ca4dda04562220a Mon Sep 17 00:00:00 2001 From: Kyle Pena Date: Fri, 21 Mar 2025 21:57:50 -0700 Subject: [PATCH 14/14] skipped a test where the engine fails to start in feature_compatibility --- test/srt/feature_compatibility/test_json_schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/srt/feature_compatibility/test_json_schema.py b/test/srt/feature_compatibility/test_json_schema.py index fef352f2de1..9c20e0c3696 100644 --- a/test/srt/feature_compatibility/test_json_schema.py +++ b/test/srt/feature_compatibility/test_json_schema.py @@ -205,6 +205,7 @@ def setUpClass(cls): # MetaLlama_3_2_11BVisionInstruct +@unittest.skip("Engine fails to start with 3.2-11b-vision-instruct and outlines") class MetaLlama_3_2_11BVisionInstructOutlines(TestJSONSchemaBase): @classmethod def setUpClass(cls):