diff --git a/.github/workflows/nightly-test.yml b/.github/workflows/nightly-test.yml
index 23b8bb44d83..dccc91f6b52 100644
--- a/.github/workflows/nightly-test.yml
+++ b/.github/workflows/nightly-test.yml
@@ -32,3 +32,9 @@ jobs:
         run: |
           cd test/srt
           python3 run_suite.py --suite nightly --timeout-per-file 3600
+
+      - name: Feature Compatibility Regression Test
+        timeout-minutes: 120
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite feature_compatibility_regression_test --timeout-per-file 3600
diff --git a/python/sglang/srt/function_call_parser.py b/python/sglang/srt/function_call_parser.py
index 4ae8d0a0d01..2aac81e7559 100644
--- a/python/sglang/srt/function_call_parser.py
+++ b/python/sglang/srt/function_call_parser.py
@@ -427,8 +427,10 @@ def detect_and_parse(self, text: str, tools: List[Function]) -> List[ToolCallItem]:
             return StreamingParseResult(normal_text=text, calls=[])
 
         if "<|python_tag|>" in text:
-            _, action_text = text.split("<|python_tag|>")
+            normal_text, action_text = text.split("<|python_tag|>", 1)
+            normal_text = normal_text.strip()
         else:
+            normal_text = ""
             action_text = text
 
         # Split by semicolon and process each part
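
For reference, the behavior the hunk above aims for, written out as a standalone
sketch. The `split_python_tag` helper is hypothetical and for illustration only;
it mirrors, but is not part of, the patched `detect_and_parse`:

def split_python_tag(text: str) -> tuple:
    """Return (normal_text, action_text) for a Llama-style tool-call message."""
    if "<|python_tag|>" in text:
        # Keep the assistant text preceding the tag instead of discarding it.
        normal_text, action_text = text.split("<|python_tag|>", 1)
        return normal_text.strip(), action_text
    return "", text

assert split_python_tag('Sure!<|python_tag|>{"name": "add"}') == (
    "Sure!",
    '{"name": "add"}',
)
assert split_python_tag('{"name": "add"}') == ("", '{"name": "add"}')
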
diff --git a/test/srt/feature_compatibility/test_function_calling.py b/test/srt/feature_compatibility/test_function_calling.py
new file mode 100644
index 00000000000..4d53c27cc60
--- /dev/null
+++ b/test/srt/feature_compatibility/test_function_calling.py
@@ -0,0 +1,418 @@
+"""
+
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_1_8BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_1_70BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_2_1BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_2_11BVisionInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MetaLlama_3_3_70BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.MistralNemo12BInstruct.test_function_calling_required_tool_choice
+
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_format_no_tool_choice_specified
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_named_tool_choice
+python -m unittest test_function_calling.Qwen_2_5_7BInstruct.test_function_calling_required_tool_choice
+
+"""
+
+import json
+import time
+import unittest
+from typing import Optional
+
+import openai
+
+from sglang.srt.hf_transformers_utils import get_tokenizer
+from sglang.srt.utils import kill_process_tree
+from sglang.test.test_utils import (
+    DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+    DEFAULT_URL_FOR_TEST,
+    popen_launch_server,
+)
+
+
+def setup_class(cls, model: str, tool_call_parser: str, grammar_backend: str, tp: int):
+    cls.model = model
+    cls.tool_call_parser = tool_call_parser
+    cls.grammar_backend = grammar_backend
+    cls.tp = tp
+    cls.base_url = DEFAULT_URL_FOR_TEST
+    cls.api_key = "sk-123456"
+
+    # Start the local OpenAI Server
+    cls.process = popen_launch_server(
+        cls.model,
+        cls.base_url,
+        timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
+        api_key=cls.api_key,
+        other_args=[
+            "--tool-call-parser",
+            cls.tool_call_parser,
+            "--tp",
+            str(cls.tp),
+            "--grammar-backend",
+            cls.grammar_backend,
+        ],
+    )
+    cls.base_url += "/v1"
+    cls.tokenizer = get_tokenizer(cls.model)
+
+
+class OpenAIServerFunctionCallingBase(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST,
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        kill_process_tree(cls.process.pid)
+
+    def test_function_calling_format_no_tool_choice_specified(self):
+        """
+        Test: Whether the function call format returned by the AI is correct.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+        )
+
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
+
+    def test_function_calling_named_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when using named function tool choice.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+        """
+        client = openai.Client(api_key=self.api_key, base_url=self.base_url)
+
+        tools = [self.get_add_tool()]
+
+        messages = [{"role": "user", "content": "Compute (3+5)"}]
+        response = client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            temperature=0.8,
+            top_p=0.8,
+            stream=False,
+            tools=tools,
+            tool_choice={"type": "function", "function": {"name": "add"}},
+        )
+
+        self.assert_tool_call_format(
+            response,
+            expected_function_name="add",
+            expected_function_arguments=["a", "b"],
+        )
+
+    def test_function_calling_required_tool_choice(self):
+        """
+        Test: Whether the function call format returned by the AI is correct when using required function tool choice.
+        When returning a tool call, message.content should be None, and tool_calls should be a list.
+ """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice="required", + ) + + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) + + def test_function_calling_auto_tool_choice(self): + """ + Test: Whether the function call format returned by the AI is correct when using auto function tool choice. + When returning a tool call, message.content should be None, and tool_calls should be a list. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [{"role": "user", "content": "Compute (3+5)"}] + response = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.8, + top_p=0.8, + stream=False, + tools=tools, + tool_choice="auto", + ) + + self.assert_tool_call_format( + response, + expected_function_name="add", + expected_function_arguments=["a", "b"], + ) + + def test_function_calling_streaming_args_parsing(self): + """ + Test: Whether the function call arguments returned in streaming mode can be correctly concatenated into valid JSON. + - The user request requires multiple parameters. + - AI may return the arguments in chunks that need to be concatenated. + """ + client = openai.Client(api_key=self.api_key, base_url=self.base_url) + + tools = [self.get_add_tool()] + + messages = [ + {"role": "user", "content": "Please sum 5 and 7, just call the function."} + ] + + response_stream = client.chat.completions.create( + model=self.model, + messages=messages, + temperature=0.9, + top_p=0.9, + stream=True, + tools=tools, + ) + + argument_fragments = [] + function_name = None + for chunk in response_stream: + choice = chunk.choices[0] + if choice.delta.tool_calls: + tool_call = choice.delta.tool_calls[0] + # Record the function name on first occurrence + function_name = tool_call.function.name or function_name + # In case of multiple chunks, JSON fragments may need to be concatenated + if tool_call.function.arguments: + argument_fragments.append(tool_call.function.arguments) + + self.assertEqual(function_name, "add", "Function name should be 'add'") + joined_args = "".join(argument_fragments) + self.assertTrue( + len(joined_args) > 0, + "No parameter fragments were returned in the function call", + ) + + # Check whether the concatenated JSON is valid + try: + args_obj = json.loads(joined_args) + except json.JSONDecodeError: + self.fail( + "The concatenated tool call arguments are not valid JSON, parsing failed" + ) + + self.assertIn("a", args_obj, "Missing parameter 'a'") + self.assertIn("b", args_obj, "Missing parameter 'b'") + self.assertEqual( + args_obj["a"], + 5, + "Parameter a should be 5", + ) + self.assertEqual(args_obj["b"], 7, "Parameter b should be 7") + + def assert_tool_call_format( + self, + response, + expected_function_name: Optional[str] = None, + expected_function_arguments: Optional[list] = None, + ): + content = response.choices[0].message.content + tool_calls = response.choices[0].message.tool_calls + + assert content is None, ( + "When function call is successful, message.content should be None, " + f"but got: {content}" + ) + assert ( + isinstance(tool_calls, list) and len(tool_calls) > 0 + ), "tool_calls should be a non-empty list" + + 
+        function_name = tool_calls[0].function.name
+        if expected_function_name is not None:
+            assert (
+                function_name == expected_function_name
+            ), f"Function name should be '{expected_function_name}'"
+
+        if expected_function_arguments is not None:
+            actual_function_arguments = json.loads(tool_calls[0].function.arguments)
+            assert set(actual_function_arguments) == set(
+                expected_function_arguments
+            ), f"Function argument names should be {expected_function_arguments}, arguments were {actual_function_arguments}"
+
+    def get_add_tool(self):
+        return {
+            "type": "function",
+            "function": {
+                "name": "add",
+                "description": "Compute the sum of two numbers",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "a": {
+                            "type": "integer",
+                            "description": "A number",
+                        },
+                        "b": {
+                            "type": "integer",
+                            "description": "A number",
+                        },
+                    },
+                    "required": ["a", "b"],
+                },
+            },
+        }
+
+    def get_weather_tool(self):
+        return {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "city": {
+                            "type": "string",
+                            "description": "The city to find the weather for",
+                        },
+                        "unit": {
+                            "type": "string",
+                            "description": "Weather unit (celsius or fahrenheit)",
+                            "enum": ["celsius", "fahrenheit"],
+                        },
+                    },
+                    "required": ["city", "unit"],
+                },
+            },
+        }
+
+
+class MetaLlama_3_1_8BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.1-8B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
+
+class MetaLlama_3_1_70BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.1-70B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=2,
+        )
+
+
+@unittest.skip("Tool call parsing is broken for Llama 3.2 models")
+class MetaLlama_3_2_1BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
+
+@unittest.skip("Tool call parsing is broken for Llama 3.2 models")
+class MetaLlama_3_2_11BVisionInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
+
+class MetaLlama_3_3_70BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model="meta-llama/Llama-3.3-70B-Instruct",
+            tool_call_parser="llama3",
+            grammar_backend="outlines",
+            tp=2,
+        )
+
+
+class MistralNemo12BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model="mistralai/Mistral-Nemo-Instruct-2407",
+            tool_call_parser="mistral",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
+
+class Qwen_2_5_7BInstruct(OpenAIServerFunctionCallingBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            model="Qwen/Qwen2.5-7B-Instruct",
+            tool_call_parser="qwen25",
+            grammar_backend="outlines",
+            tp=1,
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
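
The streaming test above reassembles tool-call argument deltas inline; the same
reassembly logic as a self-contained sketch (`collect_first_tool_call` is a
hypothetical helper, assuming openai-python style chunk objects; it is not part
of the patch):

import json
from typing import Iterable, Optional, Tuple

def collect_first_tool_call(chunks: Iterable) -> Tuple[Optional[str], dict]:
    """Reassemble the first streamed tool call from OpenAI-style chat chunks."""
    name, fragments = None, []
    for chunk in chunks:
        delta = chunk.choices[0].delta
        if delta.tool_calls:
            call = delta.tool_calls[0]
            # The name typically arrives once; arguments arrive as JSON fragments.
            name = call.function.name or name
            if call.function.arguments:
                fragments.append(call.function.arguments)
    # Raises json.JSONDecodeError if the concatenated fragments are not valid JSON.
    return name, json.loads("".join(fragments))
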
+""" + +python -m unittest test_json_schema.MetaLlama_3_1_8BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_8BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_8BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MetaLlama_3_1_70BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_70BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_1_70BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MetaLlama_3_2_1BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_1BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_1BInstructLLGuidance.test_json_openai + + +python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_2_11BVisionInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MetaLlama_3_3_70BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_3_70BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MetaLlama_3_3_70BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.MistralNemo12BInstructOutlines.test_json_openai +python -m unittest test_json_schema.MistralNemo12BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.MistralNemo12BInstructLLGuidance.test_json_openai + +python -m unittest test_json_schema.Qwen_2_5_7BInstructOutlines.test_json_openai +python -m unittest test_json_schema.Qwen_2_5_7BInstructXGrammar.test_json_openai +python -m unittest test_json_schema.Qwen_2_5_7BInstructLLGuidance.test_json_openai + +""" + +import json +import unittest + +import openai + +from sglang.srt.utils import kill_process_tree +from sglang.test.test_utils import ( + DEFAULT_SMALL_MODEL_NAME_FOR_TEST, + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) + + +def setup_class(cls, backend: str, model: str, tp: int): + cls.model = model + cls.base_url = DEFAULT_URL_FOR_TEST + cls.json_schema = json.dumps( + { + "type": "object", + "properties": { + "name": {"type": "string"}, + "population": {"type": "integer"}, + }, + "required": ["name", "population"], + "additionalProperties": False, + } + ) + + other_args = [ + "--grammar-backend", + backend, + "--tp", + str(tp), + ] + + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=other_args, + ) + + +class TestJSONSchemaBase(unittest.TestCase): + @classmethod + def setUpClass(cls): + setup_class(cls, backend="outlines", model=DEFAULT_SMALL_MODEL_NAME_FOR_TEST) + + @classmethod + def tearDownClass(cls): + kill_process_tree(cls.process.pid) + + def test_json_openai(self): + client = openai.Client(api_key="EMPTY", base_url=f"{self.base_url}/v1") + + response = client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": "You are a helpful AI assistant"}, + {"role": "user", "content": "Introduce the capital of France."}, + ], + temperature=0, + max_tokens=128, + response_format={ + "type": "json_schema", + "json_schema": {"name": "foo", "schema": json.loads(self.json_schema)}, + }, + ) + text = response.choices[0].message.content + + try: + js_obj = json.loads(text) + except (TypeError, 
+            print("JSONDecodeError", text)
+            raise
+
+        self.assertIsInstance(js_obj["name"], str)
+        self.assertIsInstance(js_obj["population"], int)
+
+
+# MetaLlama_3_1_8BInstruct
+
+
+class MetaLlama_3_1_8BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="outlines", model="meta-llama/Llama-3.1-8B-Instruct", tp=1
+        )
+
+
+class MetaLlama_3_1_8BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="xgrammar", model="meta-llama/Llama-3.1-8B-Instruct", tp=1
+        )
+
+
+class MetaLlama_3_1_8BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="llguidance", model="meta-llama/Llama-3.1-8B-Instruct", tp=1
+        )
+
+
+# MetaLlama_3_1_70BInstruct
+
+
+class MetaLlama_3_1_70BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="outlines", model="meta-llama/Llama-3.1-70B-Instruct", tp=2
+        )
+
+
+class MetaLlama_3_1_70BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="xgrammar", model="meta-llama/Llama-3.1-70B-Instruct", tp=2
+        )
+
+
+class MetaLlama_3_1_70BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="llguidance", model="meta-llama/Llama-3.1-70B-Instruct", tp=2
+        )
+
+
+# MetaLlama_3_2_1BInstruct
+
+
+class MetaLlama_3_2_1BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="outlines",
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tp=1,
+        )
+
+
+class MetaLlama_3_2_1BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="xgrammar",
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tp=1,
+        )
+
+
+class MetaLlama_3_2_1BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="llguidance",
+            model="meta-llama/Llama-3.2-1B-Instruct",
+            tp=1,
+        )
+
+
+# MetaLlama_3_2_11BVisionInstruct
+
+
+@unittest.skip("Engine fails to start with 3.2-11b-vision-instruct and outlines")
+class MetaLlama_3_2_11BVisionInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="outlines",
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tp=1,
+        )
+
+
+class MetaLlama_3_2_11BVisionInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="xgrammar",
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tp=1,
+        )
+
+
+class MetaLlama_3_2_11BVisionInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="llguidance",
+            model="meta-llama/Llama-3.2-11B-Vision-Instruct",
+            tp=1,
+        )
+
+
+# MetaLlama_3_3_70BInstruct
+
+
+class MetaLlama_3_3_70BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="outlines", model="meta-llama/Llama-3.3-70B-Instruct", tp=2
+        )
+
+
+class MetaLlama_3_3_70BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="xgrammar", model="meta-llama/Llama-3.3-70B-Instruct", tp=2
+        )
+
+
+class MetaLlama_3_3_70BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="llguidance", model="meta-llama/Llama-3.3-70B-Instruct", tp=2
+        )
+
+
+# MistralNemo12BInstruct
+
+
+class MistralNemo12BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="outlines", model="mistralai/Mistral-Nemo-Instruct-2407", tp=1
+        )
+
+
+class MistralNemo12BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls, backend="xgrammar", model="mistralai/Mistral-Nemo-Instruct-2407", tp=1
+        )
+
+
+class MistralNemo12BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(
+            cls,
+            backend="llguidance",
+            model="mistralai/Mistral-Nemo-Instruct-2407",
+            tp=1,
+        )
+
+
+# Qwen_2_5_7BInstruct
+
+
+class Qwen_2_5_7BInstructOutlines(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="outlines", model="Qwen/Qwen2.5-7B-Instruct", tp=1)
+
+
+class Qwen_2_5_7BInstructXGrammar(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="xgrammar", model="Qwen/Qwen2.5-7B-Instruct", tp=1)
+
+
+class Qwen_2_5_7BInstructLLGuidance(TestJSONSchemaBase):
+    @classmethod
+    def setUpClass(cls):
+        setup_class(cls, backend="llguidance", model="Qwen/Qwen2.5-7B-Instruct", tp=1)
+
+
+if __name__ == "__main__":
+    unittest.main()
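
test_json_openai spot-checks the two fields by hand; a stricter variant could
validate the decoded object against the full schema. A minimal sketch, assuming
the third-party jsonschema package is available in the test environment (it is
not a dependency of this patch):

import json

import jsonschema  # assumption: third-party package, not required by the suite

schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "population": {"type": "integer"},
    },
    "required": ["name", "population"],
    "additionalProperties": False,
}

# Sample constrained output; validate() raises ValidationError on any mismatch.
jsonschema.validate(json.loads('{"name": "Paris", "population": 2102650}'), schema)
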
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index e4e5f32526a..2689d14034b 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -80,6 +80,10 @@ class TestFile:
     "nightly": [
         TestFile("test_nightly_gsm8k_eval.py"),
     ],
+    "feature_compatibility_regression_test": [
+        TestFile("feature_compatibility/test_function_calling.py"),
+        TestFile("feature_compatibility/test_json_schema.py"),
+    ],
 }