From 864aa97b8c6dc23d552799a0d8b6182282d6e967 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Fri, 30 Aug 2024 14:35:48 +0100 Subject: [PATCH 1/6] Update azure.py llama function call parsing 1. Update the llama parsing for Llama calls with functions, where the functions are not used to produce the response. 2. Remove useless chunk code from provider.py --- llmstudio/engine/providers/azure.py | 137 +++++++++++++------------ llmstudio/engine/providers/provider.py | 22 ---- 2 files changed, 70 insertions(+), 89 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 1bb61516..418553e8 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -14,6 +14,7 @@ Union, ) +import ast # Add this import to safely evaluate string representations of lists/dicts import openai from fastapi import HTTPException from openai import AzureOpenAI, OpenAI @@ -174,7 +175,9 @@ async def handle_tool_response( function_call_buffer = "" saving = False + normal_call_chunks = [] for chunk in response: + chunk if chunk.choices[0].delta.content is not None: if ( "§" in chunk.choices[0].delta.content @@ -224,8 +227,11 @@ async def handle_tool_response( yield finish_chunk else: - yield chunk.model_dump() - + normal_call_chunks.append(chunk) + if chunk.choices[0].finish_reason == "stop": + for chunk in normal_call_chunks: + yield chunk.model_dump() + def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict: return ChatCompletionChunk( id=str(uuid.uuid4()), @@ -433,14 +439,15 @@ def add_tool_instructions(self, tools: list) -> str: tool_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: §{"type": "function", "name": "FUNCTION_NAME", "parameters": {"PARAMETER_NAME": PARAMETER_VALUE}} +IMPORTANT: IT IS VITAL THAT YOU NEVER ADD A PREFIX OR A SUFFIX TO THE FUNCTION CALL. Here is an example of the output I desiere when performing function call: §{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}} +NOTE: There is no prefix before the symbol '§' and nothing comes after the call is done. Reminder: - Function calls MUST follow the specified format. - Only call one function at a time. - - NEVER call more than one function at a time. - Required parameters MUST be specified. - Put the entire function call reply on one line. - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. @@ -456,10 +463,10 @@ def add_function_instructions(self, functions: list) -> str: for func in functions: function_prompt += ( - f"Use the function '{func['name']}' to '{func['description']}':\n" + f"Use the function '{func['name']}' to: '{func['description']}'\n" ) params_info = json.dumps(func["parameters"], indent=4) - function_prompt += f"Parameters format:\n{params_info}\n\n" + function_prompt += f"{params_info}\n\n" function_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: @@ -477,7 +484,6 @@ def add_function_instructions(self, functions: list) -> str: - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. - If you have already called a function and got the response for the user's question, please reply with the response. """ - return function_prompt def add_conversation(self, openai_message: list, llama_message: str) -> str: @@ -485,66 +491,63 @@ def add_conversation(self, openai_message: list, llama_message: str) -> str: for message in openai_message: if message["role"] == "system": continue - elif "tool_calls" in message: - for tool_call in message["tool_calls"]: - function_name = tool_call["function"]["name"] - arguments = tool_call["function"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif "tool_call_id" in message: - tool_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {tool_response} - <|eot_id|> - """ - ) - elif "function_call" in message: - function_name = message["function_call"]["name"] - arguments = message["function_call"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) - elif message["role"] == "function": - function_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {function_response} - <|eot_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) + elif message["role"] == "user" and isinstance(message["content"], str): + try: + # Attempt to safely evaluate the string to a Python object + content_as_list = ast.literal_eval(message["content"]) + if isinstance(content_as_list, list): + # If the content is a list, process each nested message + for nested_message in content_as_list: + conversation_parts.append(self.format_message(nested_message)) + else: + # If the content is not a list, append it directly + conversation_parts.append(self.format_message(message)) + except (ValueError, SyntaxError): + # If evaluation fails or content is not a list/dict string, append the message directly + conversation_parts.append(self.format_message(message)) + else: + # For all other messages, use the existing formatting logic + conversation_parts.append(self.format_message(message)) return llama_message + "".join(conversation_parts) + + def format_message(self, message: dict) -> str: + """Format a single message for the conversation.""" + if "tool_calls" in message: + for tool_call in message["tool_calls"]: + function_name = tool_call["function"]["name"] + arguments = tool_call["function"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif "tool_call_id" in message: + tool_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {tool_response} + <|eot_id|> + """ + elif "function_call" in message: + function_name = message["function_call"]["name"] + arguments = message["function_call"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif message["role"] in ["assistant", "user"] and message["content"] is not None: + return f""" + <|start_header_id|>{message['role']}<|end_header_id|> + {message['content']} + <|eot_id|> + """ + elif message["role"] == "function": + function_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {function_response} + <|eot_id|> + """ + return "" \ No newline at end of file diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index 6c37bbf9..c4204952 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -268,28 +268,6 @@ def join_chunks(self, chunks, request): ): function_call_arguments += chunk.get("arguments") - chunk = ChatCompletion( - id=chunks[-1].get("id"), - created=chunks[-1].get("created"), - model=chunks[-1].get("model"), - object="chat.completion", - choices=[ - Choice( - finish_reason="function_call", - index=0, - logprobs=None, - message=ChatCompletionMessage( - content=None, - role="assistant", - tool_calls=None, - function_call=FunctionCall( - arguments=function_call_arguments, - name=function_call_name, - ), - ), - ) - ], - ) return ( ChatCompletion( id=chunks[-1].get("id"), From a98aafe8d3c63a0a261c5f52541f4df5e33c0e61 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Fri, 30 Aug 2024 15:28:16 +0100 Subject: [PATCH 2/6] Solve Lint issues --- llmstudio/engine/providers/azure.py | 14 +++++++++----- llmstudio/llm/langchain.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 418553e8..0ccfc452 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -1,3 +1,4 @@ +import ast # Add this import to safely evaluate string representations of lists/dicts import asyncio import json import os @@ -14,7 +15,6 @@ Union, ) -import ast # Add this import to safely evaluate string representations of lists/dicts import openai from fastapi import HTTPException from openai import AzureOpenAI, OpenAI @@ -231,7 +231,7 @@ async def handle_tool_response( if chunk.choices[0].finish_reason == "stop": for chunk in normal_call_chunks: yield chunk.model_dump() - + def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict: return ChatCompletionChunk( id=str(uuid.uuid4()), @@ -498,7 +498,9 @@ def add_conversation(self, openai_message: list, llama_message: str) -> str: if isinstance(content_as_list, list): # If the content is a list, process each nested message for nested_message in content_as_list: - conversation_parts.append(self.format_message(nested_message)) + conversation_parts.append( + self.format_message(nested_message) + ) else: # If the content is not a list, append it directly conversation_parts.append(self.format_message(message)) @@ -537,7 +539,9 @@ def format_message(self, message: dict) -> str: {arguments} <|eom_id|> """ - elif message["role"] in ["assistant", "user"] and message["content"] is not None: + elif ( + message["role"] in ["assistant", "user"] and message["content"] is not None + ): return f""" <|start_header_id|>{message['role']}<|end_header_id|> {message['content']} @@ -550,4 +554,4 @@ def format_message(self, message: dict) -> str: {function_response} <|eot_id|> """ - return "" \ No newline at end of file + return "" diff --git a/llmstudio/llm/langchain.py b/llmstudio/llm/langchain.py index a65cd922..e25610f3 100644 --- a/llmstudio/llm/langchain.py +++ b/llmstudio/llm/langchain.py @@ -22,7 +22,7 @@ def __init__(self, model_id: str, **kwargs): @property def _llm_type(self): return "LLMstudio" - + # @property # def model_id(self) -> str: # return self.model_id From b368b7772ba19287d290cfe06d1aa99bef91f560 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Mon, 2 Sep 2024 12:08:11 +0100 Subject: [PATCH 3/6] Update azure.py --- llmstudio/engine/providers/azure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 0ccfc452..92ffd577 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -177,7 +177,6 @@ async def handle_tool_response( saving = False normal_call_chunks = [] for chunk in response: - chunk if chunk.choices[0].delta.content is not None: if ( "§" in chunk.choices[0].delta.content From c173fa6a0ce1077a46a2c2b6f9bac0e6c856ae8c Mon Sep 17 00:00:00 2001 From: diogoazevedo15 <68329635+diogoazevedo15@users.noreply.github.com> Date: Mon, 2 Sep 2024 14:36:36 +0100 Subject: [PATCH 4/6] [update] azure-llama-function-call-parsing (#124) * Update azure.py llama function call parsing 1. Update the llama parsing for Llama calls with functions, where the functions are not used to produce the response. 2. Remove useless chunk code from provider.py * Solve Lint issues * Update azure.py --- llmstudio/engine/providers/azure.py | 138 +++++++++++++------------ llmstudio/engine/providers/provider.py | 22 ---- 2 files changed, 72 insertions(+), 88 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 1bb61516..92ffd577 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -1,3 +1,4 @@ +import ast # Add this import to safely evaluate string representations of lists/dicts import asyncio import json import os @@ -174,6 +175,7 @@ async def handle_tool_response( function_call_buffer = "" saving = False + normal_call_chunks = [] for chunk in response: if chunk.choices[0].delta.content is not None: if ( @@ -224,7 +226,10 @@ async def handle_tool_response( yield finish_chunk else: - yield chunk.model_dump() + normal_call_chunks.append(chunk) + if chunk.choices[0].finish_reason == "stop": + for chunk in normal_call_chunks: + yield chunk.model_dump() def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict: return ChatCompletionChunk( @@ -433,14 +438,15 @@ def add_tool_instructions(self, tools: list) -> str: tool_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: §{"type": "function", "name": "FUNCTION_NAME", "parameters": {"PARAMETER_NAME": PARAMETER_VALUE}} +IMPORTANT: IT IS VITAL THAT YOU NEVER ADD A PREFIX OR A SUFFIX TO THE FUNCTION CALL. Here is an example of the output I desiere when performing function call: §{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}} +NOTE: There is no prefix before the symbol '§' and nothing comes after the call is done. Reminder: - Function calls MUST follow the specified format. - Only call one function at a time. - - NEVER call more than one function at a time. - Required parameters MUST be specified. - Put the entire function call reply on one line. - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. @@ -456,10 +462,10 @@ def add_function_instructions(self, functions: list) -> str: for func in functions: function_prompt += ( - f"Use the function '{func['name']}' to '{func['description']}':\n" + f"Use the function '{func['name']}' to: '{func['description']}'\n" ) params_info = json.dumps(func["parameters"], indent=4) - function_prompt += f"Parameters format:\n{params_info}\n\n" + function_prompt += f"{params_info}\n\n" function_prompt += """ If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix: @@ -477,7 +483,6 @@ def add_function_instructions(self, functions: list) -> str: - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls. - If you have already called a function and got the response for the user's question, please reply with the response. """ - return function_prompt def add_conversation(self, openai_message: list, llama_message: str) -> str: @@ -485,66 +490,67 @@ def add_conversation(self, openai_message: list, llama_message: str) -> str: for message in openai_message: if message["role"] == "system": continue - elif "tool_calls" in message: - for tool_call in message["tool_calls"]: - function_name = tool_call["function"]["name"] - arguments = tool_call["function"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif "tool_call_id" in message: - tool_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {tool_response} - <|eot_id|> - """ - ) - elif "function_call" in message: - function_name = message["function_call"]["name"] - arguments = message["function_call"]["arguments"] - conversation_parts.append( - f""" - <|start_header_id|>assistant<|end_header_id|> - {arguments} - <|eom_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) - elif message["role"] == "function": - function_response = message["content"] - conversation_parts.append( - f""" - <|start_header_id|>ipython<|end_header_id|> - {function_response} - <|eot_id|> - """ - ) - elif ( - message["role"] in ["assistant", "user"] - and message["content"] is not None - ): - conversation_parts.append( - f""" - <|start_header_id|>{message['role']}<|end_header_id|> - {message['content']} - <|eot_id|> - """ - ) + elif message["role"] == "user" and isinstance(message["content"], str): + try: + # Attempt to safely evaluate the string to a Python object + content_as_list = ast.literal_eval(message["content"]) + if isinstance(content_as_list, list): + # If the content is a list, process each nested message + for nested_message in content_as_list: + conversation_parts.append( + self.format_message(nested_message) + ) + else: + # If the content is not a list, append it directly + conversation_parts.append(self.format_message(message)) + except (ValueError, SyntaxError): + # If evaluation fails or content is not a list/dict string, append the message directly + conversation_parts.append(self.format_message(message)) + else: + # For all other messages, use the existing formatting logic + conversation_parts.append(self.format_message(message)) return llama_message + "".join(conversation_parts) + + def format_message(self, message: dict) -> str: + """Format a single message for the conversation.""" + if "tool_calls" in message: + for tool_call in message["tool_calls"]: + function_name = tool_call["function"]["name"] + arguments = tool_call["function"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif "tool_call_id" in message: + tool_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {tool_response} + <|eot_id|> + """ + elif "function_call" in message: + function_name = message["function_call"]["name"] + arguments = message["function_call"]["arguments"] + return f""" + <|start_header_id|>assistant<|end_header_id|> + {arguments} + <|eom_id|> + """ + elif ( + message["role"] in ["assistant", "user"] and message["content"] is not None + ): + return f""" + <|start_header_id|>{message['role']}<|end_header_id|> + {message['content']} + <|eot_id|> + """ + elif message["role"] == "function": + function_response = message["content"] + return f""" + <|start_header_id|>ipython<|end_header_id|> + {function_response} + <|eot_id|> + """ + return "" diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index 6c37bbf9..c4204952 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -268,28 +268,6 @@ def join_chunks(self, chunks, request): ): function_call_arguments += chunk.get("arguments") - chunk = ChatCompletion( - id=chunks[-1].get("id"), - created=chunks[-1].get("created"), - model=chunks[-1].get("model"), - object="chat.completion", - choices=[ - Choice( - finish_reason="function_call", - index=0, - logprobs=None, - message=ChatCompletionMessage( - content=None, - role="assistant", - tool_calls=None, - function_call=FunctionCall( - arguments=function_call_arguments, - name=function_call_name, - ), - ), - ) - ], - ) return ( ChatCompletion( id=chunks[-1].get("id"), From 66bc3ae3e7cd98acc85718b70293925811019ae9 Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Mon, 2 Sep 2024 18:06:24 +0100 Subject: [PATCH 5/6] Update input_to_string Updated the method input_to_string to ensure compatibility with vision models. --- llmstudio/engine/providers/azure.py | 2 +- llmstudio/engine/providers/provider.py | 44 ++++++++++---------------- 2 files changed, 17 insertions(+), 29 deletions(-) diff --git a/llmstudio/engine/providers/azure.py b/llmstudio/engine/providers/azure.py index 92ffd577..b4f4d349 100644 --- a/llmstudio/engine/providers/azure.py +++ b/llmstudio/engine/providers/azure.py @@ -1,4 +1,4 @@ -import ast # Add this import to safely evaluate string representations of lists/dicts +import ast import asyncio import json import os diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index c4204952..4c8d0c91 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -79,13 +79,14 @@ async def chat( if request.is_stream: return StreamingResponse(response_handler) else: - return JSONResponse(content=await response_handler.__anext__()) + return JSONResponse(content= await response_handler.__anext__()) except HTTPException as e: if e.status_code == 429: continue # Retry on rate limit error else: raise e # Raise other HTTP exceptions except Exception as e: + print(e) raise HTTPException( status_code=500, detail=str(e) ) # Raise other exceptions as HTTP 500 @@ -310,26 +311,6 @@ def join_chunks(self, chunks, request): ) ) - chunk = ChatCompletion( - id=chunks[-1].get("id"), - created=chunks[-1].get("created"), - model=chunks[-1].get("model"), - object="chat.completion", - choices=[ - Choice( - finish_reason="stop", - index=0, - logprobs=None, - message=ChatCompletionMessage( - content=stop_content, - role="assistant", - function_call=None, - tool_calls=None, - ), - ) - ], - ) - return ( ChatCompletion( id=chunks[-1].get("id"), @@ -406,13 +387,20 @@ def input_to_string(self, input): if isinstance(input, str): return input else: - return "".join( - [ - message.get("content", "") - for message in input - if message.get("content") is not None - ] - ) + result = [] + for message in input: + if message.get("content") is not None: + if isinstance(message["content"], str): + result.append(message["content"]) + elif isinstance(message["content"], list) and message.get("role") == "user": + for item in message["content"]: + if item.get("type") == "text": + result.append(item.get("text", "")) + elif item.get("type") == "image_url": + url = item.get("image_url", {}).get("url", "") + result.append(url) + return "".join(result) + def output_to_string(self, output): if output.choices[0].finish_reason == "stop": From ce74c6b920ec69d8051d324e6d37a07de86f23fe Mon Sep 17 00:00:00 2001 From: diogoazevedo15 Date: Mon, 2 Sep 2024 18:11:14 +0100 Subject: [PATCH 6/6] Fix lint issues --- llmstudio/engine/providers/provider.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llmstudio/engine/providers/provider.py b/llmstudio/engine/providers/provider.py index 4c8d0c91..354906c5 100644 --- a/llmstudio/engine/providers/provider.py +++ b/llmstudio/engine/providers/provider.py @@ -79,7 +79,7 @@ async def chat( if request.is_stream: return StreamingResponse(response_handler) else: - return JSONResponse(content= await response_handler.__anext__()) + return JSONResponse(content=await response_handler.__anext__()) except HTTPException as e: if e.status_code == 429: continue # Retry on rate limit error @@ -392,7 +392,10 @@ def input_to_string(self, input): if message.get("content") is not None: if isinstance(message["content"], str): result.append(message["content"]) - elif isinstance(message["content"], list) and message.get("role") == "user": + elif ( + isinstance(message["content"], list) + and message.get("role") == "user" + ): for item in message["content"]: if item.get("type") == "text": result.append(item.get("text", "")) @@ -401,7 +404,6 @@ def input_to_string(self, input): result.append(url) return "".join(result) - def output_to_string(self, output): if output.choices[0].finish_reason == "stop": return output.choices[0].message.content