
Commit 3567855

Version 0.3.11 (#130)
## LLMstudio Version 0.3.11

### What was done in this version:
- Updated the `input_to_string` method in provider.py to ensure compatibility with vision models -- [PR 126](#126)
- Added events to the startup process of the tracking, UI, and engine servers. This removes the race conditions we were experiencing repeatedly and also removes the need to run `start_server()` as early as possible (a minimal sketch of the pattern follows below) -- [PR 129](#129).
- Improved exception handling for invalid Azure endpoints -- [PR 129](#129).

### How it was tested:
- Ran projects with LLMStudio server dependencies.

### Additional notes:
- Any breaking changes? No.
- Any new dependencies added? No.
- Any performance improvements? Yes. Servers are now launched synchronously, which prevents parent PIDs from calling LLMStudio before it is up.
2 parents 21cc916 + 41f7c11 commit 3567855
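The headline change here is the startup handshake: each server thread signals readiness through a `threading.Event`, and the parent blocks on it instead of racing ahead. Below is a minimal, self-contained sketch of that pattern; `make_app`, `run_app`, `HOST`, and `PORT` are illustrative names, not the actual LLMstudio entry points (the real wiring is in `llmstudio/engine/__init__.py` and `llmstudio/server.py`, shown in the diffs that follow).

```python
import threading
from threading import Event

import uvicorn
from fastapi import FastAPI

HOST, PORT = "localhost", 8000  # illustrative values, not LLMstudio defaults


def make_app(started_event: Event) -> FastAPI:
    """Hypothetical app factory: the app signals readiness via the event."""
    app = FastAPI()

    @app.on_event("startup")
    async def startup_event():
        started_event.set()  # fires once uvicorn has finished booting the app

    return app


def run_app(started_event: Event):
    uvicorn.run(make_app(started_event), host=HOST, port=PORT)


if __name__ == "__main__":
    started = Event()
    thread = threading.Thread(target=run_app, daemon=True, args=(started,))
    thread.start()
    started.wait()  # parent blocks here until the server is actually up
    print("server is up; safe to issue requests")
```

By the time `wait()` returns, the child server is accepting requests, which is why `start_server()` no longer needs to run as early as possible.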

File tree

8 files changed, +118 -129 lines changed


llmstudio/engine/__init__.py

Lines changed: 7 additions & 3 deletions
@@ -1,6 +1,7 @@
 import json
 import os
 from pathlib import Path
+from threading import Event
 from typing import Any, Dict, List, Optional, Union

 import uvicorn
@@ -78,7 +79,9 @@ def _merge_configs(config1, config2):
         raise RuntimeError(f"Error in configuration data: {e}")


-def create_engine_app(config: EngineConfig = _load_engine_config()) -> FastAPI:
+def create_engine_app(
+    started_event: Event, config: EngineConfig = _load_engine_config()
+) -> FastAPI:
     app = FastAPI(
         title=ENGINE_TITLE,
         description=ENGINE_DESCRIPTION,
@@ -162,14 +165,15 @@ async def export(request: Request):

     @app.on_event("startup")
     async def startup_event():
+        started_event.set()
         print(f"Running LLMstudio Engine on http://{ENGINE_HOST}:{ENGINE_PORT} ")

     return app


-def run_engine_app():
+def run_engine_app(started_event: Event):
     try:
-        engine = create_engine_app()
+        engine = create_engine_app(started_event)
         uvicorn.run(
             engine,
             host=ENGINE_HOST,

llmstudio/engine/providers/azure.py

Lines changed: 81 additions & 68 deletions
@@ -1,3 +1,4 @@
+import ast
 import asyncio
 import json
 import os
@@ -120,13 +121,17 @@ async def generate_client(
                 **function_args,
                 **request.parameters.model_dump(),
             }
-
             # Perform the asynchronous call
             return await asyncio.to_thread(
                 client.chat.completions.create, **combined_args
             )

-        except openai._exceptions.APIError as e:
+        except openai._exceptions.APIConnectionError as e:
+            raise HTTPException(
+                status_code=404, detail=f"There was an error reaching the endpoint: {e}"
+            )
+
+        except openai._exceptions.APIStatusError as e:
             raise HTTPException(status_code=e.status_code, detail=e.response.json())

     def prepare_messages(self, request: AzureRequest):
@@ -174,6 +179,7 @@ async def handle_tool_response(

         function_call_buffer = ""
         saving = False
+        normal_call_chunks = []
         for chunk in response:
             if chunk.choices[0].delta.content is not None:
                 if (
@@ -224,7 +230,13 @@ async def handle_tool_response(
                 yield finish_chunk

             else:
-                yield chunk.model_dump()
+                normal_call_chunks.append(chunk)
+                if chunk.choices[0].finish_reason == "stop":
+                    for chunk in normal_call_chunks:
+                        normal_call_chunks.append(chunk)
+                        if chunk.choices[0].finish_reason == "stop":
+                            for chunk in normal_call_chunks:
+                                yield chunk.model_dump()

     def create_tool_name_chunk(self, function_name: str, kwargs: dict) -> dict:
         return ChatCompletionChunk(
@@ -433,14 +445,15 @@ def add_tool_instructions(self, tools: list) -> str:
         tool_prompt += """
 If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix:
 §{"type": "function", "name": "FUNCTION_NAME", "parameters": {"PARAMETER_NAME": PARAMETER_VALUE}}
+IMPORTANT: IT IS VITAL THAT YOU NEVER ADD A PREFIX OR A SUFFIX TO THE FUNCTION CALL.

 Here is an example of the output I desiere when performing function call:
 §{"type": "function", "name": "python_repl_ast", "parameters": {"query": "print(df.shape)"}}
+NOTE: There is no prefix before the symbol '§' and nothing comes after the call is done.

 Reminder:
 - Function calls MUST follow the specified format.
 - Only call one function at a time.
-- NEVER call more than one function at a time.
 - Required parameters MUST be specified.
 - Put the entire function call reply on one line.
 - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls.
@@ -456,10 +469,10 @@ def add_function_instructions(self, functions: list) -> str:

         for func in functions:
             function_prompt += (
-                f"Use the function '{func['name']}' to '{func['description']}':\n"
+                f"Use the function '{func['name']}' to: '{func['description']}'\n"
             )
             params_info = json.dumps(func["parameters"], indent=4)
-            function_prompt += f"Parameters format:\n{params_info}\n\n"
+            function_prompt += f"{params_info}\n\n"

         function_prompt += """
 If you choose to use a function to produce this response, ONLY reply in the following format with no prefix or suffix:
@@ -477,74 +490,74 @@ def add_function_instructions(self, functions: list) -> str:
 - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls.
 - If you have already called a function and got the response for the user's question, please reply with the response.
         """
-
         return function_prompt

     def add_conversation(self, openai_message: list, llama_message: str) -> str:
         conversation_parts = []
         for message in openai_message:
             if message["role"] == "system":
                 continue
-            elif "tool_calls" in message:
-                for tool_call in message["tool_calls"]:
-                    function_name = tool_call["function"]["name"]
-                    arguments = tool_call["function"]["arguments"]
-                    conversation_parts.append(
-                        f"""
-<|start_header_id|>assistant<|end_header_id|>
-<function={function_name}>{arguments}</function>
-<|eom_id|>
-"""
-                    )
-            elif "tool_call_id" in message:
-                tool_response = message["content"]
-                conversation_parts.append(
-                    f"""
-<|start_header_id|>ipython<|end_header_id|>
-{tool_response}
-<|eot_id|>
-"""
-                )
-            elif "function_call" in message:
-                function_name = message["function_call"]["name"]
-                arguments = message["function_call"]["arguments"]
-                conversation_parts.append(
-                    f"""
-<|start_header_id|>assistant<|end_header_id|>
-<function={function_name}>{arguments}</function>
-<|eom_id|>
-"""
-                )
-            elif (
-                message["role"] in ["assistant", "user"]
-                and message["content"] is not None
-            ):
-                conversation_parts.append(
-                    f"""
-<|start_header_id|>{message['role']}<|end_header_id|>
-{message['content']}
-<|eot_id|>
-"""
-                )
-            elif message["role"] == "function":
-                function_response = message["content"]
-                conversation_parts.append(
-                    f"""
-<|start_header_id|>ipython<|end_header_id|>
-{function_response}
-<|eot_id|>
-"""
-                )
-            elif (
-                message["role"] in ["assistant", "user"]
-                and message["content"] is not None
-            ):
-                conversation_parts.append(
-                    f"""
-<|start_header_id|>{message['role']}<|end_header_id|>
-{message['content']}
-<|eot_id|>
-"""
-                )
+            elif message["role"] == "user" and isinstance(message["content"], str):
+                try:
+                    # Attempt to safely evaluate the string to a Python object
+                    content_as_list = ast.literal_eval(message["content"])
+                    if isinstance(content_as_list, list):
+                        # If the content is a list, process each nested message
+                        for nested_message in content_as_list:
+                            conversation_parts.append(
+                                self.format_message(nested_message)
+                            )
+                    else:
+                        # If the content is not a list, append it directly
+                        conversation_parts.append(self.format_message(message))
+                except (ValueError, SyntaxError):
+                    # If evaluation fails or content is not a list/dict string, append the message directly
+                    conversation_parts.append(self.format_message(message))
+            else:
+                # For all other messages, use the existing formatting logic
+                conversation_parts.append(self.format_message(message))

         return llama_message + "".join(conversation_parts)
+
+    def format_message(self, message: dict) -> str:
+        """Format a single message for the conversation."""
+        if "tool_calls" in message:
+            for tool_call in message["tool_calls"]:
+                function_name = tool_call["function"]["name"]
+                arguments = tool_call["function"]["arguments"]
+                return f"""
+<|start_header_id|>assistant<|end_header_id|>
+<function={function_name}>{arguments}</function>
+<|eom_id|>
+"""
+        elif "tool_call_id" in message:
+            tool_response = message["content"]
+            return f"""
+<|start_header_id|>ipython<|end_header_id|>
+{tool_response}
+<|eot_id|>
+"""
+        elif "function_call" in message:
+            function_name = message["function_call"]["name"]
+            arguments = message["function_call"]["arguments"]
+            return f"""
+<|start_header_id|>assistant<|end_header_id|>
+<function={function_name}>{arguments}</function>
+<|eom_id|>
+"""
+        elif (
+            message["role"] in ["assistant", "user"] and message["content"] is not None
+        ):
+            return f"""
+<|start_header_id|>{message['role']}<|end_header_id|>
+{message['content']}
+<|eot_id|>
+"""
+        elif message["role"] == "function":
+            function_response = message["content"]
+            return f"""
+<|start_header_id|>ipython<|end_header_id|>
+{function_response}
+<|eot_id|>
+"""
+        return ""

llmstudio/engine/providers/ollama.py

Lines changed: 0 additions & 1 deletion
@@ -64,7 +64,6 @@ async def parse_response(
             if "error" in chunk:
                 raise HTTPException(status_code=500, detail=chunk["error"])
             if chunk.get("done"):
-                print("done")
                 yield ChatCompletionChunk(
                     id=str(uuid.uuid4()),
                     choices=[

llmstudio/engine/providers/provider.py

Lines changed: 16 additions & 49 deletions
@@ -268,28 +268,6 @@ def join_chunks(self, chunks, request):
             ):
                 function_call_arguments += chunk.get("arguments")

-            chunk = ChatCompletion(
-                id=chunks[-1].get("id"),
-                created=chunks[-1].get("created"),
-                model=chunks[-1].get("model"),
-                object="chat.completion",
-                choices=[
-                    Choice(
-                        finish_reason="function_call",
-                        index=0,
-                        logprobs=None,
-                        message=ChatCompletionMessage(
-                            content=None,
-                            role="assistant",
-                            tool_calls=None,
-                            function_call=FunctionCall(
-                                arguments=function_call_arguments,
-                                name=function_call_name,
-                            ),
-                        ),
-                    )
-                ],
-            )
             return (
                 ChatCompletion(
                     id=chunks[-1].get("id"),
@@ -332,26 +310,6 @@ def join_chunks(self, chunks, request):
                 )
             )

-            chunk = ChatCompletion(
-                id=chunks[-1].get("id"),
-                created=chunks[-1].get("created"),
-                model=chunks[-1].get("model"),
-                object="chat.completion",
-                choices=[
-                    Choice(
-                        finish_reason="stop",
-                        index=0,
-                        logprobs=None,
-                        message=ChatCompletionMessage(
-                            content=stop_content,
-                            role="assistant",
-                            function_call=None,
-                            tool_calls=None,
-                        ),
-                    )
-                ],
-            )
-
             return (
                 ChatCompletion(
                     id=chunks[-1].get("id"),
@@ -428,13 +386,22 @@ def input_to_string(self, input):
         if isinstance(input, str):
             return input
         else:
-            return "".join(
-                [
-                    message.get("content", "")
-                    for message in input
-                    if message.get("content") is not None
-                ]
-            )
+            result = []
+            for message in input:
+                if message.get("content") is not None:
+                    if isinstance(message["content"], str):
+                        result.append(message["content"])
+                    elif (
+                        isinstance(message["content"], list)
+                        and message.get("role") == "user"
+                    ):
+                        for item in message["content"]:
+                            if item.get("type") == "text":
+                                result.append(item.get("text", ""))
+                            elif item.get("type") == "image_url":
+                                url = item.get("image_url", {}).get("url", "")
+                                result.append(url)
+            return "".join(result)

     def output_to_string(self, output):
         if output.choices[0].finish_reason == "stop":
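The `input_to_string` change above is easiest to see with an OpenAI-style vision message, where `content` is a list of text and `image_url` parts rather than a plain string. The sketch below mirrors the committed logic as a standalone function; the sample messages are illustrative, not taken from the repo.

```python
def input_to_string(input):
    # Mirrors the updated provider method: plain strings pass through,
    # list-style user content is flattened to its text parts and image URLs.
    if isinstance(input, str):
        return input
    result = []
    for message in input:
        if message.get("content") is not None:
            if isinstance(message["content"], str):
                result.append(message["content"])
            elif isinstance(message["content"], list) and message.get("role") == "user":
                for item in message["content"]:
                    if item.get("type") == "text":
                        result.append(item.get("text", ""))
                    elif item.get("type") == "image_url":
                        result.append(item.get("image_url", {}).get("url", ""))
    return "".join(result)


# Example: the vision-style request shape that PR 126 targets.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
        ],
    },
]
print(input_to_string(messages))
# -> You are a helpful assistant.What is in this image?https://example.com/cat.png
```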

llmstudio/server.py

Lines changed: 4 additions & 2 deletions
@@ -1,4 +1,5 @@
 import threading
+from threading import Event

 import requests

@@ -29,8 +30,10 @@ def is_server_running(host, port, path="/health"):

 def start_server_component(host, port, run_func, server_name):
     if not is_server_running(host, port):
-        thread = threading.Thread(target=run_func, daemon=True)
+        started_event = Event()
+        thread = threading.Thread(target=run_func, daemon=True, args=(started_event,))
         thread.start()
+        started_event.wait() # wait for startup, this assumes the event is set somewhere
         return thread
     else:
         print(f"{server_name} server already running on {host}:{port}")
@@ -53,7 +56,6 @@ def setup_servers(engine, tracking, ui):
         TRACKING_HOST, TRACKING_PORT, run_tracking_app, "Tracking"
     )

-    ui_thread = None
     if ui:
         ui_thread = start_server_component(UI_HOST, UI_PORT, run_ui_app, "UI")

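As the inline comment in the diff itself notes, `started_event.wait()` assumes the child thread eventually sets the event; if the server crashes before its startup hook runs, the parent would block indefinitely. The commit waits without a timeout, so the following is only an illustrative hardening of the same handshake, not what `start_server_component` actually does.

```python
import threading
from threading import Event


def start_server_component_with_timeout(run_func, server_name, timeout=30.0):
    """Illustrative variant: fail loudly instead of blocking forever on startup."""
    started_event = Event()
    thread = threading.Thread(target=run_func, daemon=True, args=(started_event,))
    thread.start()
    if not started_event.wait(timeout=timeout):
        raise RuntimeError(f"{server_name} server did not start within {timeout}s")
    return thread
```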
