The core Outlines API exposes a unified entry point:
`model.generate(...)`. Its first argument is expected to support `str`, `list`, or `Chat`.
For `list` and `Chat` input, all backends behave consistently.
However, for `str` input, different backends show inconsistent behavior, especially regarding whether the input is treated as a chat prompt (i.e., wrapped with a user role or chat template) or as a raw text completion prompt.
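To make the divergence concrete, here is a minimal sketch (illustrative only, not library code) of what each group effectively sends for the same `str` prompt:

```python
# Minimal sketch of the divergence; payload shapes only, names illustrative.
prompt = "Write a haiku about the sea."

# Group 1 backends (anthropic, gemini, mistral, ollama, openai, sglang, vllm):
# the string is wrapped as a single user chat message.
group1_payload = [{"role": "user", "content": prompt}]

# Group 2 backends (llamacpp, mlxlm, transformers, vllm_offline):
# the string is forwarded untouched and hits the completion path.
group2_payload = prompt
```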
Below is a classification of the current behaviors.
Model Group 1 — accepts `str`, `list`, and `Chat`; all apply a chat template
These backends always wrap `str` and `list` as chat-style messages, the same as `Chat`.
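In other words, for a Group 1 backend the two calls below are expected to produce the same request (a sketch; `model` stands for any Group 1 model instance, and the `Chat` import path and constructor usage are assumptions):

```python
from outlines.inputs import Chat  # assumed import path

prompt = "Write a haiku about the sea."

# Both calls should end up sending a single user message to the backend.
result_a = model.generate(prompt)
result_b = model.generate(Chat([{"role": "user", "content": prompt}]))
```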
anthropic
`outlines/models/anthropic.py`
```python
@format_input.register(str)
def format_str_model_input(self, model_input: str) -> dict:
    return {
        "messages": [self._create_message("user", model_input)]
    }

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> dict:
    return {
        "messages": [
            self._create_message("user", model_input)
        ]
    }

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> dict:
    """Generate the `messages` argument to pass to the client when the user
    passes a Chat instance.
    """
    return {
        "messages": [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]
    }
```
gemini
`outlines/models/gemini.py`
```python
@format_input.register(str)
def format_str_model_input(self, model_input: str) -> dict:
    return {"contents": [self._create_text_part(model_input)]}

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> dict:
    return {
        "contents": [
            self._create_message("user", model_input)
        ]
    }

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> dict:
    """Generate the `contents` argument to pass to the client when the user
    passes a Chat instance.
    """
    return {
        "contents": [
            self._create_message(message["role"], message["content"])
            for message in model_input.messages
        ]
    }
```
mistral
`outlines/models/mistral.py`
```python
@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Format a string input into a list of messages.

    Parameters
    ----------
    model_input : str
        The input string prompt.

    Returns
    -------
    list
        A list of Mistral message objects.
    """
    from mistralai import UserMessage
    return [UserMessage(content=model_input)]

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Format a list input into a list of messages.

    Parameters
    ----------
    model_input : list
        The input list, containing a string prompt and optionally Image
        objects (vision models only).

    Returns
    -------
    list
        A list of Mistral message objects.
    """
    from mistralai import UserMessage
    return [UserMessage(content=self._create_message_content(model_input))]

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Format a Chat input into a list of messages.

    Parameters
    ----------
    model_input : Chat
        The Chat object containing a list of message dictionaries.

    Returns
    -------
    list
        A list of Mistral message objects.
    """
    from mistralai import UserMessage, AssistantMessage, SystemMessage

    messages = []
    for message in model_input.messages:
        role = message["role"]
        content = message["content"]
        if role == "user":
            messages.append(
                UserMessage(content=self._create_message_content(content))
            )
        elif role == "assistant":
            messages.append(AssistantMessage(content=content))
        elif role == "system":
            messages.append(SystemMessage(content=content))
        else:
            raise ValueError(f"Unsupported role: {role}")
    return messages
```
ollama
`outlines/models/ollama.py`
```python
@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user only passes a prompt.
    """
    return [
        self._create_message("user", model_input)
    ]

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a prompt and images.
    """
    return [
        self._create_message("user", model_input)
    ]

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a Chat instance.
    """
    return [
        self._create_message(message["role"], message["content"])
        for message in model_input.messages
    ]
```
openai
`outlines/models/openai.py`
```python
@format_input.register(str)
def format_str_model_input(self, model_input: str) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user only passes a prompt.
    """
    return [
        self._create_message("user", model_input)
    ]

@format_input.register(list)
def format_list_model_input(self, model_input: list) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a prompt and images.
    """
    return [
        self._create_message("user", model_input)
    ]

@format_input.register(Chat)
def format_chat_model_input(self, model_input: Chat) -> list:
    """Generate the value of the `messages` argument to pass to the
    client when the user passes a Chat instance.
    """
    return [
        self._create_message(message["role"], message["content"])
        for message in model_input.messages
    ]
```
sglang
`outlines/models/sglang.py`
```python
def format_input(self, model_input: Union[Chat, list, str]) -> list:
    """Generate the value of the messages argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the sglang
    server expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    list
        The formatted input to be passed to the client.
    """
    return OpenAITypeAdapter().format_input(model_input)
```
vllm
`outlines/models/vllm.py`
```python
def format_input(self, model_input: Union[Chat, str, list]) -> list:
    """Generate the value of the messages argument to pass to the client.

    We rely on the OpenAITypeAdapter to format the input as the vLLM server
    expects input in the same format as OpenAI.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    list
        The formatted input to be passed to the model.
    """
    return OpenAITypeAdapter().format_input(model_input)
```
Model Group 2 — accepts `str` and `Chat`; `str` means completion mode, `Chat` means chat mode
These models do not auto-wrap `str` into chat messages. Instead:
- `str` → passed as a raw text prompt (completion API)
- `Chat` → converted into chat messages (chat API)
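A short sketch of the resulting difference (illustrative; `tokenizer` is assumed to be a Hugging Face tokenizer with a chat template):

```python
text = "Write a haiku about the sea."

# str input: forwarded as-is to the completion path, no template applied.
completion_prompt = text

# Chat input: rendered through the tokenizer's chat template, as the mlxlm and
# transformers adapters below do.
chat_messages = [{"role": "user", "content": text}]
# chat_prompt = tokenizer.apply_chat_template(
#     chat_messages, tokenize=False, add_generation_prompt=True
# )
```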
llamacpp
`outlines/models/llamacpp.py`
```python
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.
    """
    raise NotImplementedError(
        f"The input type {type(model_input)} is not available with "
        "LlamaCpp. The only available types are `str` and `Chat`."
    )

@format_input.register(str)
def format_str_input(self, model_input: str) -> str:
    return model_input

@format_input.register(Chat)
def format_chat_input(self, model_input: Chat) -> list:
    if not all(
        isinstance(message["content"], str)
        for message in model_input.messages
    ):
        raise ValueError(
            "LlamaCpp does not support multi-modal messages."
            + "The content of each message must be a string."
        )

    return [
        {
            "role": message["role"],
            "content": message["content"],
        }
        for message in model_input.messages
    ]

def generate(
    self,
    model_input: Union[Chat, str],
    output_type: Optional[OutlinesLogitsProcessor] = None,
    **inference_kwargs: Any,
) -> str:
    """Generate text using `llama-cpp-python`.

    Parameters
    ----------
    model_input
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    **inference_kwargs
        Additional keyword arguments to pass to the `Llama.__call__`
        method of the `llama-cpp-python` library.

    Returns
    -------
    str
        The text generated by the model.
    """
    prompt = self.type_adapter.format_input(model_input)

    if isinstance(prompt, str):
        completion = self.model(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["text"]
    elif isinstance(prompt, list):  # pragma: no cover
        completion = self.model.create_chat_completion(
            prompt,
            logits_processor=self.type_adapter.format_output_type(output_type),
            **inference_kwargs,
        )
        result = completion["choices"][0]["message"]["content"]

    self.model.reset()

    return result
```
mlxlm
`outlines/models/mlxlm.py`
```python
@format_input.register(str)
def format_str_input(self, model_input: str):
    return model_input

@format_input.register(Chat)
def format_chat_input(self, model_input: Chat) -> str:
    if not all(
        isinstance(message["content"], str)
        for message in model_input.messages
    ):
        raise ValueError(
            "mlx-lm does not support multi-modal messages."
            + "The content of each message must be a string."
        )

    return self.tokenizer.apply_chat_template(
        model_input.messages,
        tokenize=False,
        add_generation_prompt=True,
    )
```
transformers
`outlines/models/transformers.py`
```python
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the model.

    Parameters
    ----------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.
    """
    raise TypeError(
        f"The input type {type(model_input)} is not available."
        "The only available types are `str` and `Chat`."
    )

@format_input.register(str)
def format_str_input(self, model_input: str) -> str:
    return model_input

@format_input.register(Chat)
def format_chat_input(self, model_input: Chat) -> str:
    return self.tokenizer.apply_chat_template(
        model_input.messages,
        tokenize=False,
        add_generation_prompt=True,
    )
```
vllm_offline
`outlines/models/vllm_offline.py`
```python
def generate(
    self,
    model_input: Chat | str,
    output_type: Optional[Any] = None,
    **inference_kwargs: Any,
) -> Union[str, List[str]]:
    """Generate text using vLLM offline.

    Parameters
    ----------
    prompt
        The prompt based on which the model will generate a response.
    output_type
        The logits processor the model will use to constrain the format of
        the generated text.
    inference_kwargs
        Additional keyword arguments to pass to the `generate` method
        in the `vllm.LLM` model.

    Returns
    -------
    Union[str, List[str]]
        The text generated by the model.
    """
    sampling_params = self._build_generation_args(
        inference_kwargs,
        output_type,
    )

    if isinstance(model_input, Chat):
        results = self.model.chat(
            messages=self.type_adapter.format_input(model_input),
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    else:
        results = self.model.generate(
            prompts=self.type_adapter.format_input(model_input),
            sampling_params=sampling_params,
            **inference_kwargs,
        )
    results = [completion.text for completion in results[0].outputs]

    if len(results) == 1:
        return results[0]
    else:
        return results
```
Model Group 3 — only accepts `str`; unclear whether any chat template is applied
dottxt
`outlines/models/dottxt.py`
```python
def format_input(self, model_input: str) -> str:
    """Format the prompt to pass to the client.

    Parameters
    ----------
    model_input
        The input provided by the user.

    Returns
    -------
    str
        The input to pass to the client.
    """
    if isinstance(model_input, str):
        return model_input

    raise TypeError(
        f"The input type {model_input} is not available with Dottxt. "
        "The only available type is `str`."
    )
```
tgi
`outlines/models/tgi.py`
```python
@singledispatchmethod
def format_input(self, model_input):
    """Generate the prompt argument to pass to the client.

    Argument
    --------
    model_input
        The input passed by the user.

    Returns
    -------
    str
        The formatted input to be passed to the model.
    """
    raise NotImplementedError(
        f"The input type {input} is not available with TGI. "
        + "The only available type is `str`."
    )
```
Summary
Backends currently fall into three distinct behaviors for `model.generate()`:

- Group 1 — `str` is always treated as a chat message
- Group 2 — `str` is treated as a raw completion prompt, while `Chat` uses chat mode
- Group 3 — only `str` is supported, and it is unclear whether any chat template is applied

I think we may need a unifying policy (e.g., always treat `str` as chat?).
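As one possible direction (a hypothetical sketch only, not existing library code), a Group 2 adapter such as the transformers one could treat `str` as chat by reusing the same chat-template path it already uses for `Chat`:

```python
# Hypothetical: wrap a bare string in a single user message and apply the
# tokenizer's chat template, mirroring format_chat_input above.
@format_input.register(str)
def format_str_input(self, model_input: str) -> str:
    return self.tokenizer.apply_chat_template(
        [{"role": "user", "content": model_input}],
        tokenize=False,
        add_generation_prompt=True,
    )
```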