Skip to content
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ffe9a92
Prototype of using ImageGenerationTool
ericstj Aug 5, 2025
e5edc77
Handle DataContent returned from ImageGen
ericstj Aug 8, 2025
2d19cce
React to rename and improve metadata
ericstj Aug 9, 2025
5eef474
Handle image_generation tool content from streaming
ericstj Aug 20, 2025
ff80804
Add handling for combining updates with images
ericstj Aug 25, 2025
1725ce1
Add tests for new ChatResponseUpdateExtensions
ericstj Aug 26, 2025
c44f5fb
Merge branch 'main' of https://github.com/dotnet/extensions into Imag…
ericstj Sep 19, 2025
b4fe94b
Rename ImageGenerationTool to HostedImageGenerationTool
ericstj Sep 20, 2025
06bfa30
Remove ChatResponseUpdateCoalescingOptions
ericstj Sep 20, 2025
ca8b15d
Add ImageGeneratingChatClient
ericstj Sep 23, 2025
62e0ac5
Fix namespace of tool
ericstj Sep 26, 2025
81e6e5a
Replace traces of function calling
ericstj Sep 26, 2025
6559a66
More namespace fix
ericstj Sep 26, 2025
398bbdb
Enable editing
ericstj Sep 30, 2025
ac2de35
Merge branch 'main' of https://github.com/dotnet/extensions into Imag…
ericstj Sep 30, 2025
1d96532
Update to preview OpenAI with image tool support
ericstj Oct 1, 2025
6a6ffa2
Temporary OpenAI feed
ericstj Oct 3, 2025
94ceab2
Fix tests
ericstj Oct 3, 2025
96e9747
Add integration tests for ImageGeneratingChatClient
ericstj Oct 3, 2025
9ddc91a
Remove ChatRole.Tool -> Assistant workaround
ericstj Oct 4, 2025
3b589ac
Remove use of private reflection for Image results
ericstj Oct 6, 2025
20919ab
Add ChatResponseUpdate.Clone
ericstj Oct 6, 2025
e5f68a6
Move all mutable state into RequestState object
ericstj Oct 7, 2025
9f9a430
Adjust prompt to improve integration test reliability
ericstj Oct 7, 2025
799a72e
Refactor tool initialization
ericstj Oct 7, 2025
6029b01
Add integration tests for streaming
ericstj Oct 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace Microsoft.Extensions.AI;
/// </para>
/// <para>
/// The relationship between <see cref="ChatResponse"/> and <see cref="ChatResponseUpdate"/> is
/// codified in the <see cref="ChatResponseExtensions.ToChatResponseAsync"/> and
/// codified in the <see cref="ChatResponseExtensions.ToChatResponseAsync(IAsyncEnumerable{ChatResponseUpdate}, System.Threading.CancellationToken)"/> and
/// <see cref="ChatResponse.ToChatResponseUpdates"/>, which enable bidirectional conversions
/// between the two. Note, however, that the provided conversions may be lossy, for example if multiple
/// updates all have different <see cref="RawRepresentation"/> objects whereas there's only one slot for
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Options that control how a sequence of <see cref="ChatResponseUpdate"/> instances is coalesced
/// while being converted into <see cref="ChatMessage"/> instances.
/// </summary>
[Experimental("EXTAI0001")]
public class ChatResponseUpdateCoalescingOptions
{
    /// <summary>
    /// Gets or sets a value indicating whether an incoming <see cref="DataContent"/> item replaces an
    /// existing <see cref="DataContent"/> item that has the same <see cref="DataContent.Name"/>.
    /// </summary>
    /// <value>
    /// <see langword="true"/> if a <see cref="DataContent"/> item with a matching name is replaced;
    /// <see langword="false"/> if every <see cref="DataContent"/> item is kept.
    /// The default is <see langword="false"/>.
    /// </value>
    /// <remarks>
    /// With this property set to <see langword="true"/>, adding a <see cref="DataContent"/> item whose
    /// <see cref="DataContent.Name"/> matches an item already in the content list replaces that earlier item
    /// with the new one. This suits scenarios in which later data is meant to supersede earlier data
    /// carrying the same identifier.
    /// </remarks>
    public bool ReplaceDataContentWithSameName { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;

namespace Microsoft.Extensions.AI;

/// <summary>Represents a hosted tool that can be specified to an AI service to enable it to perform image generation.</summary>
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are other hosted tools, like OpenAI's Code Interpreter, and other providers have something similar. Anthropic, for example, has Web Search, Fetch, and Code Interpreter as "Server Tools". This may be out of scope for this change, but a generalized abstraction for these would be great. AdditionalProperties seems to be common to all of them, including the Anthropic ones, so I think this would fit beautifully as a more general abstraction than ImageGenerationTool. ServerTool or HostedTool?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have HostedWebSearchTool, HostedCodeInterpreterTool, HostedFileSearchTool, and HostedMcpServerTool. Having a HostedImageGenerationTool makes sense to me. Different providers have different ways of exposing the same fundamental information, so being able to write e.g. HostedWebSearchTool, and have that map to the right thing for Gemini and Anthropic and OpenAI makes sense to me. AdditionalProperties can be used in each when there's some setting that's not exposed in a strongly-typed fashion on the tool type.

/// <remarks>
/// The tool carries no image generation logic of its own. It acts purely as a marker telling a service
/// that it is permitted to generate images, provided the service is capable of doing so.
/// </remarks>
[Experimental("MEAI001")]
public class ImageGenerationTool : AITool
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ImageGenerationTool"/> class, optionally configured with the supplied options.
    /// </summary>
    /// <param name="options">The options that configure the image generation request. When <paramref name="options"/> is <see langword="null"/>, default options will be used.</param>
    public ImageGenerationTool(ImageGenerationOptions? options = null)
    {
        // The options instance (possibly null) is published under the ImageGenerationOptions type name
        // so that consumers can look it up through AdditionalProperties.
        Dictionary<string, object?> properties = new()
        {
            [nameof(ImageGenerationOptions)] = options,
        };

        AdditionalProperties = new AdditionalPropertiesDictionary(properties);
    }

    /// <inheritdoc />
    public override IReadOnlyDictionary<string, object?> AdditionalProperties { get; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ namespace Microsoft.Extensions.AI;
WriteIndented = true)]
[JsonSerializable(typeof(OpenAIClientExtensions.ToolJson))]
[JsonSerializable(typeof(IDictionary<string, object?>))]
[JsonSerializable(typeof(IDictionary<string, string?>))]
[JsonSerializable(typeof(string[]))]
[JsonSerializable(typeof(IEnumerable<string>))]
[JsonSerializable(typeof(JsonElement))]
[JsonSerializable(typeof(int))]
internal sealed partial class OpenAIJsonContext : JsonSerializerContext;
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
using System;
using System.ClientModel.Primitives;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization.Metadata;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.Diagnostics;
using OpenAI.Images;
using OpenAI.Responses;

#pragma warning disable S907 // "goto" statement should not be used
Expand Down Expand Up @@ -163,7 +166,15 @@ internal static IEnumerable<ChatMessage> ToChatMessages(IEnumerable<ResponseItem
break;

default:
message.Contents.Add(new() { RawRepresentation = outputItem });
if (outputItem.GetType().Name == "InternalImageGenToolCallItemResource")
{
message.Contents.Add(GetContentFromImageGen(outputItem));
}
else
{
message.Contents.Add(new() { RawRepresentation = outputItem });
}

break;
}
}
Expand All @@ -174,6 +185,118 @@ internal static IEnumerable<ChatMessage> ToChatMessages(IEnumerable<ResponseItem
}
}

/// <summary>Creates a <see cref="DataContent"/> from a completed image generation tool call output item.</summary>
/// <param name="outputItem">
/// The response item to convert. Its members are read through the OpenAI library's non-public
/// <c>InternalImageGenToolCallItemResource</c> type, so the item is expected to be an instance of that type.
/// </param>
/// <returns>
/// A <see cref="DataContent"/> holding the decoded image bytes with media type <c>image/{output_format}</c>
/// (<c>png</c> when the item reports no output format). <paramref name="outputItem"/> is stored as the raw
/// representation, and the image metadata and status are exposed through the additional properties.
/// </returns>
/// <exception cref="InvalidOperationException">The non-public image generation result type could not be resolved.</exception>
/// <remarks>
/// The image generation result type is not public, so its members are read through reflection. The second
/// <see cref="DynamicDependencyAttribute"/> names that type explicitly so that the members being reflected over
/// are declared as dependencies of this method, rather than only those of <see cref="ResponseItem"/>.
/// </remarks>
[DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(ResponseItem))]
[DynamicDependency(DynamicallyAccessedMemberTypes.All, "OpenAI.Responses.InternalImageGenToolCallItemResource", "OpenAI")]
private static DataContent GetContentFromImageGen(ResponseItem outputItem)
{
    const BindingFlags InternalBindingFlags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;

    Type imageGenResultType = Type.GetType("OpenAI.Responses.InternalImageGenToolCallItemResource, OpenAI") ??
        throw new InvalidOperationException("Unable to determine the type of the image generation result.");

    var imageGenStatus = imageGenResultType.GetProperty("Status", InternalBindingFlags)?.GetValue(outputItem)?.ToString();
    var imageGenResult = imageGenResultType.GetProperty("Result", InternalBindingFlags)?.GetValue(outputItem) as string;

    // These values are not surfaced as properties; they are read from the item's additional raw data:
    //   background, output_format, quality, revised_prompt, size
    IDictionary<string, BinaryData>? additionalRawData = imageGenResultType
        .GetProperty("SerializedAdditionalRawData", InternalBindingFlags)
        ?.GetValue(outputItem) as IDictionary<string, BinaryData>;

    string outputFormat = getStringProperty("output_format") ?? "png";

    // A missing result decodes to an empty payload rather than failing.
    var resultBytes = Convert.FromBase64String(imageGenResult ?? string.Empty);

    return new DataContent(resultBytes, $"image/{outputFormat}")
    {
        RawRepresentation = outputItem,
        AdditionalProperties = new()
        {
            ["background"] = getStringProperty("background"),
            ["output_format"] = outputFormat,
            ["quality"] = getStringProperty("quality"),
            ["revised_prompt"] = getStringProperty("revised_prompt"),
            ["size"] = getStringProperty("size"),
            ["status"] = imageGenStatus,
        }
    };

    // Reads a JSON string value from the additional raw data, or returns null when the key is absent.
    string? getStringProperty(string name)
    {
        if (additionalRawData?.TryGetValue(name, out var rawValue) == true)
        {
            var stringJsonTypeInfo = (JsonTypeInfo<string>)AIJsonUtilities.DefaultOptions.GetTypeInfo(typeof(string));
            return JsonSerializer.Deserialize(rawValue, stringJsonTypeInfo);
        }

        return null;
    }
}

/// <summary>Creates a <see cref="DataContent"/> from a streamed partial-image event of an image generation call.</summary>
/// <param name="update">
/// The streaming update to convert. Its members are read through the OpenAI library's non-public
/// <c>InternalResponseImageGenCallPartialImageEvent</c> type, so the update is expected to be an instance of that type.
/// </param>
/// <returns>
/// A <see cref="DataContent"/> holding the decoded partial image bytes with media type <c>image/{output_format}</c>
/// (<c>png</c> when the event reports no output format). <paramref name="update"/> is stored as the raw
/// representation, and the item id, output index, partial image index, and image metadata are exposed through
/// the additional properties.
/// </returns>
/// <exception cref="InvalidOperationException">The non-public partial image event type could not be resolved.</exception>
/// <remarks>
/// The partial image event type is not public, so its members are read through reflection. The
/// <see cref="DynamicDependencyAttribute"/> declarations name the streaming update types this method reflects
/// over; <see cref="ResponseItem"/> is not involved in this method.
/// </remarks>
[DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(StreamingResponseUpdate))]
[DynamicDependency(DynamicallyAccessedMemberTypes.All, "OpenAI.Responses.InternalResponseImageGenCallPartialImageEvent", "OpenAI")]
private static DataContent GetContentFromImageGenPartialImageEvent(StreamingResponseUpdate update)
{
    const BindingFlags InternalBindingFlags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;

    Type partialImageEventType = Type.GetType("OpenAI.Responses.InternalResponseImageGenCallPartialImageEvent, OpenAI") ??
        throw new InvalidOperationException("Unable to determine the type of the image generation partial image event.");

    var imageGenResult = partialImageEventType.GetProperty("PartialImageB64", InternalBindingFlags)?.GetValue(update) as string;
    var imageGenItemId = partialImageEventType.GetProperty("ItemId", InternalBindingFlags)?.GetValue(update) as string;
    var imageGenOutputIndex = partialImageEventType.GetProperty("OutputIndex", InternalBindingFlags)?.GetValue(update) as int?;
    var imageGenPartialImageIndex = partialImageEventType.GetProperty("PartialImageIndex", InternalBindingFlags)?.GetValue(update) as int?;

    // These values are not surfaced as properties; they are read from the event's additional raw data:
    //   background, output_format, quality, revised_prompt, size
    IDictionary<string, BinaryData>? additionalRawData = partialImageEventType
        .GetProperty("SerializedAdditionalRawData", InternalBindingFlags)
        ?.GetValue(update) as IDictionary<string, BinaryData>;

    string outputFormat = getStringProperty("output_format") ?? "png";

    // A missing partial image decodes to an empty payload rather than failing.
    var resultBytes = Convert.FromBase64String(imageGenResult ?? string.Empty);

    return new DataContent(resultBytes, $"image/{outputFormat}")
    {
        RawRepresentation = update,
        AdditionalProperties = new()
        {
            ["ItemId"] = imageGenItemId,
            ["OutputIndex"] = imageGenOutputIndex,
            ["PartialImageIndex"] = imageGenPartialImageIndex,
            ["background"] = getStringProperty("background"),
            ["output_format"] = outputFormat,
            ["quality"] = getStringProperty("quality"),
            ["revised_prompt"] = getStringProperty("revised_prompt"),
            ["size"] = getStringProperty("size"),
        }
    };

    // Reads a JSON string value from the additional raw data, or returns null when the key is absent.
    string? getStringProperty(string name)
    {
        if (additionalRawData?.TryGetValue(name, out var rawValue) == true)
        {
            var stringJsonTypeInfo = (JsonTypeInfo<string>)AIJsonUtilities.DefaultOptions.GetTypeInfo(typeof(string));
            return JsonSerializer.Deserialize(rawValue, stringJsonTypeInfo);
        }

        return null;
    }
}

/// <inheritdoc />
public IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default)
Expand Down Expand Up @@ -324,7 +447,16 @@ ChatResponseUpdate CreateUpdate(AIContent? content = null) =>
break;

default:
yield return CreateUpdate();

if (streamingUpdate.GetType().Name == "InternalResponseImageGenCallPartialImageEvent")
{
yield return CreateUpdate(GetContentFromImageGenPartialImageEvent(streamingUpdate));
}
else
{
yield return CreateUpdate();
}

break;
}
}
Expand All @@ -349,6 +481,59 @@ internal static ResponseTool ToResponseTool(AIFunction aiFunction, ChatOptions?
strict ?? false);
}

/// <summary>Creates an OpenAI <c>image_generation</c> <see cref="ResponseTool"/> from an <see cref="ImageGenerationTool"/>.</summary>
/// <param name="imageGenerationTool">The tool whose additional properties may carry an <see cref="ImageGenerationOptions"/> instance.</param>
/// <param name="options">
/// Optional chat options. Their additional properties are consulted for an <see cref="ImageGenerationOptions"/>
/// instance when <paramref name="imageGenerationTool"/> does not supply one.
/// </param>
/// <returns>The <see cref="ResponseTool"/> read back from the JSON form of the tool options.</returns>
/// <remarks>
/// Values returned by <see cref="ImageGenerationOptions.RawRepresentationFactory"/> take precedence over the
/// strongly-typed size and media type, except for <c>type</c>, which is always set to <c>image_generation</c>.
/// </remarks>
internal static ResponseTool ToImageResponseTool(ImageGenerationTool imageGenerationTool, ChatOptions? options = null)
{
    // Prefer options attached to the tool. The entry may be present but null, so test the value's type
    // rather than only the key's presence; otherwise the chat-options fallback could never be reached.
    ImageGenerationOptions? imageGenerationOptions = null;
    if (imageGenerationTool.AdditionalProperties.TryGetValue(nameof(ImageGenerationOptions), out object? toolValue) &&
        toolValue is ImageGenerationOptions toolLevelOptions)
    {
        imageGenerationOptions = toolLevelOptions;
    }
    else if (options?.AdditionalProperties is { } chatProperties &&
        chatProperties.TryGetValue(nameof(ImageGenerationOptions), out object? chatValue) &&
        chatValue is ImageGenerationOptions chatLevelOptions)
    {
        imageGenerationOptions = chatLevelOptions;
    }

    // Work on a copy so the dictionary handed back by the factory (which the caller may reuse across
    // requests) is never mutated. No generator instance is available here, hence the null argument.
    Dictionary<string, object> toolOptions =
        imageGenerationOptions?.RawRepresentationFactory?.Invoke(null!) is Dictionary<string, object> rawOptions
            ? new(rawOptions, rawOptions.Comparer)
            : new();
    toolOptions["type"] = "image_generation";

    // Size: Image dimensions (e.g., 1024x1024, 1024x1536)
    if (imageGenerationOptions?.ImageSize is { } imageSize && !toolOptions.ContainsKey("size"))
    {
        // GeneratedImageSize is used to format the size the way OpenAI expects it.
        toolOptions["size"] = new GeneratedImageSize(imageSize.Width, imageSize.Height).ToString();
    }

    // Format: File output format. The guard uses the same key that is written ("output_format").
    if (imageGenerationOptions?.MediaType is { } mediaType && !toolOptions.ContainsKey("output_format"))
    {
        string? outputFormat = mediaType switch
        {
            "image/png" => GeneratedImageFileFormat.Png.ToString(),
            "image/jpeg" => GeneratedImageFileFormat.Jpeg.ToString(),
            "image/webp" => GeneratedImageFileFormat.Webp.ToString(),
            _ => null,
        };

        // An unrecognized media type is left unset instead of sending an empty format value.
        if (outputFormat is not null)
        {
            toolOptions["output_format"] = outputFormat;
        }
    }

    // unexposed properties, string unless noted
    // background: transparent, opaque, auto
    // input_fidelity: effort model exerts to match input (high, low)
    // input_image_mask: optional image mask for inpainting. Object with property file_id string or image_url data string.
    // model: Model ID to use for image generation
    // moderation: Moderation level (auto, low)
    // output_compression: (int) Compression level (0-100%) for JPEG and WebP formats
    // partial_images: (int) Number of partial images to return (0-3)
    // quality: Rendering quality (e.g. low, medium, high)

    // Can't create the tool, but we can deserialize it from Json
    BinaryData toolOptionsData = BinaryData.FromBytes(
        JsonSerializer.SerializeToUtf8Bytes(toolOptions, OpenAIJsonContext.Default.IDictionaryStringObject));
    return ModelReaderWriter.Read<ResponseTool>(toolOptionsData, ModelReaderWriterOptions.Json)!;
}

/// <summary>Creates a <see cref="ChatRole"/> from a <see cref="MessageRole"/>.</summary>
private static ChatRole ToChatRole(MessageRole? role) =>
role switch
Expand Down Expand Up @@ -403,6 +588,10 @@ private ResponseCreationOptions ToOpenAIResponseCreationOptions(ChatOptions? opt
result.Tools.Add(ToResponseTool(aiFunction, options));
break;

case ImageGenerationTool imageGenerationTool:
result.Tools.Add(ToImageResponseTool(imageGenerationTool, options));
break;

case HostedWebSearchTool webSearchTool:
WebSearchUserLocation? location = null;
if (webSearchTool.AdditionalProperties.TryGetValue(nameof(WebSearchUserLocation), out object? objLocation))
Expand Down
Loading
Loading