-
Notifications
You must be signed in to change notification settings - Fork 839
Tool reduction #6781
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Tool reduction #6781
Changes from 4 commits
ace3d1d
c7608c1
a238349
06cedcc
56f4bdd
b2583ec
491ff72
d05b989
b3f7f85
62d1ae9
1b4661d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Collections.Generic; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
|
||
namespace Microsoft.Extensions.AI; | ||
|
||
/// <summary>
/// Defines a strategy that chooses a reduced set of tools to accompany a chat request.
/// </summary>
/// <remarks>
/// The strategy runs before a request is forwarded to an underlying <see cref="IChatClient"/>. This makes it
/// possible to trim a large tool catalog so that it fits provider limits, or to improve the quality of the
/// model's tool selection.
/// <para>
/// Implementations should always return a non-<see langword="null"/> enumerable. Handing back the original
/// <see cref="ChatOptions.Tools"/> instance means "no change"; handing back any other enumerable tells the
/// caller that it may replace the existing tool list.
/// </para>
/// </remarks>
[Experimental("MEAI001")]
public interface IToolReductionStrategy
{
    /// <summary>
    /// Chooses the tools to include for one specific request.
    /// </summary>
    /// <param name="messages">
    /// The chat messages of the request. Typed as <see cref="IEnumerable{T}"/> so the sequence is not
    /// materialized prematurely.
    /// </param>
    /// <param name="options">The chat options of the request; may be <see langword="null"/>.</param>
    /// <param name="cancellationToken">A token used to observe cancellation.</param>
    /// <returns>
    /// An enumerable of <see cref="AITool"/> instances, possibly reduced and never <see langword="null"/>.
    /// Returning the very instance referenced by <paramref name="options"/>.<see cref="ChatOptions.Tools"/>
    /// signals that nothing changed.
    /// </returns>
    Task<IEnumerable<AITool>> SelectToolsForRequestAsync(
        IEnumerable<ChatMessage> messages,
        ChatOptions? options,
        CancellationToken cancellationToken = default);
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System; | ||
using System.Diagnostics.CodeAnalysis; | ||
using Microsoft.Shared.Diagnostics; | ||
|
||
namespace Microsoft.Extensions.AI; | ||
|
||
/// <summary>Provides extension methods that add tool reduction middleware to a chat client pipeline.</summary>
[Experimental("MEAI001")]
public static class ChatClientBuilderToolReductionExtensions
{
    /// <summary>
    /// Registers tool reduction in the chat client pipeline, driven by the given <paramref name="strategy"/>.
    /// </summary>
    /// <param name="builder">The chat client builder to extend.</param>
    /// <param name="strategy">The strategy that selects the tools for each request.</param>
    /// <returns>The same <paramref name="builder"/>, to allow call chaining.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="builder"/> or <paramref name="strategy"/> is <see langword="null"/>.
    /// </exception>
    /// <remarks>
    /// Place this before function invocation middleware in the pipeline so that the underlying provider only
    /// sees the reduced set of tools.
    /// </remarks>
    public static ChatClientBuilder UseToolReduction(this ChatClientBuilder builder, IToolReductionStrategy strategy)
    {
        // Validate both arguments up front, builder first.
        _ = Throw.IfNull(builder);
        _ = Throw.IfNull(strategy);

        // Each pipeline build wraps the inner client in a ToolReducingChatClient bound to the strategy.
        Func<IChatClient, IChatClient> wrapWithReduction = innerClient => new ToolReducingChatClient(innerClient, strategy);

        return builder.Use(wrapWithReduction);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Diagnostics; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.Linq; | ||
using System.Numerics.Tensors; | ||
using System.Runtime.CompilerServices; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using Microsoft.Shared.Diagnostics; | ||
|
||
namespace Microsoft.Extensions.AI; | ||
|
||
#pragma warning disable IDE0032 // Use auto property, suppressed until repo updates to C# 14 | ||
|
||
/// <summary> | ||
/// A tool reduction strategy that ranks tools by embedding similarity to the current conversation context. | ||
/// </summary> | ||
/// <remarks> | ||
/// The strategy embeds each tool (name + description by default) once (cached) and embeds the current | ||
/// conversation content each request. It then selects the top <c>toolLimit</c> tools by similarity. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if there is a hybrid that can reduce tools, but then also expose a "get more tools" function where the model could describe the sort of tool it wanted and we could use that to give it back more tools that have embedding similarity to its request? Don't want to feature creep this, just trying to think of alternatives. I like that this one is hands-free, but I can imagine that it can hit situations where it never gives the model the chance to call what might be the best tool. I like that the grouping ensures that all tools are made available, but it requires explicit grouping by the caller. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe what we could do is change There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just implemented the above ^ |
||
/// </remarks> | ||
[Experimental("MEAI001")] | ||
public sealed class EmbeddingToolReductionStrategy : IToolReductionStrategy | ||
{ | ||
// Tool embeddings computed so far, keyed by tool instance.
private readonly ConditionalWeakTable<AITool, Embedding<float>> _toolEmbeddingsCache = new();

// Generator used for both the tool texts and the per-request query text.
private readonly IEmbeddingGenerator<string, Embedding<float>> _embeddingGenerator;

// Maximum number of tools handed back per request; validated as > 0 by the constructor.
private readonly int _toolLimit;
|
||
// Default tool text: "<name>\n<description>". A blank name yields the description as-is;
// otherwise a blank description yields just the name.
private Func<AITool, string> _toolEmbeddingTextFactory = static tool =>
{
    var name = tool.Name;
    var description = tool.Description;

    if (string.IsNullOrWhiteSpace(name))
    {
        return description;
    }

    return string.IsNullOrWhiteSpace(description)
        ? name
        : name + "\n" + description;
};
|
||
// Default query text: the non-empty Text of each message, joined with newlines.
private Func<IEnumerable<ChatMessage>, string> _messagesEmbeddingTextFactory = static messages =>
    string.Join(
        "\n",
        messages
            .Select(static message => message.Text)
            .Where(static text => !string.IsNullOrEmpty(text)));
|
||
// Default similarity metric: cosine similarity between the two vectors.
private Func<ReadOnlyMemory<float>, ReadOnlyMemory<float>, float> _similarity =
    static (query, candidate) => TensorPrimitives.CosineSimilarity(query.Span, candidate.Span);
|
||
/// <summary>
/// Creates a new <see cref="EmbeddingToolReductionStrategy"/>.
/// </summary>
/// <param name="embeddingGenerator">The generator used to produce embeddings.</param>
/// <param name="toolLimit">The maximum number of tools to return; must be greater than zero.</param>
public EmbeddingToolReductionStrategy(IEmbeddingGenerator<string, Embedding<float>> embeddingGenerator, int toolLimit)
{
    // Arguments are validated in declaration order: generator first, then the limit.
    _embeddingGenerator = Throw.IfNull(embeddingGenerator);
    _toolLimit = Throw.IfLessThanOrEqual(toolLimit, min: 0);
}
|
||
/// <summary>
/// Gets or sets the delegate that builds the text to embed for a tool.
/// </summary>
/// <remarks>
/// The default produces <c>Name + "\n" + Description</c>, leaving out an empty part.
/// Assigning <see langword="null"/> is rejected.
/// </remarks>
public Func<AITool, string> ToolEmbeddingTextFactory
{
    get
    {
        return _toolEmbeddingTextFactory;
    }

    set
    {
        _toolEmbeddingTextFactory = Throw.IfNull(value);
    }
}
|
||
/// <summary>
/// Gets or sets the delegate that turns a collection of chat messages into the single text string
/// that is embedded as the query.
/// </summary>
/// <remarks>Assigning <see langword="null"/> is rejected.</remarks>
public Func<IEnumerable<ChatMessage>, string> MessagesEmbeddingTextFactory
{
    get
    {
        return _messagesEmbeddingTextFactory;
    }

    set
    {
        _messagesEmbeddingTextFactory = Throw.IfNull(value);
    }
}
|
||
/// <summary>
/// Gets or sets the function that scores a (query, tool) pair of embedding vectors.
/// The default is cosine similarity.
/// </summary>
/// <remarks>Assigning <see langword="null"/> is rejected.</remarks>
public Func<ReadOnlyMemory<float>, ReadOnlyMemory<float>, float> Similarity
{
    get
    {
        return _similarity;
    }

    set
    {
        _similarity = Throw.IfNull(value);
    }
}
|
||
/// <summary>
/// Gets or sets a value indicating whether embeddings computed for tools are cached and reused.
/// The default is <see langword="true"/>.
/// </summary>
public bool EnableEmbeddingCaching { get; set; } = true;
|
||
/// <summary>
/// Gets or sets a value indicating whether the selected tools keep their original relative order.
/// </summary>
/// <remarks>
/// When <see langword="false"/> (the default), the selected tools are ordered by descending similarity.
/// When <see langword="true"/>, the top-N tools by similarity are emitted in the order they had in the
/// original tool list.
/// </remarks>
public bool PreserveOriginalOrdering { get; set; }
|
||
/// <inheritdoc />
public async Task<IEnumerable<AITool>> SelectToolsForRequestAsync(
    IEnumerable<ChatMessage> messages,
    ChatOptions? options,
    CancellationToken cancellationToken = default)
{
    _ = Throw.IfNull(messages);

    if (options?.Tools is not { Count: > 0 } tools)
    {
        // Nothing to reduce: hand back the caller's (empty) list if there is one, otherwise an empty list.
        return options?.Tools ?? [];
    }

    Debug.Assert(_toolLimit > 0, "Expected the tool count limit to be greater than zero.");

    if (tools.Count <= _toolLimit)
    {
        // No reduction necessary; returning the same instance signals "no change" to the caller.
        return tools;
    }

    // Build the query text from the supplied messages.
    var queryText = MessagesEmbeddingTextFactory(messages);
    if (string.IsNullOrWhiteSpace(queryText))
    {
        // We couldn't build a meaningful query, likely because the message list was empty.
        // Fall back to the first _toolLimit tools. The result is materialized so it is a stable
        // snapshot rather than a lazy view over the caller's mutable tool list.
        return tools.Take(_toolLimit).ToArray();
    }

    // Ensure embeddings for any uncached tools are generated in a batch.
    var toolEmbeddings = await GetToolEmbeddingsAsync(tools, cancellationToken).ConfigureAwait(false);

    // Generate the query embedding.
    var queryEmbedding = await _embeddingGenerator.GenerateAsync(queryText, cancellationToken: cancellationToken).ConfigureAwait(false);
    var queryVector = queryEmbedding.Vector;

    // Read the delegate once so every tool in this request is scored by the same function.
    var similarity = Similarity;

    // Rank tools by similarity to the query and keep the best _toolLimit of them.
    // Index records each tool's position in the original list.
    var ranked = tools
        .Zip(toolEmbeddings, static (tool, embedding) => (Tool: tool, Embedding: embedding))
        .Select((t, i) => (t.Tool, Index: i, Score: similarity(queryVector, t.Embedding.Vector)))
        .OrderByDescending(static t => t.Score)
        .Take(_toolLimit);

    if (PreserveOriginalOrdering)
    {
        ranked = ranked.OrderBy(static t => t.Index);
    }

    // Materialize before returning: a deferred query would re-score and re-sort on every enumeration,
    // would surface exceptions from the similarity delegate only after this method has completed,
    // and would observe later mutations of the caller's tool list.
    return ranked.Select(static t => t.Tool).ToArray();
}
|
||
/// <summary>
/// Gets one embedding per tool, positionally aligned with <paramref name="tools"/>.
/// </summary>
/// <param name="tools">The tools to embed.</param>
/// <param name="cancellationToken">A token to observe cancellation.</param>
/// <returns>A list with the same number of entries as <paramref name="tools"/>, in the same order.</returns>
/// <remarks>
/// When <see cref="EnableEmbeddingCaching"/> is <see langword="true"/>, only tools that are not yet in the
/// cache are sent to the embedding generator (in a single batch) and the results are then cached.
/// Otherwise all tools are embedded in one batch and nothing is stored.
/// </remarks>
private async Task<IReadOnlyList<Embedding<float>>> GetToolEmbeddingsAsync(IList<AITool> tools, CancellationToken cancellationToken)
{
    if (!EnableEmbeddingCaching)
    {
        // Embed all tools in one batch; do not store in cache.
        return await ComputeEmbeddingsAsync(tools.Select(t => ToolEmbeddingTextFactory(t)), expectedCount: tools.Count).ConfigureAwait(false);
    }

    var result = new Embedding<float>[tools.Count];
    var cacheMisses = new List<(AITool Tool, int Index)>(tools.Count);

    // First pass: fill in what the cache already has and remember the positions that are missing.
    for (var i = 0; i < tools.Count; i++)
    {
        if (_toolEmbeddingsCache.TryGetValue(tools[i], out var embedding))
        {
            result[i] = embedding;
        }
        else
        {
            cacheMisses.Add((tools[i], i));
        }
    }

    if (cacheMisses.Count == 0)
    {
        return result;
    }

    var uncachedEmbeddings = await ComputeEmbeddingsAsync(cacheMisses.Select(t => ToolEmbeddingTextFactory(t.Tool)), expectedCount: cacheMisses.Count).ConfigureAwait(false);

    // Second pass: publish the freshly computed embeddings and fill the remaining result slots.
    for (var i = 0; i < cacheMisses.Count; i++)
    {
        var (tool, index) = cacheMisses[i];
        var embedding = uncachedEmbeddings[i];

        // Use the add-if-absent GetValue rather than Add: Add throws ArgumentException when the key is
        // already present, which happens if the same tool instance appears more than once in the list
        // or if a concurrent request cached it while this one was awaiting. GetValue returns whichever
        // embedding ended up in the cache, so all callers observe the same instance.
        result[index] = _toolEmbeddingsCache.GetValue(tool, _ => embedding);
    }

    return result;

    // Embeds the given texts in one batch and verifies that the generator returned one embedding per text.
    async ValueTask<GeneratedEmbeddings<Embedding<float>>> ComputeEmbeddingsAsync(IEnumerable<string> texts, int expectedCount)
    {
        var embeddings = await _embeddingGenerator.GenerateAsync(texts, cancellationToken: cancellationToken).ConfigureAwait(false);

        if (embeddings.Count != expectedCount)
        {
            Throw.InvalidOperationException($"Expected {expectedCount} embeddings, got {embeddings.Count}.");
        }

        return embeddings;
    }
}
} |
Uh oh!
There was an error while loading. Please reload this page.