|
| 1 | +"""Tag-based tool search strategy implementation. |
| 2 | +
|
| 3 | +This module provides a search strategy that ranks tools based on tag matches |
| 4 | +and description keyword matches. It implements a weighted scoring system where |
| 5 | +explicit tag matches receive higher scores than description word matches. |
| 6 | +""" |
| 7 | + |
1 | 8 | from utcp.client.tool_search_strategy import ToolSearchStrategy |
2 | 9 | from typing import List, Dict, Tuple |
3 | 10 | from utcp.shared.tool import Tool |
|
6 | 13 | import asyncio |
7 | 14 |
|
8 | 15 | class TagSearchStrategy(ToolSearchStrategy): |
| 16 | + """Tag-based search strategy for UTCP tools. |
| 17 | +
|
| 18 | + Implements a weighted scoring algorithm that matches search queries against |
| 19 | + tool tags and descriptions. Explicit tag matches receive full weight while |
| 20 | + description word matches receive reduced weight. |
| 21 | +
|
| 22 | + Scoring Algorithm: |
| 23 | + - Exact tag matches: Weight 1.0 |
| 24 | + - Tag word matches: Weight equal to description_weight |
| 25 | + - Description word matches: Weight equal to description_weight |
| 26 | + - Only considers description words longer than 2 characters |
| 27 | +
|
| 28 | + Examples: |
| 29 | + >>> strategy = TagSearchStrategy(repository, description_weight=0.3) |
| 30 | + >>> tools = await strategy.search_tools("weather api", limit=5) |
| 31 | + >>> # Returns tools with "weather" or "api" tags/descriptions |
| 32 | +
|
| 33 | + Attributes: |
| 34 | + tool_repository: Repository to search for tools. |
| 35 | + description_weight: Weight multiplier for description matches (0.0-1.0). |
| 36 | + """ |
9 | 37 |
|
def __init__(self, tool_repository: ToolRepository, description_weight: float = 0.3):
    """Set up the strategy with its tool source and description weighting.

    Args:
        tool_repository: Repository whose tools will be searched.
        description_weight: Score contribution of a description word match;
            explicit tag matches count as 1.0. Must lie in the closed
            interval [0.0, 1.0].

    Raises:
        ValueError: If description_weight falls outside [0.0, 1.0].
    """
    # Keep the chained comparison: it also rejects NaN, which a pair of
    # separate "<" / ">" checks would let through.
    weight_in_range = 0.0 <= description_weight <= 1.0
    if not weight_in_range:
        raise ValueError("description_weight must be between 0.0 and 1.0")

    self.tool_repository = tool_repository
    # Explicit tags carry a weight of 1.0; description words use this value.
    self.description_weight = description_weight
14 | 56 |
|
15 | 57 | async def search_tools(self, query: str, limit: int = 10) -> List[Tool]: |
16 | | - """ |
17 | | - Return tools ordered by tag occurrences in the query. |
18 | | - |
19 | | - Uses both explicit tags and words from tool descriptions (with less weight). |
20 | | - |
| 58 | + """Search tools using tag and description matching. |
| 59 | +
|
| 60 | + Implements a weighted scoring system that ranks tools based on how well |
| 61 | + their tags and descriptions match the search query. Normalizes the query |
| 62 | + and uses word-based matching with configurable weights. |
| 63 | +
|
| 64 | + Scoring Details: |
| 65 | + - Exact tag matches in query: +1.0 points |
| 66 | + - Individual tag words matching query words: +description_weight points |
| 67 | + - Description words matching query words: +description_weight points |
| 68 | + - Only description words > 2 characters are considered |
| 69 | +
|
21 | 70 | Args: |
22 | | - query: The search query string |
23 | | - limit: Maximum number of tools to return |
24 | | - |
| 71 | + query: Search query string. Case-insensitive, word-based matching. |
| 72 | + limit: Maximum number of tools to return. Must be >= 0. |
| 73 | +
|
25 | 74 | Returns: |
26 | | - List of tools ordered by relevance to the query |
| 75 | + List of Tool objects ranked by relevance score (highest first). |
| 76 | + Empty list if no tools match or repository is empty. |
| 77 | +
|
| 78 | + Raises: |
| 79 | + ValueError: If limit is negative. |
27 | 80 | """ |
| 81 | + if limit < 0: |
| 82 | + raise ValueError("limit must be non-negative") |
28 | 83 | # Normalize query to lowercase and split into words |
29 | 84 | query_lower = query.lower() |
30 | 85 | # Extract words from the query, filtering out non-word characters |
|
0 commit comments