diff --git a/crewai_tools/__init__.py b/crewai_tools/__init__.py index 85fe5ed6..170ea2c3 100644 --- a/crewai_tools/__init__.py +++ b/crewai_tools/__init__.py @@ -96,3 +96,11 @@ ZapierActionTools, ParallelSearchTool, ) + +from .tools.llmlayer_tools import ( + LLMLayerSearchTool, + LLMLayerWebSearchTool, + LLMLayerScraperTool, + LLMLayerPDFTool, + LLMLayerYouTubeTool, +) \ No newline at end of file diff --git a/crewai_tools/tools/llmlayer_tools/README.md b/crewai_tools/tools/llmlayer_tools/README.md new file mode 100644 index 00000000..efe2fcef --- /dev/null +++ b/crewai_tools/tools/llmlayer_tools/README.md @@ -0,0 +1,648 @@ +# LLMLayer Tools + +Five production-ready tools that give CrewAI agents real-time web search, content extraction, and AI-powered answers using 20+ language models. + +## Features + +- **AI-Powered Search** - Combine web search with LLM responses (citations, sources, images) +- **Multi-Type Search** - General, news, shopping, videos, images, scholarly content +- **Content Extraction** - Scrape any webpage as markdown, HTML, PDF, or screenshot +- **PDF Processing** - Extract text from PDF documents +- **YouTube Transcripts** - Get video transcripts in multiple languages +- **Zero New Dependencies** - Uses standard `requests` library + +## Installation + +```bash +pip install 'crewai[tools]' +``` + +Get your free API key at [llmlayer.dev](https://llmlayer.ai) + +## Quick Start + +```python +import os +from crewai import Agent, Task, Crew +from crewai_tools import LLMLayerSearchTool + +os.environ["LLMLAYER_API_KEY"] = "your_api_key_here" + +# Create agent with search capability +researcher = Agent( + role='Research Analyst', + goal='Find and analyze current information', + tools=[LLMLayerSearchTool()], + verbose=True +) + +# Define task +task = Task( + description='What are the latest developments in quantum computing? Use model: openai/gpt-4o-mini', + agent=researcher, + expected_output='Summary with sources' +) + +# Run +crew = Crew(agents=[researcher], tasks=[task]) +result = crew.kickoff() +print(result) +``` + +## Available Tools + +| Tool | Use Case | Key Parameters | +|------|----------|----------------| +| **LLMLayerSearchTool** | AI answers with web search | `query`, `model`, `citations`, `return_sources` | +| **LLMLayerWebSearchTool** | Raw search results | `query`, `search_type` (general/news/videos/images/scholar) | +| **LLMLayerScraperTool** | Extract webpage content | `url`, `format` (markdown/html/pdf/screenshot) | +| **LLMLayerPDFTool** | Extract text from PDFs | `url` | +| **LLMLayerYouTubeTool** | Get video transcripts | `url`, `language` | + +## Usage Examples + +### Research Agent with Citations + +```python +from crewai import Agent, Task, Crew +from crewai_tools import LLMLayerSearchTool + +researcher = Agent( + role='Research Analyst', + tools=[LLMLayerSearchTool()], + verbose=True +) + +task = Task( + description=''' + Research climate change policies from 2024-2025. + Model: anthropic/claude-sonnet-4 + Enable citations and sources. 
+ Filter: date_filter=month + ''', + agent=researcher, + expected_output='Report with citations' +) + +crew = Crew(agents=[researcher], tasks=[task]) +result = crew.kickoff() +``` + +### Multi-Source Intelligence Agent + +```python +from crewai_tools import ( + LLMLayerSearchTool, + LLMLayerScraperTool, + LLMLayerPDFTool +) + +analyst = Agent( + role='Intelligence Analyst', + tools=[ + LLMLayerSearchTool(), + LLMLayerScraperTool(), + LLMLayerPDFTool() + ], + verbose=True +) + +task = Task( + description=''' + Analyze the company at https://example.com + 1. Search for recent news (model: openai/gpt-4o-mini) + 2. Scrape their website for key info + 3. Extract data from their whitepaper PDF + ''', + agent=analyst, + expected_output='Comprehensive analysis' +) +``` + +### Domain-Filtered Search + +```python +task = Task( + description=''' + Find Python best practices. + Model: openai/gpt-4o-mini + Only search: stackoverflow.com, python.org + Exclude: reddit.com + Use domain_filter: ["stackoverflow.com", "python.org", "-reddit.com"] + ''', + agent=researcher, + expected_output='Best practices summary' +) +``` + +### Structured JSON Output + +```python +task = Task( + description=''' + Summarize AI safety research. + Model: openai/gpt-4o-mini + Return as JSON with: summary, key_points (array), confidence (number) + Use answer_type: json + ''', + agent=researcher, + expected_output='JSON structured response' +) +``` + +### News Monitoring + +```python +from crewai_tools import LLMLayerWebSearchTool + +monitor = Agent( + role='News Monitor', + tools=[LLMLayerWebSearchTool()], + verbose=True +) + +task = Task( + description=''' + Find AI regulation news from the past week. + search_type: news + recency: week + ''', + agent=monitor, + expected_output='News summary' +) +``` + +### Web Scraping for Content Analysis + +```python +from crewai_tools import LLMLayerScraperTool + +content_analyst = Agent( + role='Content Analyst', + tools=[LLMLayerScraperTool()], + verbose=True +) + +task = Task( + description=''' + Scrape https://example.com/blog/latest-article + Extract as markdown format + Include all images and links + Analyze the key points + ''', + agent=content_analyst, + expected_output='Article analysis with key points' +) +``` + +### Scrape Website as Screenshot + +```python +task = Task( + description=''' + Capture screenshot of https://competitor-site.com + Format: screenshot + Analyze the UI/UX design elements + ''', + agent=content_analyst, + expected_output='Screenshot analysis' +) +``` + +### Extract and Analyze PDF Documents + +```python +from crewai_tools import LLMLayerPDFTool + +document_analyst = Agent( + role='Document Analyst', + tools=[LLMLayerPDFTool()], + verbose=True +) + +task = Task( + description=''' + Extract text from https://example.com/whitepaper.pdf + Summarize key findings + Identify main conclusions + ''', + agent=document_analyst, + expected_output='PDF summary with key findings' +) +``` + +### Analyze YouTube Videos + +```python +from crewai_tools import LLMLayerYouTubeTool + +video_analyst = Agent( + role='Video Content Analyst', + tools=[LLMLayerYouTubeTool()], + verbose=True +) + +task = Task( + description=''' + Get transcript from https://youtube.com/watch?v=VIDEO_ID + Language: en + Summarize main topics discussed + Extract key quotes + ''', + agent=video_analyst, + expected_output='Video content summary' +) +``` + +### Multi-Language YouTube Analysis + +```python +task = Task( + description=''' + Extract Spanish transcript from 
https://youtube.com/watch?v=VIDEO_ID + Language: es + Translate key points to English + ''', + agent=video_analyst, + expected_output='Translated summary' +) +``` + +## Configuration + +### Environment Variables + +| Variable | Description | Required | +|----------|-------------|----------| +| `LLMLAYER_API_KEY` | Your LLMLayer API key | Yes | + +### Supported Models + +**OpenAI:** `openai/gpt-4o-mini`, `openai/gpt-4o`, `openai/gpt-4.1` +**Anthropic:** `anthropic/claude-sonnet-4`, +**Groq:** `groq/llama-3.3-70b-versatile`, + +[See full model list →](https://docs.llmlayer.ai/answer) + +### Tool Parameters + +**LLMLayerSearchTool** +- **Required:** `query`, `model` +- **Optional:** `location` (country code), `citations` (bool), `return_sources` (bool), `return_images` (bool), `date_filter` (hour/day/week/month/year/anytime), `domain_filter` (list), `max_tokens` (int), `temperature` (float), `answer_type` (markdown/html/json), `json_schema` (str/dict) + +**LLMLayerWebSearchTool** +- **Required:** `query` +- **Optional:** `search_type` (general/news/shopping/videos/images/scholar), `location`, `recency` (hour/day/week/month/year), `domain_filter` (list) + +**LLMLayerScraperTool** +- **Required:** `url` +- **Optional:** `format` (markdown/html/screenshot/pdf), `include_images` (bool), `include_links` (bool) +- **Returns:** Extracted content in specified format. For screenshot/pdf, returns base64-encoded data with length indicator. + +**LLMLayerPDFTool** +- **Required:** `url` (direct PDF link) +- **Returns:** Full text content with page count and metadata + +**LLMLayerYouTubeTool** +- **Required:** `url` (YouTube video URL) +- **Optional:** `language` (e.g., 'en', 'es', 'fr') +- **Returns:** Full transcript with detected/specified language + +## How Agents Use Tools + +Agents automatically call tools based on task descriptions. You guide tool usage through natural language: + +```python +# Agent will automatically: +# 1. Parse the task description +# 2. Identify required tool parameters +# 3. Call the appropriate tool +# 4. Process the results + +task = Task( + description=''' + Search for "AI breakthroughs 2025" + Model: openai/gpt-4o-mini + Return sources and citations + Filter to last month only + ''', + agent=researcher +) +``` + +The agent extracts: +- `query`: "AI breakthroughs 2025" +- `model`: "openai/gpt-4o-mini" +- `return_sources`: True +- `citations`: True +- `date_filter`: "month" + +## Tool Details + +### LLMLayerSearchTool + +**Purpose:** Combines web search with LLM processing to generate AI-powered answers with optional citations and sources. + +**When to use:** +- Need AI-analyzed answers instead of raw search results +- Want citations and source attribution +- Require structured JSON responses +- Need domain-specific filtered results + +**Example outputs:** +- Markdown text with inline citations: `[1]` +- HTML formatted responses +- Structured JSON matching your schema +- Images from search results (when `return_images=True`) + +**Task description format:** +```python +description = ''' +Query: your search question +Model: model_name +Citations: true/false +Return sources: true/false +Date filter: hour/day/week/month/year +Domain filter: ["domain1.com", "-exclude.com"] +''' +``` + +### LLMLayerWebSearchTool + +**Purpose:** Raw web search across multiple content types without AI processing. 
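For a quick sanity check outside a crew, the tool can also be invoked directly. A minimal sketch (the query and filter values are illustrative; `LLMLAYER_API_KEY` must be set):

```python
import os
from crewai_tools import LLMLayerWebSearchTool

os.environ["LLMLAYER_API_KEY"] = "your_api_key_here"

tool = LLMLayerWebSearchTool()
# Raw news results from the past week, no AI processing involved
print(tool._run(query="AI regulation", search_type="news", recency="week"))
```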
+ +**When to use:** +- Need unprocessed search results +- Want to search specific content types (news, videos, images, scholarly articles) +- Prefer raw data for your own analysis +- Need shopping or video results + +**Search types available:** +- `general` - Standard web search +- `news` - Recent news articles +- `shopping` - Product listings and prices +- `videos` - Video content +- `images` - Image search results +- `scholar` - Academic papers and citations + +**Example outputs:** +- List of results with titles, URLs, and snippets +- No AI interpretation, just raw search data + +**Task description format:** +```python +description = ''' +Query: search terms +Search type: general/news/shopping/videos/images/scholar +Recency: hour/day/week/month/year +Location: country_code +''' +``` + +### LLMLayerScraperTool + +**Purpose:** Extract content from any webpage in multiple formats. + +**When to use:** +- Need to extract article text or documentation +- Want to analyze website content +- Need screenshots for visual analysis +- Want to generate PDFs from webpages + +**Formats available:** +- `markdown` - Clean text with formatting (default) +- `html` - Full HTML source +- `screenshot` - Visual capture (returns base64-encoded image) +- `pdf` - Generated PDF (returns base64-encoded PDF) + +**Options:** +- `include_images`: Keep or remove images from markdown +- `include_links`: Keep or remove hyperlinks from markdown + +**Example outputs:** +- Markdown: Clean, readable text format +- HTML: Full webpage source code +- Screenshot: Base64-encoded PNG image data +- PDF: Base64-encoded PDF document + +**Task description format:** +```python +description = ''' +URL: webpage_url +Format: markdown/html/screenshot/pdf +Include images: true/false +Include links: true/false +''' +``` + +### LLMLayerPDFTool + +**Purpose:** Extract full text content from PDF documents via URL. + +**When to use:** +- Need to analyze PDF reports, papers, or documents +- Want to extract text from research papers +- Need to process whitepaper content +- Analyzing PDF-based documentation + +**Returns:** +- Full text content extracted from PDF +- Total page count +- Original and final URL (after redirects) +- Processing metadata + +**Task description format:** +```python +description = ''' +URL: direct_pdf_url +Extract and analyze the content +Summarize key findings +''' +``` + +**Note:** URL must be a direct link to a PDF file (ends in .pdf or serves PDF content-type). + +### LLMLayerYouTubeTool + +**Purpose:** Extract transcripts from YouTube videos in multiple languages. + +**When to use:** +- Need to analyze video content without watching +- Want to extract quotes from videos +- Need to translate or summarize video content +- Analyzing tutorial or lecture content + +**Returns:** +- Full video transcript +- Detected or specified language code +- Complete video URL + +**Supported languages:** 100+ languages including: +- `en` - English +- `es` - Spanish +- `fr` - French +- `de` - German +- `ja` - Japanese +- `zh` - Chinese +- And many more... + +**Task description format:** +```python +description = ''' +URL: youtube_video_url +Language: en/es/fr/de/etc (optional) +Analyze the content +Extract key points +''' +``` + +**Note:** Video must have captions/transcripts available. Auto-generated captions are supported. + +## Advanced Configuration + +### Using Your Own Model Provider Keys + +```python +task = Task( + description=''' + Research quantum computing. 
+ Model: openai/gpt-4o-mini + Use provider_key: your_openai_api_key_here + ''', + agent=researcher +) +``` + +### Custom Timeouts and Metadata + +```python +from crewai_tools import LLMLayerSearchTool + +# Configure tool instance +search_tool = LLMLayerSearchTool( + timeout=120, # 2 minutes + include_metadata=True # Show response time and token usage +) + +agent = Agent( + role='Researcher', + tools=[search_tool], + verbose=True +) +``` + +## API Costs + +LLMLayer uses transparent, pay-per-use pricing: + +| Operation | Cost | +|-----------|---------------------| +| Answer API (Search + AI) | Model cost + $0.004 | +| Web Search | $0.001 | +| Web Scraper | $0.001 | +| PDF Extraction | $0.002 | +| YouTube Transcript | $0.002 | + +**Note:** Model costs vary by provider. Use `provider_key` to use your own model API keys (eliminates model cost). + +## Error Handling + +Tools return descriptive error messages instead of raising exceptions: + +```python +# If API key is missing or invalid: +"Error: LLMLAYER_API_KEY not set. Set it as environment variable or pass to tool initialization." + +# If API returns an error: +"Error: LLMLayer API error [authentication_error]: Invalid API key" + +# If request times out: +"Error: Request timed out after 90 seconds" +``` + +Common error codes: +- `authentication_error` - Invalid API key +- `validation_error` - Invalid parameters +- `rate_limit` - Too many requests +- `provider_error` - LLM provider issue + +## Best Practices + +### 1. Use Environment Variables for API Keys + +```python +# ✅ Good +import os +os.environ["LLMLAYER_API_KEY"] = "your_key" + +# ❌ Bad +tool = LLMLayerSearchTool(api_key="hardcoded_key") +``` + +### 2. Choose the Right Model + +```python +# For speed and cost +model = "openai/gpt-4o-mini" + +# For quality +model = "anthropic/claude-sonnet-4" + +# For long contexts +model = "google/gemini-1.5-pro" +``` + +### 3. Use Citations for Verifiable Information + +```python +description = ''' +Research climate policies. +Model: openai/gpt-4o-mini +Enable citations and return_sources +''' +``` + +### 4. Filter Domains for Quality + +```python +description = ''' +Search medical research. +Model: openai/gpt-4o-mini +Domain filter: ["nih.gov", "who.int", "nejm.org"] +''' +``` + +### 5. Apply Recency Filters for Current Events + +```python +description = ''' +Find tech news. 
+Model: openai/gpt-4o-mini +Date filter: day +''' +``` + +## Testing + +Run tests: + +```bash +uv run pytest tests/tools/llmlayer_tools_test.py -v +``` + +All tests passing: **29/29** ✓ + +## Support + +- **Documentation:** [docs.llmlayer.ai](https://docs.llmlayer.ai) +- **API Reference:** [docs.llmlayer.ai/api-reference](https://docs.llmlayer.ai/api-reference) +- **Get API Key:** [app.llmlayer.ai](https://app.llmlayer.ai) +- **Issues:** [github.com/crewAIInc/crewAI-tools/issues](https://github.com/crewAIInc/crewAI-tools/issues) + +## License + +MIT License - Part of the CrewAI Tools project + +--- + +**Built for [CrewAI](https://crewai.com) • Powered by [LLMLayer](https://llmlayer.ai)** \ No newline at end of file diff --git a/crewai_tools/tools/llmlayer_tools/__init__.py b/crewai_tools/tools/llmlayer_tools/__init__.py new file mode 100644 index 00000000..ee618f06 --- /dev/null +++ b/crewai_tools/tools/llmlayer_tools/__init__.py @@ -0,0 +1,15 @@ +from .llmlayer_tools import ( + LLMLayerPDFTool, + LLMLayerScraperTool, + LLMLayerSearchTool, + LLMLayerWebSearchTool, + LLMLayerYouTubeTool, +) + +__all__ = [ + "LLMLayerPDFTool", + "LLMLayerScraperTool", + "LLMLayerSearchTool", + "LLMLayerWebSearchTool", + "LLMLayerYouTubeTool", +] diff --git a/crewai_tools/tools/llmlayer_tools/llmlayer_tools.py b/crewai_tools/tools/llmlayer_tools/llmlayer_tools.py new file mode 100644 index 00000000..c79f440c --- /dev/null +++ b/crewai_tools/tools/llmlayer_tools/llmlayer_tools.py @@ -0,0 +1,656 @@ +import json +import os +from typing import Any, Literal + +import requests +from crewai.tools import BaseTool +from pydantic import BaseModel, Field + +# ================================ +# LLMLayer Search Tool +# ================================ + + +class LLMLayerSearchInput(BaseModel): + """Input schema for LLMLayer Answer API.""" + + query: str = Field(description="The search query or question to answer") + model: str = Field( + description="LLM model to use (e.g., openai/gpt-4o-mini, anthropic/claude-sonnet-4, groq/llama-3.3-70b-versatile)" + ) + location: str = Field( + default="us", description="Country code for localized search results" + ) + provider_key: str | None = Field( + default=None, description="Your own API key for the model provider (optional)" + ) + system_prompt: str | None = Field( + default=None, description="Custom system prompt to override default behavior" + ) + response_language: str = Field( + default="auto", + description="Language for the response (auto detects from query)", + ) + answer_type: Literal["markdown", "html", "json"] = Field( + default="markdown", description="Format of the response" + ) + search_type: Literal["general", "news"] = Field( + default="general", description="Type of web search to perform" + ) + json_schema: str | dict | None = Field( + default=None, + description="JSON schema as string or dict for structured responses (required when answer_type=json)", + ) + citations: bool = Field( + default=False, description="Include inline citations [1] in the response" + ) + return_sources: bool = Field( + default=False, description="Return source documents used for answer generation" + ) + return_images: bool = Field( + default=False, description="Return relevant images from search" + ) + date_filter: Literal["anytime", "hour", "day", "week", "month", "year"] = Field( + default="anytime", description="Filter search results by recency" + ) + max_tokens: int = Field( + default=1500, ge=1, description="Maximum tokens in the LLM response" + ) + temperature: float = Field( + 
default=0.7, + ge=0.0, + le=2.0, + description="Controls randomness (0=deterministic, 2=very creative)", + ) + domain_filter: list[str] | None = Field( + default=None, + description="Include/exclude domains (use '-' prefix to exclude, e.g., ['wikipedia.org', '-reddit.com'])", + ) + max_queries: int = Field( + default=1, ge=1, le=5, description="Number of search queries to generate" + ) + search_context_size: Literal["low", "medium", "high"] = Field( + default="medium", description="Amount of search context to extract" + ) + + +class LLMLayerSearchTool(BaseTool): + name: str = "LLMLayer Answer API" + description: str = ( + "Use this tool when you need web-enhanced AI answers with citations and sources. " + "Combines real-time web search with AI to provide comprehensive answers to questions. " + "Best for: research questions, current events, fact-checking, and detailed explanations. " + "Choose this over LLMLayerWebSearchTool when you want AI-processed answers instead of raw search results." + ) + args_schema: type[BaseModel] = LLMLayerSearchInput + + api_key: str = "" + timeout: int = 90 + include_metadata: bool = False + + def __init__( + self, + api_key: str = "", + timeout: int = 90, + include_metadata: bool = False, + **kwargs, + ): + super().__init__(**kwargs) + self.api_key = api_key or os.getenv("LLMLAYER_API_KEY", "") + self.timeout = timeout + self.include_metadata = include_metadata + + def _run( + self, + query: str, + model: str, + location: str = "us", + provider_key: str | None = None, + system_prompt: str | None = None, + response_language: str = "auto", + answer_type: str = "markdown", + search_type: str = "general", + json_schema: str | dict | None = None, + citations: bool = False, + return_sources: bool = False, + return_images: bool = False, + date_filter: str = "anytime", + max_tokens: int = 1500, + temperature: float = 0.7, + domain_filter: list[str] | None = None, + max_queries: int = 1, + search_context_size: str = "medium", + ) -> str: + """Execute web search with AI-powered answer generation.""" + + if not self.api_key: + return "Error: LLMLAYER_API_KEY not set. Set it as environment variable or pass to tool initialization." + + payload: dict[str, Any] = { + "query": query, + "model": model, + "location": location, + "response_language": response_language, + "answer_type": answer_type, + "search_type": search_type, + "citations": citations, + "return_sources": return_sources, + "return_images": return_images, + "date_filter": date_filter, + "max_tokens": max_tokens, + "temperature": temperature, + "max_queries": max_queries, + "search_context_size": search_context_size, + } + + if provider_key: + payload["provider_key"] = provider_key + if system_prompt: + payload["system_prompt"] = system_prompt + if json_schema: + try: + payload["json_schema"] = json.dumps(json_schema) if isinstance(json_schema, dict) else json_schema + except (TypeError, ValueError) as e: + return f"Error: Invalid JSON schema - {str(e)}. Ensure all values are JSON-serializable." 
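        # domain_filter is forwarded verbatim; per the field docs, a leading '-'
        # marks an exclusion, e.g. ["stackoverflow.com", "-reddit.com"].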
+ if domain_filter: + payload["domain_filter"] = domain_filter + + try: + response = requests.post( + "https://api.llmlayer.dev/api/v1/search", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json=payload, + timeout=self.timeout, + ) + + if response.status_code != 200: + try: + error_data = response.json() + detail = error_data.get("detail", {}) + error_msg = detail.get("message", f"HTTP {response.status_code}") + error_code = detail.get("error_code", "unknown") + return f"Error: LLMLayer API error [{error_code}]: {error_msg}" + except Exception: + return f"Error: HTTP {response.status_code} - {response.reason}" + + data = response.json() + llm_response = data.get("llm_response", "") + + if answer_type == "json": + result = ( + json.dumps(llm_response, indent=2) + if isinstance(llm_response, dict) + else str(llm_response) + ) + else: + result = str(llm_response) + + sources = data.get("sources", []) + if return_sources and sources: + result += "\n\n### Sources\n" + for i, source in enumerate(sources, 1): + title = source.get("title", "Source") + link = source.get("link", "") + snippet = source.get("snippet", "") + result += f"{i}. [{title}]({link})\n" + if snippet: + result += f" {snippet}\n" + + images = data.get("images", []) + if return_images and images: + result += "\n\n### Images\n" + for i, image in enumerate(images[:5], 1): + title = image.get("title", "Image") + image_url = image.get("imageUrl", "") + result += f"{i}. {title}: {image_url}\n" + + if self.include_metadata: + response_time = data.get("response_time", "N/A") + input_tokens = data.get("input_tokens", 0) + output_tokens = data.get("output_tokens", 0) + result += "\n\n---\n" + result += f"Response time: {response_time}s | Tokens: {input_tokens} in / {output_tokens} out" + + return result + + except requests.exceptions.Timeout: + return f"Error: Request timed out after {self.timeout} seconds" + except requests.exceptions.RequestException as e: + return f"Error: {e!s}" + + +# ================================ +# LLMLayer Web Search Tool +# ================================ + + +class LLMLayerWebSearchInput(BaseModel): + """Input schema for LLMLayer Web Search API.""" + + query: str = Field(description="The search query") + search_type: Literal[ + "general", "news", "shopping", "videos", "images", "scholar" + ] = Field(default="general", description="Type of search to perform") + location: str = Field( + default="us", description="Country code for localized results" + ) + recency: Literal["hour", "day", "week", "month", "year"] | None = Field( + default=None, description="Filter by time period" + ) + domain_filter: list[str] | None = Field( + default=None, description="Include/exclude domains (prefix with '-' to exclude)" + ) + + +class LLMLayerWebSearchTool(BaseTool): + name: str = "LLMLayer Web Search" + description: str = ( + "Use this tool when you need raw web search results without AI processing. " + "Returns unprocessed search results with titles, links, and snippets. " + "Best for: finding specific URLs, getting multiple search results to analyze yourself, " + "or when you need the raw data instead of an AI summary. " + "Choose LLMLayerAnswerAPI if you want AI-processed answers with citations." 
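        # "LLMLayerAnswerAPI" refers to the LLMLayer Answer API tool (LLMLayerSearchTool) defined above.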
+ ) + args_schema: type[BaseModel] = LLMLayerWebSearchInput + + api_key: str = "" + timeout: int = 30 + include_metadata: bool = False + + def __init__( + self, + api_key: str = "", + timeout: int = 30, + include_metadata: bool = False, + **kwargs, + ): + super().__init__(**kwargs) + self.api_key = api_key or os.getenv("LLMLAYER_API_KEY", "") + self.timeout = timeout + self.include_metadata = include_metadata + + def _run( + self, + query: str, + search_type: str = "general", + location: str = "us", + recency: str | None = None, + domain_filter: list[str] | None = None, + ) -> str: + """Execute web search without AI processing.""" + + if not self.api_key: + return "Error: LLMLAYER_API_KEY not set. Set it as environment variable or pass to tool initialization." + + payload: dict[str, Any] = { + "query": query, + "search_type": search_type, + "location": location, + } + + if recency: + payload["recency"] = recency + if domain_filter: + payload["domain_filter"] = domain_filter + + try: + response = requests.post( + "https://api.llmlayer.dev/api/v1/web_search", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json=payload, + timeout=self.timeout, + ) + + if response.status_code != 200: + try: + error_data = response.json() + detail = error_data.get("detail", {}) + error_msg = detail.get("message", f"HTTP {response.status_code}") + error_code = detail.get("error_code", "unknown") + return f"Error: LLMLayer API error [{error_code}]: {error_msg}" + except Exception: + return f"Error: HTTP {response.status_code} - {response.reason}" + + data = response.json() + results = data.get("results", []) + + if not results: + return "No results found." + + output = ( + f"### {search_type.title()} Search Results ({len(results)} found)\n\n" + ) + + for i, result in enumerate(results[:20], 1): + title = result.get("title", "No title") + link = result.get("link", result.get("url", "")) + snippet = result.get("snippet", result.get("description", "")) + + output += f"**{i}. {title}**\n" + if link: + output += f" URL: {link}\n" + if snippet: + output += f" {snippet}\n" + output += "\n" + + if self.include_metadata: + cost = data.get("cost") + if cost is not None: + output += f"\n---\nCost: ${cost:.6f}" + + return output + + except requests.exceptions.Timeout: + return f"Error: Request timed out after {self.timeout} seconds" + except requests.exceptions.RequestException as e: + return f"Error: {e!s}" + + +# ================================ +# LLMLayer Scraper Tool +# ================================ + + +class LLMLayerScraperInput(BaseModel): + """Input schema for LLMLayer Scraper API.""" + + url: str = Field(description="URL to scrape") + format: Literal["markdown", "html", "screenshot", "pdf"] = Field( + default="markdown", description="Output format" + ) + include_images: bool = Field( + default=True, description="Include images in markdown output" + ) + include_links: bool = Field( + default=True, description="Include links in markdown output" + ) + + +class LLMLayerScraperTool(BaseTool): + name: str = "LLMLayer Scraper" + description: str = ( + "Use this tool to extract clean content from any webpage. " + "Returns structured content in markdown (default), HTML, PDF, or screenshot format. " + "Best for: extracting article text, documentation, blog posts, or any web content for analysis. " + "Use this when you have a specific URL and need its content extracted." 
+ ) + args_schema: type[BaseModel] = LLMLayerScraperInput + + api_key: str = "" + timeout: int = 30 + include_metadata: bool = False + + def __init__( + self, + api_key: str = "", + timeout: int = 30, + include_metadata: bool = False, + **kwargs, + ): + super().__init__(**kwargs) + self.api_key = api_key or os.getenv("LLMLAYER_API_KEY", "") + self.timeout = timeout + self.include_metadata = include_metadata + + def _run( + self, + url: str, + format: str = "markdown", + include_images: bool = True, + include_links: bool = True, + ) -> str: + """Extract content from URL.""" + + if not self.api_key: + return "Error: LLMLAYER_API_KEY not set. Set it as environment variable or pass to tool initialization." + + payload = { + "url": url, + "format": format, + "include_images": include_images, + "include_links": include_links, + } + + try: + response = requests.post( + "https://api.llmlayer.dev/api/v1/scrape", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json=payload, + timeout=self.timeout, + ) + + if response.status_code != 200: + try: + error_data = response.json() + detail = error_data.get("detail", {}) + error_msg = detail.get("message", f"HTTP {response.status_code}") + error_code = detail.get("error_code", "unknown") + return f"Error: LLMLayer API error [{error_code}]: {error_msg}" + except Exception: + return f"Error: HTTP {response.status_code} - {response.reason}" + + data = response.json() + final_url = data.get("url", url) + markdown = data.get("markdown", "") + html = data.get("html") + screenshot_data = data.get("screenshot_data") + pdf_data = data.get("pdf_data") + + if format == "markdown": + result = f"# Content from {final_url}\n\n{markdown}" + elif format == "html": + result = f"# HTML from {final_url}\n\n{html or markdown}" + elif format == "screenshot": + if screenshot_data: + result = f"# Screenshot from {final_url}\n\nBase64 data length: {len(screenshot_data)} characters\n\n[Screenshot captured as base64]" + else: + result = ( + f"# Screenshot from {final_url}\n\nNo screenshot data returned" + ) + elif format == "pdf": + if pdf_data: + result = f"# PDF from {final_url}\n\nBase64 data length: {len(pdf_data)} characters\n\n[PDF generated as base64]" + else: + result = f"# PDF from {final_url}\n\nNo PDF data returned" + else: + result = markdown + + if self.include_metadata: + status_code = data.get("status_code", 0) + cost = data.get("cost") + result += f"\n\n---\nStatus: {status_code}" + if cost is not None: + result += f" | Cost: ${cost:.6f}" + + return result + + except requests.exceptions.Timeout: + return f"Error: Request timed out after {self.timeout} seconds" + except requests.exceptions.RequestException as e: + return f"Error: {e!s}" + + +# ================================ +# LLMLayer PDF Tool +# ================================ + + +class LLMLayerPDFInput(BaseModel): + """Input schema for LLMLayer PDF Content API.""" + + url: str = Field(description="Direct URL to PDF document") + + +class LLMLayerPDFTool(BaseTool): + name: str = "LLMLayer PDF Extractor" + description: str = ( + "Use this tool to extract text content from PDF documents via URL. " + "Returns full text content with page count. " + "Best for: reading research papers, documents, reports in PDF format. " + "Requires a direct link to a PDF file." 
+ ) + args_schema: type[BaseModel] = LLMLayerPDFInput + + api_key: str = "" + timeout: int = 30 + include_metadata: bool = False + + def __init__( + self, + api_key: str = "", + timeout: int = 30, + include_metadata: bool = False, + **kwargs, + ): + super().__init__(**kwargs) + self.api_key = api_key or os.getenv("LLMLAYER_API_KEY", "") + self.timeout = timeout + self.include_metadata = include_metadata + + def _run(self, url: str) -> str: + """Extract text from PDF.""" + + if not self.api_key: + return "Error: LLMLAYER_API_KEY not set. Set it as environment variable or pass to tool initialization." + + try: + response = requests.post( + "https://api.llmlayer.dev/api/v1/get_pdf_content", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json={"url": url}, + timeout=self.timeout, + ) + + if response.status_code != 200: + try: + error_data = response.json() + detail = error_data.get("detail", {}) + error_msg = detail.get("message", f"HTTP {response.status_code}") + error_code = detail.get("error_code", "unknown") + return f"Error: LLMLayer API error [{error_code}]: {error_msg}" + except Exception: + return f"Error: HTTP {response.status_code} - {response.reason}" + + data = response.json() + text = data.get("text", "") + pages = data.get("pages", 0) + final_url = data.get("url", url) + + result = f"# PDF Content from {final_url}\n\n**Pages:** {pages}\n\n{text}" + + if self.include_metadata: + status_code = data.get("status_code", 0) + cost = data.get("cost") + result += f"\n\n---\nStatus: {status_code}" + if cost is not None: + result += f" | Cost: ${cost:.6f}" + + return result + + except requests.exceptions.Timeout: + return f"Error: Request timed out after {self.timeout} seconds" + except requests.exceptions.RequestException as e: + return f"Error: {e!s}" + + +# ================================ +# LLMLayer YouTube Tool +# ================================ + + +class LLMLayerYouTubeInput(BaseModel): + """Input schema for LLMLayer YouTube Transcript API.""" + + url: str = Field(description="YouTube video URL") + language: str | None = Field( + default=None, description="Language code for transcript (e.g., 'en', 'es')" + ) + + +class LLMLayerYouTubeTool(BaseTool): + name: str = "LLMLayer YouTube Transcript" + description: str = ( + "Use this tool to extract transcripts from YouTube videos. " + "Returns full video transcript text in specified language. " + "Best for: analyzing video content, extracting information from tutorials, " + "lectures, interviews, or any YouTube video with available transcripts." + ) + args_schema: type[BaseModel] = LLMLayerYouTubeInput + + api_key: str = "" + timeout: int = 30 + include_metadata: bool = False + + def __init__( + self, + api_key: str = "", + timeout: int = 30, + include_metadata: bool = False, + **kwargs, + ): + super().__init__(**kwargs) + self.api_key = api_key or os.getenv("LLMLAYER_API_KEY", "") + self.timeout = timeout + self.include_metadata = include_metadata + + def _run(self, url: str, language: str | None = None) -> str: + """Extract YouTube transcript.""" + + if not self.api_key: + return "Error: LLMLAYER_API_KEY not set. Set it as environment variable or pass to tool initialization." 
+ + payload = {"url": url} + if language: + payload["language"] = language + + try: + response = requests.post( + "https://api.llmlayer.dev/api/v1/youtube_transcript", + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json", + }, + json=payload, + timeout=self.timeout, + ) + + if response.status_code != 200: + try: + error_data = response.json() + detail = error_data.get("detail", {}) + error_msg = detail.get("message", f"HTTP {response.status_code}") + error_code = detail.get("error_code", "unknown") + return f"Error: LLMLayer API error [{error_code}]: {error_msg}" + except Exception: + return f"Error: HTTP {response.status_code} - {response.reason}" + + data = response.json() + transcript = data.get("transcript", "") + final_url = data.get("url", url) + lang = data.get("language", "unknown") + + result = f"# YouTube Transcript\n\n**URL:** {final_url}\n**Language:** {lang}\n\n{transcript}" + + if self.include_metadata: + cost = data.get("cost") + if cost is not None: + result += f"\n\n---\nCost: ${cost:.6f}" + + return result + + except requests.exceptions.Timeout: + return f"Error: Request timed out after {self.timeout} seconds" + except requests.exceptions.RequestException as e: + return f"Error: {e!s}" diff --git a/tests/tools/llmlayer_tools_test.py b/tests/tools/llmlayer_tools_test.py new file mode 100644 index 00000000..50a199ee --- /dev/null +++ b/tests/tools/llmlayer_tools_test.py @@ -0,0 +1,444 @@ +import pytest +from unittest.mock import Mock, patch +from crewai_tools.tools.llmlayer_tools import ( + LLMLayerSearchTool, + LLMLayerWebSearchTool, + LLMLayerScraperTool, + LLMLayerPDFTool, + LLMLayerYouTubeTool, +) + + +class TestLLMLayerSearchTool: + @patch('requests.post') + def test_search_basic_success(self, mock_post): + """Test basic search functionality with successful response""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "llm_response": "AI is advancing rapidly in 2024.", + "response_time": "2.5", + "input_tokens": 100, + "output_tokens": 50, + "model_cost": 0.001, + "llmlayer_cost": 0.002, + "sources": [], + "images": [] + } + mock_post.return_value = mock_response + + tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="What is AI?", model="openai/gpt-4o-mini") + + assert "AI is advancing rapidly" in result + assert "Response time:" not in result # metadata disabled by default + mock_post.assert_called_once() + + @patch('requests.post') + def test_search_with_metadata(self, mock_post): + """Test search with metadata enabled""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "llm_response": "Test response", + "response_time": "2.5", + "input_tokens": 100, + "output_tokens": 50, + "sources": [], + "images": [] + } + mock_post.return_value = mock_response + + tool = LLMLayerSearchTool(api_key="test_key", include_metadata=True) + result = tool._run(query="test", model="openai/gpt-4o-mini") + + assert "Response time:" in result + assert "Tokens:" in result + + @patch('requests.post') + def test_search_with_sources(self, mock_post): + """Test search with sources returned""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "llm_response": "Test response", + "response_time": "1.0", + "input_tokens": 50, + "output_tokens": 30, + "sources": [ + {"title": "Source 1", "link": "https://example.com", "snippet": "Test snippet"} + ], + "images": [] + } + mock_post.return_value 
= mock_response + + tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="test", model="openai/gpt-4o-mini", return_sources=True) + + assert "Sources" in result + assert "Source 1" in result + assert "https://example.com" in result + + @patch('requests.post') + def test_search_with_images(self, mock_post): + """Test search with images returned""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "llm_response": "Test response", + "response_time": "1.5", + "input_tokens": 60, + "output_tokens": 40, + "sources": [], + "images": [ + {"title": "Image 1", "imageUrl": "https://example.com/image.jpg"} + ] + } + mock_post.return_value = mock_response + + tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="test", model="openai/gpt-4o-mini", return_images=True) + + assert "Images" in result + assert "Image 1" in result + + def test_search_missing_api_key(self): + """Test that missing API key returns error""" + tool = LLMLayerSearchTool(api_key="") + result = tool._run(query="test", model="openai/gpt-4o-mini") + assert "Error: LLMLAYER_API_KEY not set" in result + + @patch('requests.post') + def test_search_api_error(self, mock_post): + """Test handling of API errors""" + mock_response = Mock() + mock_response.status_code = 400 + mock_response.reason = "Bad Request" + mock_response.json.return_value = { + "detail": { + "error_code": "invalid_model", + "message": "Model not supported" + } + } + mock_post.return_value = mock_response + + tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="test", model="invalid-model") + + assert "Error:" in result + assert "invalid_model" in result + + @patch('requests.post') + def test_search_json_response(self, mock_post): + """Test JSON response format""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "llm_response": {"key": "value"}, + "response_time": "1.0", + "input_tokens": 50, + "output_tokens": 30, + "sources": [], + "images": [] + } + mock_post.return_value = mock_response + + tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="test", model="openai/gpt-4o-mini", answer_type="json") + + assert '"key"' in result + assert '"value"' in result + + +class TestLLMLayerWebSearchTool: + @patch('requests.post') + def test_web_search_basic(self, mock_post): + """Test basic web search""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "results": [ + {"title": "Test Result", "link": "https://test.com", "snippet": "Test snippet"} + ], + "cost": 0.001 + } + mock_post.return_value = mock_response + + tool = LLMLayerWebSearchTool(api_key="test_key", include_metadata=True) + result = tool._run(query="test query") + + assert "Test Result" in result + assert "https://test.com" in result + assert "Cost:" in result + + @patch('requests.post') + def test_web_search_no_results(self, mock_post): + """Test web search with no results""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"results": [], "cost": 0.001} + mock_post.return_value = mock_response + + tool = LLMLayerWebSearchTool(api_key="test_key") + result = tool._run(query="test") + + assert "No results found" in result + + def test_web_search_missing_api_key(self): + """Test that missing API key returns error""" + tool = LLMLayerWebSearchTool(api_key="") + result = tool._run(query="test") + assert "Error: LLMLAYER_API_KEY not set" in result + + 
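    # Sketch of an additional test (beyond the original suite): verifies that the
    # recency filter is forwarded in the JSON payload posted to the API.
    @patch('requests.post')
    def test_web_search_recency_in_payload(self, mock_post):
        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = {"results": [], "cost": 0.001}
        mock_post.return_value = mock_response

        tool = LLMLayerWebSearchTool(api_key="test_key")
        tool._run(query="test", recency="week")

        sent_json = mock_post.call_args.kwargs["json"]
        assert sent_json["query"] == "test"
        assert sent_json["recency"] == "week"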
@patch('requests.post') + def test_web_search_multiple_results(self, mock_post): + """Test web search with multiple results""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "results": [ + {"title": "Result 1", "link": "https://test1.com", "snippet": "Snippet 1"}, + {"title": "Result 2", "url": "https://test2.com", "description": "Snippet 2"} + ], + "cost": 0.001 + } + mock_post.return_value = mock_response + + tool = LLMLayerWebSearchTool(api_key="test_key") + result = tool._run(query="test") + + assert "Result 1" in result + assert "Result 2" in result + assert "https://test1.com" in result + assert "https://test2.com" in result + + +class TestLLMLayerScraperTool: + @patch('requests.post') + def test_scraper_markdown(self, mock_post): + """Test scraping in markdown format""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "markdown": "# Test Content\n\nThis is test content.", + "url": "https://example.com", + "status_code": 200, + "cost": 0.001 + } + mock_post.return_value = mock_response + + tool = LLMLayerScraperTool(api_key="test_key", include_metadata=True) + result = tool._run(url="https://example.com", format="markdown") + + assert "Test Content" in result + assert "Status: 200" in result + + @patch('requests.post') + def test_scraper_html(self, mock_post): + """Test scraping in HTML format""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "markdown": "fallback", + "html": "Test", + "url": "https://example.com", + "status_code": 200, + "cost": 0.001 + } + mock_post.return_value = mock_response + + tool = LLMLayerScraperTool(api_key="test_key") + result = tool._run(url="https://example.com", format="html") + + assert "" in result or "HTML" in result + + @patch('requests.post') + def test_scraper_screenshot(self, mock_post): + """Test scraping in screenshot format""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "markdown": "", + "screenshot_data": "base64encodeddata", + "url": "https://example.com", + "status_code": 200, + "cost": 0.001 + } + mock_post.return_value = mock_response + + tool = LLMLayerScraperTool(api_key="test_key") + result = tool._run(url="https://example.com", format="screenshot") + + assert "Screenshot" in result + assert "base64" in result + + def test_scraper_missing_api_key(self): + """Test that missing API key returns error""" + tool = LLMLayerScraperTool(api_key="") + result = tool._run(url="https://example.com") + assert "Error: LLMLAYER_API_KEY not set" in result + + +class TestLLMLayerPDFTool: + @patch('requests.post') + def test_pdf_extraction(self, mock_post): + """Test PDF text extraction""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "text": "This is extracted PDF text.", + "pages": 5, + "url": "https://example.com/doc.pdf", + "status_code": 200, + "cost": 0.005 + } + mock_post.return_value = mock_response + + tool = LLMLayerPDFTool(api_key="test_key", include_metadata=True) + result = tool._run(url="https://example.com/doc.pdf") + + assert "extracted PDF text" in result + assert "Pages:** 5" in result + assert "Status: 200" in result + + def test_pdf_missing_api_key(self): + """Test that missing API key returns error""" + tool = LLMLayerPDFTool(api_key="") + result = tool._run(url="https://example.com/doc.pdf") + assert "Error: LLMLAYER_API_KEY not set" in result + + @patch('requests.post') + 
def test_pdf_error_handling(self, mock_post): + """Test PDF extraction error handling""" + mock_response = Mock() + mock_response.status_code = 404 + mock_response.reason = "Not Found" + mock_response.json.return_value = { + "detail": { + "error_code": "pdf_not_found", + "message": "PDF not found" + } + } + mock_post.return_value = mock_response + + tool = LLMLayerPDFTool(api_key="test_key") + result = tool._run(url="https://example.com/missing.pdf") + + assert "Error:" in result + assert "pdf_not_found" in result + + +class TestLLMLayerYouTubeTool: + @patch('requests.post') + def test_youtube_transcript(self, mock_post): + """Test YouTube transcript extraction""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "transcript": "This is the video transcript.", + "url": "https://youtube.com/watch?v=test", + "language": "en", + "cost": 0.001 + } + mock_post.return_value = mock_response + + tool = LLMLayerYouTubeTool(api_key="test_key") + result = tool._run(url="https://youtube.com/watch?v=test") + + assert "video transcript" in result + assert "Language:** en" in result + + @patch('requests.post') + def test_youtube_with_language(self, mock_post): + """Test YouTube transcript with specific language""" + mock_response = Mock() + mock_response.status_code = 200 + mock_response.json.return_value = { + "transcript": "Transcript in Spanish", + "url": "https://youtube.com/watch?v=test", + "language": "es", + "cost": 0.001 + } + mock_post.return_value = mock_response + + tool = LLMLayerYouTubeTool(api_key="test_key") + result = tool._run(url="https://youtube.com/watch?v=test", language="es") + + assert "Transcript" in result + assert "es" in result + + def test_youtube_missing_api_key(self): + """Test that missing API key returns error""" + tool = LLMLayerYouTubeTool(api_key="") + result = tool._run(url="https://youtube.com/watch?v=test") + assert "Error: LLMLAYER_API_KEY not set" in result + + +class TestToolInitialization: + def test_api_key_from_env(self): + """Test API key loading from environment""" + with patch.dict('os.environ', {'LLMLAYER_API_KEY': 'env_key'}): + tool = LLMLayerSearchTool() + assert tool.api_key == 'env_key' + + def test_api_key_override(self): + """Test API key can be overridden""" + with patch.dict('os.environ', {'LLMLAYER_API_KEY': 'env_key'}): + tool = LLMLayerSearchTool(api_key='custom_key') + assert tool.api_key == 'custom_key' + + def test_custom_timeout(self): + """Test custom timeout configuration""" + tool = LLMLayerSearchTool(api_key='test', timeout=120) + assert tool.timeout == 120 + + def test_metadata_flag(self): + """Test metadata flag configuration""" + tool = LLMLayerSearchTool(api_key='test', include_metadata=True) + assert tool.include_metadata is True + + def test_default_values(self): + """Test default initialization values""" + tool = LLMLayerSearchTool(api_key='test') + assert tool.timeout == 90 + assert tool.include_metadata is False + + +class TestErrorHandling: + @patch('requests.post') + def test_timeout_error(self, mock_post): + """Test timeout error handling""" + import requests + mock_post.side_effect = requests.exceptions.Timeout() + + tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="test", model="openai/gpt-4o-mini") + + assert "Error:" in result + assert "timed out" in result + + @patch('requests.post') + def test_connection_error(self, mock_post): + """Test connection error handling""" + import requests + mock_post.side_effect = requests.exceptions.ConnectionError() + 
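        # ConnectionError is a subclass of RequestException, so the tool's
        # generic except clause should return an error string rather than raise.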
+ tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="test", model="openai/gpt-4o-mini") + + assert "Error:" in result + + @patch('requests.post') + def test_malformed_json_response(self, mock_post): + """Test handling of malformed JSON in error response""" + mock_response = Mock() + mock_response.status_code = 500 + mock_response.reason = "Internal Server Error" + mock_response.json.side_effect = Exception("Invalid JSON") + mock_post.return_value = mock_response + + tool = LLMLayerSearchTool(api_key="test_key") + result = tool._run(query="test", model="openai/gpt-4o-mini") + + assert "Error:" in result + assert "500" in result \ No newline at end of file diff --git a/uv.lock b/uv.lock index b13035c6..2acfa015 100644 --- a/uv.lock +++ b/uv.lock @@ -972,6 +972,7 @@ dependencies = [ { name = "beautifulsoup4" }, { name = "crewai" }, { name = "docker" }, + { name = "httpx" }, { name = "lancedb" }, { name = "pypdf" }, { name = "python-docx" }, @@ -1124,6 +1125,7 @@ requires-dist = [ { name = "exa-py", marker = "extra == 'exa-py'", specifier = ">=1.8.7" }, { name = "firecrawl-py", marker = "extra == 'firecrawl-py'", specifier = ">=1.8.0" }, { name = "gitpython", marker = "extra == 'github'", specifier = "==3.1.38" }, + { name = "httpx", specifier = ">=0.27.0" }, { name = "hyperbrowser", marker = "extra == 'hyperbrowser'", specifier = ">=0.18.0" }, { name = "lancedb", specifier = ">=0.5.4" }, { name = "langchain-apify", marker = "extra == 'apify'", specifier = ">=0.1.2,<1.0.0" },