diff --git a/README.md b/README.md index 55a888b..0fdf3b9 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,7 @@ Discord

-
-

🎉We are going open source🎉

-

- Let us know if you're interested in contributing! We're working on integrating the core logic for getting elements and extraction into the sdk! -

-
+> **Notice:** The Dendrite SDK is no longer under active development. However, the project will remain fully open source so that you and others can learn from it. Feel free to fork, study, or adapt this code for your own projects as you wish. Reach out to us on Discord if you have questions! We love chatting about web AI agents. 🤖 ## What is Dendrite? @@ -24,33 +19,60 @@ #### A simple outlook integration -With Dendrite, it's easy to create web interaction tools for your agent. +With Dendrite it's easy to create web interaction tools for your agent. + +Here's how you can send an email: ```python -from dendrite import Dendrite +from dendrite import AsyncDendrite + -def send_email(): - client = Dendrite(auth="outlook.live.com") +async def send_email(to, subject, message): + client = AsyncDendrite(auth="outlook.live.com") # Navigate - client.goto( - "https://outlook.live.com/mail/0/", - expected_page="An email inbox" + await client.goto( + "https://outlook.live.com/mail/0/", expected_page="An email inbox" ) # Create new email and populate fields - client.click("The new email button") - client.fill_fields({ - "Recipient": to, - "Subject": subject, - "Message": message - }) + await client.click("The new email button") + await client.fill("The recipient field", to) + await client.press("Enter") + await client.fill("The subject field", subject) + await client.fill("The message field", message) # Send email - client.click("The send button") + await client.press("Enter", hold_cmd=True) + + +if __name__ == "__main__": + import asyncio + + asyncio.run(send_email("test@example.com", "Hello", "This is a test email")) + ``` -To authenticate you'll need to use our Chrome Extension **Dendrite Vault**, you can download it [here](https://chromewebstore.google.com/detail/dendrite-vault/faflkoombjlhkgieldilpijjnblgabnn). Read more about authentication [in our docs](https://docs.dendrite.systems/examples/authentication-instagram). +You'll need to add your own Anthropic key or [configure which LLMs to use yourself](https://docs.dendrite.systems/concepts/config). + + +```.env +ANTHROPIC_API_KEY=sk-... +``` + +To **authenticate** on any web service with Dendrite, follow these steps: + +1. Run the authentication command + + ```bash + dendrite auth --url outlook.live.com + ``` + +2. This command will open a browser window where you can log in. + +3. After you've logged in, press Enter in your terminal. This will save your cookies locally so that they can be used in your code. + +Read more about authentication [in our docs](https://docs.dendrite.systems/examples/authentication). ## Quickstart @@ -60,81 +82,159 @@ pip install dendrite && dendrite install #### Simple navigation and interaction -Initialize the Dendrite client and start doing web interactions without boilerplate. - -[Get your API key here](https://dendrite.systems/app) - ```python -from dendrite import Dendrite +from dendrite import AsyncDendrite + +async def main(): + client = AsyncDendrite() -client = Dendrite(dendrite_api_key="sk...") + await client.goto("https://google.com") + await client.fill("Search field", "Hello world") + await client.press("Enter") -client.goto("https://google.com") -client.fill("Search field", "Hello world") -client.press("Enter") +if __name__ == "__main__": + import asyncio + asyncio.run(main()) ``` In the example above, we simply go to Google, populate the search field with "Hello world" and simulate a keypress on Enter. It's a simple example that starts to explore the endless possibilities with Dendrite.
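If you'd rather skip the event loop, the package also ships a synchronous client (the `dendrite/__init__.py` diff below re-exports `Dendrite`, `Element` and `Page` from `dendrite.browser.sync_api`). A minimal sketch, assuming the sync `Dendrite` class mirrors the async methods used above:

```python
from dendrite import Dendrite  # sync client, re-exported in dendrite/__init__.py

# Same flow as the async quickstart, minus the awaits. This assumes the
# sync API mirrors AsyncDendrite's goto/fill/press methods.
client = Dendrite()
client.goto("https://google.com")
client.fill("Search field", "Hello world")
client.press("Enter")
```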
Now you can create tools for your agents that have access to the full web without depending on APIs. -## More powerful examples +## More Examples -Now, let's have some fun. Earlier we showed you a simple send_email example. And sending emails is cool, but if that's all our agent can do it kind of sucks. So let's create two cooler examples. +### Get Any Page as Markdown -### Download Bank Transactions +This is a simple example of how to get any page as markdown, great for feeding to an LLM. + +```python +from dendrite import AsyncDendrite +from dotenv import load_dotenv + +load_dotenv()  # Load ANTHROPIC_API_KEY from your .env file + +async def main(): + browser = AsyncDendrite() + + await browser.goto("https://dendrite.systems") + await browser.wait_for("the page to load") + + # Get the entire page as markdown + md = await browser.markdown() + print(md) + print("=" * 200) + + # Only get a certain part of the page as markdown + data_extraction_md = await browser.markdown("the part about data extraction") + print(data_extraction_md) + +if __name__ == "__main__": + import asyncio + asyncio.run(main()) +``` -First up, a tool that allows our AI agent to download our bank's monthly transactions so that they can be analyzed and compiled into a report that can be sent to stakeholders with `send_email`. +### Get Company Data from Y Combinator + +The classic web data extraction test, made easy: ```python -from dendrite import Dendrite +from dendrite import AsyncDendrite +import pprint +import asyncio -def get_transactions() -> str: - client = Dendrite(auth="mercury.com") - # Navigate and wait for loading - client.goto( - "https://app.mercury.com/transactions", - expected_page="Dashboard with transactions" - ) - client.wait_for("The transactions to finish loading") +async def main(): + browser = AsyncDendrite() - # Modify filters - client.click("The 'add filter' button") - client.click("The 'show transactions for' dropdown") - client.click("The 'this month' option") + # Navigate + await browser.goto("https://www.ycombinator.com/companies") + + # Find and fill the search field with "AI agent" + await browser.fill( + "Search field", value="AI agent" + ) # Element selector is cached from previous runs + await browser.press("Enter") + + # Extract startups with a natural language description + # Once created by our agent, the same script will be cached and reused + startups = await browser.extract( + "All companies. Return a list of dicts with name, location, description and url" + ) + pprint.pprint(startups, indent=2) - # Download file - client.click("The 'export filtered' button") - transactions = client.get_download() - # Save file locally - path = "files/transactions.xlsx" - transactions.save_as(path) +if __name__ == "__main__": + asyncio.run(main()) - return path +``` -def analyze_transactions(path: str): - ... +This returns: ``` +[ { 'description': 'Book accommodations around the world.', + 'location': 'San Francisco, CA, USA', + 'name': 'Airbnb', + 'url': 'https://www.ycombinator.com/companies/airbnb'}, + { 'description': 'Digital Analytics Platform', + 'location': 'San Francisco, CA, USA', + 'name': 'Amplitude', + 'url': 'https://www.ycombinator.com/companies/amplitude'}, +... +] ``` + -### Extract Google Analytics +### Extract Data from Google Analytics -Finally, it would be cool if we could add the amount of monthly visitors from Google Analytics to our report.
We can do that by using the `extract` function: +Here's how to get the number of monthly visitors from Google Analytics using the `extract` function: ```python -def get_visitor_count() -> int: - client = Dendrite(auth="analytics.google.com") +async def get_visitor_count() -> int: + client = AsyncDendrite(auth="analytics.google.com") - client.goto( + await client.goto( "https://analytics.google.com/analytics/web", expected_page="Google Analytics dashboard" ) # The Dendrite extract agent will create a web script that is cached # and reused. It will self-heal when the website updates - visitor_count = client.extract("The amount of visitors this month", int) + visitor_count = await client.extract("The amount of visitors this month", int) return visitor_count ``` +### Download Bank Transactions + +Here's a tool that allows our AI agent to download our bank's monthly transactions so that they can be analyzed and compiled into a report. + +```python +from dendrite import AsyncDendrite + +async def get_transactions() -> str: + client = AsyncDendrite(auth="mercury.com") + + # Navigate and wait for loading + await client.goto( + "https://app.mercury.com/transactions", + expected_page="Dashboard with transactions" + ) + await client.wait_for("The transactions to finish loading") + + # Modify filters + await client.click("The 'add filter' button") + await client.click("The 'show transactions for' dropdown") + await client.click("The 'this month' option") + + # Download file + await client.click("The 'export filtered' button") + transactions = await client.get_download() + + # Save file locally + path = "files/transactions.xlsx" + await transactions.save_as(path) + + return path + +async def analyze_transactions(path: str): + ... # Analyze the transactions with the LLM of our choice +``` + + ## Documentation [Read the full docs here](https://docs.dendrite.systems) @@ -145,7 +245,7 @@ def get_visitor_count() -> int: When you want to scale up your AI agents, we support using browsers hosted by Browserbase. This way you can run many agents in parallel without having to worry about the infrastructure. -To start using Browserbase just swap out the `Dendrite` class with `DendriteRemoteBrowser` and add your Browserbase API key and project id, either in the code or in a `.env` file like this: +To start using Browserbase just swap out the `AsyncDendrite` class with `AsyncDendriteRemoteBrowser` and add your Browserbase API key and project id, either in the code or in a `.env` file like this: ```bash # ... previous keys @@ -154,17 +254,19 @@ BROWSERBASE_PROJECT_ID= ``` ```python -# from dendrite import Dendrite -from dendrite import DendriteRemoteBrowser - -... - -# client = Dendrite(...) -client = DendriteRemoteBrowser( - # Use interchangeably with the Dendrite class - browserbase_api_key="...", # or specify the browsebase keys in the .env file - browserbase_project_id="..." -) +# from dendrite import AsyncDendrite +from dendrite import AsyncDendriteRemoteBrowser + +async def main(): + # client = AsyncDendrite(...) + client = AsyncDendriteRemoteBrowser( + # Use interchangeably with the AsyncDendrite class + browserbase_api_key="...", # or specify the Browserbase keys in the .env file + browserbase_project_id="..." + ) + ... -...
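+
+    # Sketch: since the remote client is used interchangeably with
+    # AsyncDendrite (see comment above), the calls from the earlier examples
+    # should work unchanged once the Browserbase session is up, e.g.:
+    #   await client.goto("https://google.com")
+    #   md = await client.markdown()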
+if __name__ == "__main__": + import asyncio + asyncio.run(main()) ``` diff --git a/dendrite/__init__.py b/dendrite/__init__.py index 931c728..8184634 100644 --- a/dendrite/__init__.py +++ b/dendrite/__init__.py @@ -1,33 +1,22 @@ import sys -from loguru import logger -from dendrite.async_api import ( - AsyncDendrite, - AsyncElement, - AsyncPage, - AsyncElementsResponse, -) -from dendrite.sync_api import ( +from dendrite._loggers.d_logger import logger +from dendrite.browser.async_api import AsyncDendrite, AsyncElement, AsyncPage +from dendrite.logic.config import Config + +from dendrite.browser.sync_api import ( Dendrite, Element, Page, - ElementsResponse, ) -logger.remove() - -fmt = "{time: HH:mm:ss.SSS} | {level: <8}- {message}" - -logger.add(sys.stderr, level="INFO", format=fmt) - __all__ = [ "AsyncDendrite", "AsyncElement", "AsyncPage", - "AsyncElementsResponse", "Dendrite", "Element", "Page", - "ElementsResponse", + "Config", ] diff --git a/dendrite/_common/_exceptions/__init__.py b/dendrite/_cli/__init__.py similarity index 100% rename from dendrite/_common/_exceptions/__init__.py rename to dendrite/_cli/__init__.py diff --git a/dendrite/_cli/main.py b/dendrite/_cli/main.py index 370e4de..4dc798b 100644 --- a/dendrite/_cli/main.py +++ b/dendrite/_cli/main.py @@ -1,7 +1,11 @@ import argparse +import asyncio import subprocess import sys +from dendrite.browser.async_api import AsyncDendrite +from dendrite.logic.config import Config + def run_playwright_install(): try: @@ -17,14 +21,35 @@ def run_playwright_install(): sys.exit(1) +async def setup_auth(url: str): + try: + async with AsyncDendrite() as browser: + await browser.setup_auth( + url=url, + message="Please log in to the website. Once done, press Enter to continue...", + ) + except Exception as e: + print(f"Error during authentication setup: {e}") + sys.exit(1) + + def main(): parser = argparse.ArgumentParser(description="Dendrite SDK CLI tool") - parser.add_argument("command", choices=["install"], help="Command to execute") + parser.add_argument( + "command", choices=["install", "auth"], help="Command to execute" + ) + + # Add auth-specific arguments + parser.add_argument("--url", help="URL to navigate to for authentication") args = parser.parse_args() if args.command == "install": run_playwright_install() + elif args.command == "auth": + if not args.url: + parser.error("The --url argument is required for the auth command") + asyncio.run(setup_auth(args.url)) if __name__ == "__main__": diff --git a/dendrite/_loggers/d_logger.py b/dendrite/_loggers/d_logger.py new file mode 100644 index 0000000..0ff3276 --- /dev/null +++ b/dendrite/_loggers/d_logger.py @@ -0,0 +1,7 @@ +import sys + +from loguru import logger + +logger.remove() +fmt = "{time: HH:mm:ss.SSS} | {level: <8} | {message}" +logger.add(sys.stderr, level="DEBUG", format=fmt) diff --git a/dendrite/async_api/__init__.py b/dendrite/async_api/__init__.py deleted file mode 100644 index 48accf0..0000000 --- a/dendrite/async_api/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from loguru import logger -from ._core.dendrite_browser import AsyncDendrite -from ._core.dendrite_element import AsyncElement -from ._core.dendrite_page import AsyncPage -from ._core.models.response import AsyncElementsResponse - -__all__ = [ - "AsyncDendrite", - "AsyncElement", - "AsyncPage", - "AsyncElementsResponse", -] diff --git a/dendrite/async_api/_api/_http_client.py b/dendrite/async_api/_api/_http_client.py deleted file mode 100644 index 9e694a6..0000000 --- a/dendrite/async_api/_api/_http_client.py 
+++ /dev/null @@ -1,66 +0,0 @@ -import os -from typing import Optional -import httpx -from loguru import logger - - -from dendrite.async_api._core.models.api_config import APIConfig - - -class HTTPClient: - def __init__(self, api_config: APIConfig, session_id: Optional[str] = None): - self.api_key = api_config.dendrite_api_key - self.session_id = session_id - self.base_url = self.resolve_base_url() - - def resolve_base_url(self): - base_url = ( - "http://localhost:8000/api/v1" - if os.environ.get("DENDRITE_DEV") - else "https://dendrite-server.azurewebsites.net/api/v1" - ) - return base_url - - async def send_request( - self, - endpoint: str, - params: Optional[dict] = None, - data: Optional[dict] = None, - headers: Optional[dict] = None, - method: str = "GET", - ) -> httpx.Response: - url = f"{self.base_url}/{endpoint}" - - headers = headers or {} - headers["Content-Type"] = "application/json" - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - if self.session_id: - headers["X-Session-ID"] = self.session_id - - async with httpx.AsyncClient(timeout=300) as client: - try: - response = await client.request( - method, url, params=params, json=data, headers=headers - ) - response.raise_for_status() - # logger.debug( - # f"{method} to '{url}', that took: { time.time() - start_time }\n\nResponse: {dict_res}\n\n" - # ) - return response - except httpx.HTTPStatusError as http_err: - logger.debug( - f"HTTP error occurred: {http_err.response.status_code}: {http_err.response.text}" - ) - raise - except httpx.ConnectError as connect_err: - logger.error( - f"Connection error occurred: {connect_err}. {url} Server might be down" - ) - raise - except httpx.RequestError as req_err: - # logger.debug(f"Request error occurred: {req_err}") - raise - except Exception as err: - # logger.debug(f"An error occurred: {err}") - raise diff --git a/dendrite/async_api/_api/browser_api_client.py b/dendrite/async_api/_api/browser_api_client.py deleted file mode 100644 index 2de035d..0000000 --- a/dendrite/async_api/_api/browser_api_client.py +++ /dev/null @@ -1,120 +0,0 @@ -from typing import Optional - -from loguru import logger -from dendrite.async_api._api.response.cache_extract_response import ( - CacheExtractResponse, -) -from dendrite.async_api._api.response.selector_cache_response import ( - SelectorCacheResponse, -) -from dendrite.async_api._core.models.authentication import AuthSession -from dendrite.async_api._api.response.get_element_response import GetElementResponse -from dendrite.async_api._api.dto.ask_page_dto import AskPageDTO -from dendrite.async_api._api.dto.authenticate_dto import AuthenticateDTO -from dendrite.async_api._api.dto.get_elements_dto import GetElementsDTO -from dendrite.async_api._api.dto.make_interaction_dto import MakeInteractionDTO -from dendrite.async_api._api.dto.extract_dto import ExtractDTO -from dendrite.async_api._api.dto.try_run_script_dto import TryRunScriptDTO -from dendrite.async_api._api.dto.upload_auth_session_dto import UploadAuthSessionDTO -from dendrite.async_api._api.response.ask_page_response import AskPageResponse -from dendrite.async_api._api.response.interaction_response import ( - InteractionResponse, -) -from dendrite.async_api._api.response.extract_response import ExtractResponse -from dendrite.async_api._api._http_client import HTTPClient -from dendrite._common._exceptions.dendrite_exception import ( - InvalidAuthSessionError, -) -from dendrite.async_api._api.dto.get_elements_dto import CheckSelectorCacheDTO - - -class 
BrowserAPIClient(HTTPClient): - - async def authenticate(self, dto: AuthenticateDTO): - res = await self.send_request( - "actions/authenticate", data=dto.model_dump(), method="POST" - ) - - if res.status_code == 204: - raise InvalidAuthSessionError(domain=dto.domains) - - return AuthSession(**res.json()) - - async def upload_auth_session(self, dto: UploadAuthSessionDTO): - await self.send_request( - "actions/upload-auth-session", data=dto.dict(), method="POST" - ) - - async def check_selector_cache( - self, dto: CheckSelectorCacheDTO - ) -> SelectorCacheResponse: - res = await self.send_request( - "actions/check-selector-cache", data=dto.dict(), method="POST" - ) - return SelectorCacheResponse(**res.json()) - - async def get_interactions_selector( - self, dto: GetElementsDTO - ) -> GetElementResponse: - res = await self.send_request( - "actions/get-interaction-selector", data=dto.dict(), method="POST" - ) - return GetElementResponse(**res.json()) - - async def make_interaction(self, dto: MakeInteractionDTO) -> InteractionResponse: - res = await self.send_request( - "actions/make-interaction", data=dto.dict(), method="POST" - ) - res_dict = res.json() - return InteractionResponse( - status=res_dict["status"], message=res_dict["message"] - ) - - async def check_extract_cache(self, dto: ExtractDTO) -> CacheExtractResponse: - res = await self.send_request( - "actions/check-extract-cache", data=dto.dict(), method="POST" - ) - return CacheExtractResponse(**res.json()) - - async def extract(self, dto: ExtractDTO) -> ExtractResponse: - res = await self.send_request( - "actions/extract-page", data=dto.dict(), method="POST" - ) - res_dict = res.json() - return ExtractResponse( - status=res_dict["status"], - message=res_dict["message"], - return_data=res_dict["return_data"], - created_script=res_dict.get("created_script", None), - used_cache=res_dict.get("used_cache", False), - ) - - async def ask_page(self, dto: AskPageDTO) -> AskPageResponse: - res = await self.send_request( - "actions/ask-page", data=dto.dict(), method="POST" - ) - res_dict = res.json() - return AskPageResponse( - status=res_dict["status"], - description=res_dict["description"], - return_data=res_dict["return_data"], - ) - - async def try_run_cached(self, dto: TryRunScriptDTO) -> Optional[ExtractResponse]: - res = await self.send_request( - "actions/try-run-cached", data=dto.dict(), method="POST" - ) - if res is None: - return None - res_dict = res.json() - loaded_value = res_dict["return_data"] - if loaded_value is None: - return None - - return ExtractResponse( - status=res_dict["status"], - message=res_dict["message"], - return_data=loaded_value, - created_script=res_dict.get("created_script", None), - used_cache=res_dict.get("used_cache", False), - ) diff --git a/dendrite/async_api/_api/dto/ask_page_dto.py b/dendrite/async_api/_api/dto/ask_page_dto.py deleted file mode 100644 index 770d172..0000000 --- a/dendrite/async_api/_api/dto/ask_page_dto.py +++ /dev/null @@ -1,11 +0,0 @@ -from typing import Any, Optional -from pydantic import BaseModel -from dendrite.async_api._core.models.api_config import APIConfig -from dendrite.async_api._core.models.page_information import PageInformation - - -class AskPageDTO(BaseModel): - prompt: str - return_schema: Optional[Any] - page_information: PageInformation - api_config: APIConfig diff --git a/dendrite/async_api/_api/dto/authenticate_dto.py b/dendrite/async_api/_api/dto/authenticate_dto.py deleted file mode 100644 index f5a1de7..0000000 --- 
a/dendrite/async_api/_api/dto/authenticate_dto.py +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Union -from pydantic import BaseModel - - -class AuthenticateDTO(BaseModel): - domains: Union[str, list[str]] diff --git a/dendrite/async_api/_api/dto/get_interaction_dto.py b/dendrite/async_api/_api/dto/get_interaction_dto.py deleted file mode 100644 index 1d93432..0000000 --- a/dendrite/async_api/_api/dto/get_interaction_dto.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel - -from dendrite.async_api._core.models.api_config import APIConfig -from dendrite.async_api._core.models.page_information import PageInformation - - -class GetInteractionDTO(BaseModel): - page_information: PageInformation - api_config: APIConfig - prompt: str diff --git a/dendrite/async_api/_api/dto/get_session_dto.py b/dendrite/async_api/_api/dto/get_session_dto.py deleted file mode 100644 index 6414cc3..0000000 --- a/dendrite/async_api/_api/dto/get_session_dto.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import List -from pydantic import BaseModel - - -class GetSessionDTO(BaseModel): - user_id: str - domain: str diff --git a/dendrite/async_api/_api/dto/google_search_dto.py b/dendrite/async_api/_api/dto/google_search_dto.py deleted file mode 100644 index 8a16a1f..0000000 --- a/dendrite/async_api/_api/dto/google_search_dto.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Optional -from pydantic import BaseModel -from dendrite.async_api._core.models.api_config import APIConfig -from dendrite.async_api._core.models.page_information import PageInformation - - -class GoogleSearchDTO(BaseModel): - query: str - country: Optional[str] = None - filter_results_prompt: Optional[str] = None - page_information: PageInformation - api_config: APIConfig diff --git a/dendrite/async_api/_api/dto/make_interaction_dto.py b/dendrite/async_api/_api/dto/make_interaction_dto.py deleted file mode 100644 index 8edbc06..0000000 --- a/dendrite/async_api/_api/dto/make_interaction_dto.py +++ /dev/null @@ -1,19 +0,0 @@ -from typing import Literal, Optional -from pydantic import BaseModel -from dendrite.async_api._core.models.api_config import APIConfig -from dendrite.async_api._core.models.page_diff_information import ( - PageDiffInformation, -) - - -InteractionType = Literal["click", "fill", "hover"] - - -class MakeInteractionDTO(BaseModel): - url: str - dendrite_id: str - interaction_type: InteractionType - value: Optional[str] = None - expected_outcome: Optional[str] - page_delta_information: PageDiffInformation - api_config: APIConfig diff --git a/dendrite/async_api/_api/dto/try_run_script_dto.py b/dendrite/async_api/_api/dto/try_run_script_dto.py deleted file mode 100644 index 2926401..0000000 --- a/dendrite/async_api/_api/dto/try_run_script_dto.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Any, Optional -from pydantic import BaseModel -from dendrite.async_api._core.models.api_config import APIConfig - - -class TryRunScriptDTO(BaseModel): - url: str - raw_html: str - api_config: APIConfig - prompt: str - db_prompt: Optional[str] = ( - None # If you wish to cache a script based of a fixed prompt use this value - ) - return_data_json_schema: Any diff --git a/dendrite/async_api/_api/dto/upload_auth_session_dto.py b/dendrite/async_api/_api/dto/upload_auth_session_dto.py deleted file mode 100644 index ecb68e1..0000000 --- a/dendrite/async_api/_api/dto/upload_auth_session_dto.py +++ /dev/null @@ -1,11 +0,0 @@ -from pydantic import BaseModel - -from dendrite.async_api._core.models.authentication import ( - AuthSession, - 
StorageState, -) - - -class UploadAuthSessionDTO(BaseModel): - auth_data: AuthSession - storage_state: StorageState diff --git a/dendrite/async_api/_api/response/cache_extract_response.py b/dendrite/async_api/_api/response/cache_extract_response.py deleted file mode 100644 index 463d03b..0000000 --- a/dendrite/async_api/_api/response/cache_extract_response.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class CacheExtractResponse(BaseModel): - exists: bool diff --git a/dendrite/async_api/_api/response/extract_response.py b/dendrite/async_api/_api/response/extract_response.py deleted file mode 100644 index ffc0e34..0000000 --- a/dendrite/async_api/_api/response/extract_response.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Generic, Optional, TypeVar -from pydantic import BaseModel - -from dendrite.async_api._common.status import Status - - -T = TypeVar("T") - - -class ExtractResponse(BaseModel, Generic[T]): - return_data: T - message: str - created_script: Optional[str] = None - status: Status - used_cache: bool diff --git a/dendrite/async_api/_api/response/google_search_response.py b/dendrite/async_api/_api/response/google_search_response.py deleted file mode 100644 index d435b71..0000000 --- a/dendrite/async_api/_api/response/google_search_response.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import List -from pydantic import BaseModel - - -class SearchResult(BaseModel): - url: str - title: str - description: str - - -class GoogleSearchResponse(BaseModel): - results: List[SearchResult] diff --git a/dendrite/async_api/_api/response/interaction_response.py b/dendrite/async_api/_api/response/interaction_response.py deleted file mode 100644 index 3d24a6a..0000000 --- a/dendrite/async_api/_api/response/interaction_response.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import BaseModel -from dendrite.async_api._common.status import Status - - -class InteractionResponse(BaseModel): - message: str - status: Status diff --git a/dendrite/async_api/_api/response/selector_cache_response.py b/dendrite/async_api/_api/response/selector_cache_response.py deleted file mode 100644 index 4c0e388..0000000 --- a/dendrite/async_api/_api/response/selector_cache_response.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class SelectorCacheResponse(BaseModel): - exists: bool diff --git a/dendrite/async_api/_api/response/session_response.py b/dendrite/async_api/_api/response/session_response.py deleted file mode 100644 index 2d03b97..0000000 --- a/dendrite/async_api/_api/response/session_response.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import List -from pydantic import BaseModel - - -class SessionResponse(BaseModel): - cookies: List[dict] - origins_storage: List[dict] diff --git a/dendrite/async_api/_core/_impl_browser.py b/dendrite/async_api/_core/_impl_browser.py deleted file mode 100644 index c4e0f99..0000000 --- a/dendrite/async_api/_core/_impl_browser.py +++ /dev/null @@ -1,88 +0,0 @@ -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_browser import AsyncDendrite - -from dendrite.async_api._core._type_spec import PlaywrightPage -from playwright.async_api import Download, Browser, Playwright - - -class ImplBrowser(ABC): - @abstractmethod - def __init__(self, settings): - pass - # self.settings = settings - - @abstractmethod - async def get_download( - self, dendrite_browser: "AsyncDendrite", pw_page: PlaywrightPage, timeout: float - ) -> Download: - """ - Retrieves the 
download event from the browser. - - Returns: - Download: The download event. - - Raises: - Exception: If there is an issue retrieving the download event. - """ - pass - - @abstractmethod - async def start_browser(self, playwright: Playwright, pw_options: dict) -> Browser: - """ - Starts the browser session. - - Returns: - Browser: The browser session. - - Raises: - Exception: If there is an issue starting the browser session. - """ - pass - - @abstractmethod - async def configure_context(self, browser: "AsyncDendrite") -> None: - """ - Configures the browser context. - - Args: - browser (AsyncDendrite): The browser to configure. - - Raises: - Exception: If there is an issue configuring the browser context. - """ - pass - - @abstractmethod - async def stop_session(self) -> None: - """ - Stops the browser session. - - Raises: - Exception: If there is an issue stopping the browser session. - """ - pass - - -class LocalImpl(ImplBrowser): - def __init__(self) -> None: - pass - - async def start_browser(self, playwright: Playwright, pw_options) -> Browser: - return await playwright.chromium.launch(**pw_options) - - async def get_download( - self, - dendrite_browser: "AsyncDendrite", - pw_page: PlaywrightPage, - timeout: float, - ) -> Download: - return await dendrite_browser._download_handler.get_data(pw_page, timeout) - - async def configure_context(self, browser: "AsyncDendrite"): - pass - - async def stop_session(self): - pass diff --git a/dendrite/async_api/_core/_impl_mapping.py b/dendrite/async_api/_core/_impl_mapping.py deleted file mode 100644 index 3268943..0000000 --- a/dendrite/async_api/_core/_impl_mapping.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import Any, Dict, Optional, Type - -from dendrite.async_api._core._impl_browser import ImplBrowser, LocalImpl - -from dendrite.async_api._ext_impl.browserbase._impl import BrowserBaseImpl -from dendrite.async_api._ext_impl.browserless._impl import BrowserlessImpl -from dendrite.remote.browserless_config import BrowserlessConfig -from dendrite.remote.browserbase_config import BrowserbaseConfig -from dendrite.remote import Providers - -IMPL_MAPPING: Dict[Type[Providers], Type[ImplBrowser]] = { - BrowserbaseConfig: BrowserBaseImpl, - BrowserlessConfig: BrowserlessImpl, - # BFloatProviderConfig: , -} - -SETTINGS_CLASSES: Dict[str, Type[Providers]] = { - "browserbase": BrowserbaseConfig, - "browserless": BrowserlessConfig, -} - - -def get_impl(remote_provider: Optional[Providers]) -> ImplBrowser: - if remote_provider is None: - return LocalImpl() - - try: - provider_class = IMPL_MAPPING[type(remote_provider)] - except KeyError: - raise ValueError( - f"No implementation for {type(remote_provider)}. 
Available providers: {', '.join(map(lambda x: x.__name__, IMPL_MAPPING.keys()))}" - ) - - return provider_class(remote_provider) diff --git a/dendrite/async_api/_core/_type_spec.py b/dendrite/async_api/_core/_type_spec.py deleted file mode 100644 index 8252e08..0000000 --- a/dendrite/async_api/_core/_type_spec.py +++ /dev/null @@ -1,44 +0,0 @@ -import inspect -from typing import Any, Dict, Literal, Type, TypeVar, Union -from pydantic import BaseModel -from playwright.async_api import Page - - -Interaction = Literal["click", "fill", "hover"] - -T = TypeVar("T") -PydanticModel = TypeVar("PydanticModel", bound=BaseModel) -PrimitiveTypes = PrimitiveTypes = Union[Type[bool], Type[int], Type[float], Type[str]] -JsonSchema = Dict[str, Any] -TypeSpec = Union[PrimitiveTypes, PydanticModel, JsonSchema] - -PlaywrightPage = Page - - -def to_json_schema(type_spec: TypeSpec) -> Dict[str, Any]: - if isinstance(type_spec, dict): - # Assume it's already a JSON schema - return type_spec - if inspect.isclass(type_spec) and issubclass(type_spec, BaseModel): - # Convert Pydantic model to JSON schema - return type_spec.model_json_schema() - if type_spec in (bool, int, float, str): - # Convert basic Python types to JSON schema - type_map = {bool: "boolean", int: "integer", float: "number", str: "string"} - return {"type": type_map[type_spec]} - - raise ValueError(f"Unsupported type specification: {type_spec}") - - -def convert_to_type_spec(type_spec: TypeSpec, return_data: Any) -> TypeSpec: - if isinstance(type_spec, type): - if issubclass(type_spec, BaseModel): - return type_spec.model_validate(return_data) - if type_spec in (str, float, bool, int): - return type_spec(return_data) - - raise ValueError(f"Unsupported type: {type_spec}") - if isinstance(type_spec, dict): - return return_data - - raise ValueError(f"Unsupported type specification: {type_spec}") diff --git a/dendrite/async_api/_core/_utils.py b/dendrite/async_api/_core/_utils.py deleted file mode 100644 index f030135..0000000 --- a/dendrite/async_api/_core/_utils.py +++ /dev/null @@ -1,123 +0,0 @@ -from typing import Optional, Union, List, TYPE_CHECKING -from playwright.async_api import FrameLocator, ElementHandle, Error, Frame -from bs4 import BeautifulSoup -from loguru import logger - -from dendrite.async_api._api.response.get_element_response import GetElementResponse -from dendrite.async_api._core._type_spec import PlaywrightPage -from dendrite.async_api._core.dendrite_element import AsyncElement -from dendrite.async_api._core.models.response import AsyncElementsResponse - -if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_page import AsyncPage - -from dendrite.async_api._core._js import ( - GENERATE_DENDRITE_IDS_IFRAME_SCRIPT, -) -from dendrite.async_api._dom.util.mild_strip import mild_strip_in_place - - -async def expand_iframes( - page: PlaywrightPage, - page_soup: BeautifulSoup, -): - async def get_iframe_path(frame: Frame): - path_parts = [] - current_frame = frame - while current_frame.parent_frame is not None: - iframe_element = await current_frame.frame_element() - iframe_id = await iframe_element.get_attribute("d-id") - if iframe_id is None: - # If any iframe_id in the path is None, we cannot build the path - return None - path_parts.insert(0, iframe_id) - current_frame = current_frame.parent_frame - return "|".join(path_parts) - - for frame in page.frames: - if frame.parent_frame is None: - continue # Skip the main frame - iframe_element = await frame.frame_element() - iframe_id = await iframe_element.get_attribute("d-id") - 
if iframe_id is None: - continue - iframe_path = await get_iframe_path(frame) - if iframe_path is None: - continue - try: - await frame.evaluate( - GENERATE_DENDRITE_IDS_IFRAME_SCRIPT, {"frame_path": iframe_path} - ) - frame_content = await frame.content() - frame_tree = BeautifulSoup(frame_content, "lxml") - mild_strip_in_place(frame_tree) - merge_iframe_to_page(iframe_id, page_soup, frame_tree) - except Error as e: - logger.debug(f"Error processing frame {iframe_id}: {e}") - continue - - -def merge_iframe_to_page( - iframe_id: str, - page: BeautifulSoup, - iframe: BeautifulSoup, -): - iframe_element = page.find("iframe", {"d-id": iframe_id}) - if iframe_element is None: - logger.debug(f"Could not find iframe with ID {iframe_id} in page soup") - return - - iframe_element.replace_with(iframe) - - -async def _get_all_elements_from_selector_soup( - selector: str, soup: BeautifulSoup, page: "AsyncPage" -) -> List[AsyncElement]: - dendrite_elements: List[AsyncElement] = [] - - elements = soup.select(selector) - - for element in elements: - frame = page._get_context(element) - d_id = element.get("d-id", "") - locator = frame.locator(f"xpath=//*[@d-id='{d_id}']") - - if not d_id: - continue - - if isinstance(d_id, list): - d_id = d_id[0] - dendrite_elements.append( - AsyncElement(d_id, locator, page.dendrite_browser, page._browser_api_client) - ) - - return dendrite_elements - - -async def get_elements_from_selectors_soup( - page: "AsyncPage", - soup: BeautifulSoup, - res: GetElementResponse, - only_one: bool, -) -> Union[Optional[AsyncElement], List[AsyncElement], AsyncElementsResponse]: - if isinstance(res.selectors, dict): - result = {} - for key, selectors in res.selectors.items(): - for selector in selectors: - dendrite_elements = await _get_all_elements_from_selector_soup( - selector, soup, page - ) - if len(dendrite_elements) > 0: - result[key] = dendrite_elements[0] - break - return AsyncElementsResponse(result) - elif isinstance(res.selectors, list): - for selector in reversed(res.selectors): - dendrite_elements = await _get_all_elements_from_selector_soup( - selector, soup, page - ) - - if len(dendrite_elements) > 0: - return dendrite_elements[0] if only_one else dendrite_elements - - return None diff --git a/dendrite/async_api/_core/mixin/extract.py b/dendrite/async_api/_core/mixin/extract.py deleted file mode 100644 index a0c4347..0000000 --- a/dendrite/async_api/_core/mixin/extract.py +++ /dev/null @@ -1,253 +0,0 @@ -import asyncio -import time -from typing import Any, Optional, Type, overload, List -from dendrite.async_api._api.dto.extract_dto import ExtractDTO -from dendrite.async_api._api.response.cache_extract_response import ( - CacheExtractResponse, -) -from dendrite.async_api._api.response.extract_response import ExtractResponse -from dendrite.async_api._core._type_spec import ( - JsonSchema, - PydanticModel, - TypeSpec, - convert_to_type_spec, - to_json_schema, -) -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite.async_api._core._managers.navigation_tracker import NavigationTracker -from loguru import logger - - -CACHE_TIMEOUT = 5 - - -class ExtractionMixin(DendritePageProtocol): - """ - Mixin that provides extraction functionality for web pages. - - This mixin provides various `extract` methods that allow extracting - different types of data (e.g., bool, int, float, string, Pydantic models, etc.) - from a web page based on a given prompt. 
- """ - - @overload - async def extract( - self, - prompt: str, - type_spec: Type[bool], - use_cache: bool = True, - timeout: int = 180, - ) -> bool: ... - - @overload - async def extract( - self, - prompt: str, - type_spec: Type[int], - use_cache: bool = True, - timeout: int = 180, - ) -> int: ... - - @overload - async def extract( - self, - prompt: str, - type_spec: Type[float], - use_cache: bool = True, - timeout: int = 180, - ) -> float: ... - - @overload - async def extract( - self, - prompt: str, - type_spec: Type[str], - use_cache: bool = True, - timeout: int = 180, - ) -> str: ... - - @overload - async def extract( - self, - prompt: Optional[str], - type_spec: Type[PydanticModel], - use_cache: bool = True, - timeout: int = 180, - ) -> PydanticModel: ... - - @overload - async def extract( - self, - prompt: Optional[str], - type_spec: JsonSchema, - use_cache: bool = True, - timeout: int = 180, - ) -> JsonSchema: ... - - @overload - async def extract( - self, - prompt: str, - type_spec: None = None, - use_cache: bool = True, - timeout: int = 180, - ) -> Any: ... - - async def extract( - self, - prompt: Optional[str], - type_spec: Optional[TypeSpec] = None, - use_cache: bool = True, - timeout: int = 180, - ) -> TypeSpec: - """ - Extract data from a web page based on a prompt and optional type specification. - Args: - prompt (Optional[str]): The prompt to describe the information to extract. - type_spec (Optional[TypeSpec], optional): The type specification for the extracted data. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached scripts before falling back to the - extraction agent for the remaining time that will attempt to generate a new script. Defaults to 15000 (15 seconds). - - Returns: - ExtractResponse: The extracted data wrapped in a ExtractResponse object. - Raises: - TimeoutError: If the extraction process exceeds the specified timeout. - """ - - logger.info(f"Starting extraction with prompt: {prompt}") - - json_schema = None - if type_spec: - json_schema = to_json_schema(type_spec) - logger.debug(f"Type specification converted to JSON schema: {json_schema}") - - if prompt is None: - prompt = "" - - start_time = time.time() - page = await self._get_page() - navigation_tracker = NavigationTracker(page) - navigation_tracker.start_nav_tracking() - - # Check if a script exists in the cache - if use_cache: - cache_available = await check_if_extract_cache_available( - self, prompt, json_schema - ) - - if cache_available: - logger.info("Cache available, attempting to use cached extraction") - result = await attempt_extraction_with_backoff( - self, - prompt, - json_schema, - remaining_timeout=CACHE_TIMEOUT, - only_use_cache=True, - ) - if result: - return convert_and_return_result(result, type_spec) - - logger.info( - "Using extraction agent to perform extraction, since no cache was found or failed." 
- ) - result = await attempt_extraction_with_backoff( - self, - prompt, - json_schema, - remaining_timeout=timeout - (time.time() - start_time), - only_use_cache=False, - ) - - if result: - return convert_and_return_result(result, type_spec) - - logger.error(f"Extraction failed after {time.time() - start_time:.2f} seconds") - return None - - -async def check_if_extract_cache_available( - obj: DendritePageProtocol, prompt: str, json_schema: Optional[JsonSchema] -) -> bool: - page = await obj._get_page() - page_information = await page.get_page_information(include_screenshot=False) - dto = ExtractDTO( - page_information=page_information, - api_config=obj._get_dendrite_browser().api_config, - prompt=prompt, - return_data_json_schema=json_schema, - ) - cache_response: CacheExtractResponse = ( - await obj._get_browser_api_client().check_extract_cache(dto) - ) - return cache_response.exists - - -async def attempt_extraction_with_backoff( - obj: DendritePageProtocol, - prompt: str, - json_schema: Optional[JsonSchema], - remaining_timeout: float = 180.0, - only_use_cache: bool = False, -) -> Optional[ExtractResponse]: - TIMEOUT_INTERVAL: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0] - total_elapsed_time = 0 - start_time = time.time() - - for current_timeout in TIMEOUT_INTERVAL: - if total_elapsed_time >= remaining_timeout: - logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds") - return None - - request_start_time = time.time() - page = await obj._get_page() - page_information = await page.get_page_information( - include_screenshot=not only_use_cache - ) - extract_dto = ExtractDTO( - page_information=page_information, - api_config=obj._get_dendrite_browser().api_config, - prompt=prompt, - return_data_json_schema=json_schema, - use_screenshot=True, - use_cache=only_use_cache, - force_use_cache=only_use_cache, - ) - - res = await obj._get_browser_api_client().extract(extract_dto) - request_duration = time.time() - request_start_time - - if res.status == "impossible": - logger.error(f"Impossible to extract data. Reason: {res.message}") - return None - - if res.status == "success": - logger.success( - f"Extraction successful: '{res.message}'\nUsed cache: {res.used_cache}\nUsed script:\n\n{res.created_script}" - ) - return res - - sleep_duration = max(0, current_timeout - request_duration) - logger.info( - f"Extraction attempt failed. 
Status: {res.status}\nMessage: {res.message}\nSleeping for {sleep_duration:.2f} seconds" - ) - await asyncio.sleep(sleep_duration) - total_elapsed_time = time.time() - start_time - - logger.error( - f"All extraction attempts failed after {total_elapsed_time:.2f} seconds" - ) - return None - - -def convert_and_return_result( - res: ExtractResponse, type_spec: Optional[TypeSpec] -) -> TypeSpec: - converted_res = res.return_data - if type_spec is not None: - logger.debug("Converting extraction result to specified type") - converted_res = convert_to_type_spec(type_spec, res.return_data) - - logger.info("Extraction process completed successfully") - return converted_res diff --git a/dendrite/async_api/_core/mixin/get_element.py b/dendrite/async_api/_core/mixin/get_element.py deleted file mode 100644 index 4d54f67..0000000 --- a/dendrite/async_api/_core/mixin/get_element.py +++ /dev/null @@ -1,340 +0,0 @@ -import asyncio -import time -from typing import Dict, List, Literal, Optional, Union, overload - -from loguru import logger - -from dendrite.async_api._api.dto.get_elements_dto import GetElementsDTO -from dendrite.async_api._api.response.get_element_response import GetElementResponse -from dendrite.async_api._api.dto.get_elements_dto import CheckSelectorCacheDTO -from dendrite.async_api._core._utils import get_elements_from_selectors_soup -from dendrite.async_api._core.dendrite_element import AsyncElement -from dendrite.async_api._core.models.response import AsyncElementsResponse -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite.async_api._core.models.api_config import APIConfig - - -CACHE_TIMEOUT = 5 - - -class GetElementMixin(DendritePageProtocol): - @overload - async def get_elements( - self, - prompt_or_elements: str, - use_cache: bool = True, - timeout: int = 15000, - context: str = "", - ) -> List[AsyncElement]: - """ - Retrieves a list of Dendrite elements based on a string prompt. - - Args: - prompt_or_elements (str): The prompt describing the elements to be retrieved. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - context (str, optional): Additional context for the retrieval. Defaults to an empty string. - - Returns: - List[AsyncElement]: A list of Dendrite elements found on the page. - """ - - @overload - async def get_elements( - self, - prompt_or_elements: Dict[str, str], - use_cache: bool = True, - timeout: int = 15000, - context: str = "", - ) -> AsyncElementsResponse: - """ - Retrieves Dendrite elements based on a dictionary. - - Args: - prompt_or_elements (Dict[str, str]): A dictionary where keys are field names and values are prompts describing the elements to be retrieved. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - context (str, optional): Additional context for the retrieval. Defaults to an empty string. 
- - Returns: - AsyncElementsResponse: A response object containing the retrieved elements with attributes matching the keys in the dict. - """ - - async def get_elements( - self, - prompt_or_elements: Union[str, Dict[str, str]], - use_cache: bool = True, - timeout: int = 15000, - context: str = "", - ) -> Union[List[AsyncElement], AsyncElementsResponse]: - """ - Retrieves Dendrite elements based on either a string prompt or a dictionary of prompts. - - This method determines the type of the input (string or dictionary) and retrieves the appropriate elements. - If the input is a string, it fetches a list of elements. If the input is a dictionary, it fetches elements for each key-value pair. - - Args: - prompt_or_elements (Union[str, Dict[str, str]]): The prompt or dictionary of prompts for element retrieval. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - context (str, optional): Additional context for the retrieval. Defaults to an empty string. - - Returns: - Union[List[AsyncElement], AsyncElementsResponse]: A list of elements or a response object containing the retrieved elements. - - Raises: - ValueError: If the input is neither a string nor a dictionary. - """ - - return await self._get_element( - prompt_or_elements, - only_one=False, - use_cache=use_cache, - timeout=timeout / 1000, - ) - - async def get_element( - self, - prompt: str, - use_cache=True, - timeout=15000, - ) -> Optional[AsyncElement]: - """ - Retrieves a single Dendrite element based on the provided prompt. - - Args: - prompt (str): The prompt describing the element to be retrieved. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - AsyncElement: The retrieved element. - """ - return await self._get_element( - prompt, - only_one=True, - use_cache=use_cache, - timeout=timeout / 1000, - ) - - @overload - async def _get_element( - self, - prompt_or_elements: str, - only_one: Literal[True], - use_cache: bool, - timeout, - ) -> Optional[AsyncElement]: - """ - Retrieves a single Dendrite element based on the provided prompt. - - Args: - prompt (Union[str, Dict[str, str]]): The prompt describing the element to be retrieved. - only_one (Literal[True]): Indicates that only one element should be retrieved. - use_cache (bool): Whether to use cached results. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - AsyncElement: The retrieved element. - """ - - @overload - async def _get_element( - self, - prompt_or_elements: Union[str, Dict[str, str]], - only_one: Literal[False], - use_cache: bool, - timeout, - ) -> Union[List[AsyncElement], AsyncElementsResponse]: - """ - Retrieves a list of Dendrite elements based on the provided prompt. 
- - Args: - prompt (str): The prompt describing the elements to be retrieved. - only_one (Literal[False]): Indicates that multiple elements should be retrieved. - use_cache (bool): Whether to use cached results. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - List[AsyncElement]: A list of retrieved elements. - """ - - async def _get_element( - self, - prompt_or_elements: Union[str, Dict[str, str]], - only_one: bool, - use_cache: bool, - timeout: float, - ) -> Union[ - Optional[AsyncElement], - List[AsyncElement], - AsyncElementsResponse, - ]: - """ - Retrieves Dendrite elements based on the provided prompt, either a single element or a list of elements. - - This method sends a request with the prompt and retrieves the elements based on the `only_one` flag. - - Args: - prompt_or_elements (Union[str, Dict[str, str]]): The prompt or dictionary of prompts for element retrieval. - only_one (bool): Whether to retrieve only one element or a list of elements. - use_cache (bool): Whether to use cached results. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - Union[AsyncElement, List[AsyncElement], AsyncElementsResponse]: The retrieved element, list of elements, or response object. - """ - - api_config = self._get_dendrite_browser().api_config - start_time = time.time() - - # First, let's check if there is a cached selector - page = await self._get_page() - cache_available = await test_if_cache_available( - self, prompt_or_elements, page.url - ) - - # If we have cached elements, attempt to use them with an exponentation backoff - if cache_available and use_cache == True: - logger.info(f"Cache available, attempting to use cached selectors") - res = await attempt_with_backoff( - self, - prompt_or_elements, - only_one, - api_config, - remaining_timeout=CACHE_TIMEOUT, - only_use_cache=True, - ) - if res: - return res - else: - logger.debug( - f"After attempting to use cached selectors several times without success, let's find the elements using the find element agent." - ) - - # Now that no cached selectors were found or they failed repeatedly, let's use the find element agent to find the requested elements. - logger.info( - "Proceeding to use the find element agent to find the requested elements." 
- ) - res = await attempt_with_backoff( - self, - prompt_or_elements, - only_one, - api_config, - remaining_timeout=timeout - (time.time() - start_time), - only_use_cache=False, - ) - if res: - return res - - logger.error( - f"Failed to retrieve elements within the specified timeout of {timeout} seconds" - ) - return None - - -async def test_if_cache_available( - obj: DendritePageProtocol, prompt_or_elements: Union[str, Dict[str, str]], url: str -) -> bool: - dto = CheckSelectorCacheDTO( - url=url, - prompt=prompt_or_elements, - ) - cache_available = await obj._get_browser_api_client().check_selector_cache(dto) - - return cache_available.exists - - -async def attempt_with_backoff( - obj: DendritePageProtocol, - prompt_or_elements: Union[str, Dict[str, str]], - only_one: bool, - api_config: APIConfig, - remaining_timeout: float, - only_use_cache: bool = False, -) -> Union[Optional[AsyncElement], List[AsyncElement], AsyncElementsResponse]: - TIMEOUT_INTERVAL: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0] - total_elapsed_time = 0 - start_time = time.time() - - for current_timeout in TIMEOUT_INTERVAL: - if total_elapsed_time >= remaining_timeout: - logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds") - return None - - request_start_time = time.time() - page = await obj._get_page() - page_information = await page.get_page_information( - include_screenshot=not only_use_cache - ) - dto = GetElementsDTO( - page_information=page_information, - prompt=prompt_or_elements, - api_config=api_config, - use_cache=only_use_cache, - only_one=only_one, - force_use_cache=only_use_cache, - ) - res = await obj._get_browser_api_client().get_interactions_selector(dto) - request_duration = time.time() - request_start_time - - if res.status == "impossible": - logger.error( - f"Impossible to get elements for '{prompt_or_elements}'. 
Reason: {res.message}" - ) - return None - - if res.status == "success": - response = await get_elements_from_selectors_soup( - page, await page._get_previous_soup(), res, only_one - ) - if response: - return response - - sleep_duration = max(0, current_timeout - request_duration) - logger.info( - f"Failed to get elements for prompt:\n\n'{prompt_or_elements}'\n\nStatus: {res.status}\n\nMessage: {res.message}\n\nSleeping for {sleep_duration:.2f} seconds" - ) - await asyncio.sleep(sleep_duration) - total_elapsed_time = time.time() - start_time - - logger.error(f"All attempts failed after {total_elapsed_time:.2f} seconds") - return None - - -async def get_elements_from_selectors( - obj: DendritePageProtocol, res: GetElementResponse, only_one: bool -) -> Union[Optional[AsyncElement], List[AsyncElement], AsyncElementsResponse]: - if isinstance(res.selectors, dict): - result = {} - for key, selectors in res.selectors.items(): - for selector in selectors: - page = await obj._get_page() - dendrite_elements = await page._get_all_elements_from_selector(selector) - if len(dendrite_elements) > 0: - result[key] = dendrite_elements[0] - break - return AsyncElementsResponse(result) - elif isinstance(res.selectors, list): - for selector in reversed(res.selectors): - page = await obj._get_page() - dendrite_elements = await page._get_all_elements_from_selector(selector) - - if len(dendrite_elements) > 0: - return dendrite_elements[0] if only_one else dendrite_elements - - return None diff --git a/dendrite/async_api/_core/models/api_config.py b/dendrite/async_api/_core/models/api_config.py deleted file mode 100644 index fd92cac..0000000 --- a/dendrite/async_api/_core/models/api_config.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Optional -from pydantic import BaseModel, model_validator - -from dendrite._common._exceptions.dendrite_exception import MissingApiKeyError - - -class APIConfig(BaseModel): - """ - Configuration model for API keys used in the Dendrite SDK. - - Attributes: - dendrite_api_key (Optional[str]): The API key for Dendrite services. - openai_api_key (Optional[str]): The API key for OpenAI services. If you wish to use your own API key, you can do so by passing it to the AsyncDendrite. - anthropic_api_key (Optional[str]): The API key for Anthropic services. If you wish to use your own API key, you can do so by passing it to the AsyncDendrite. - - Raises: - ValueError: If a valid dendrite_api_key is not provided. - """ - - dendrite_api_key: Optional[str] = None - openai_api_key: Optional[str] = None - anthropic_api_key: Optional[str] = None - - @model_validator(mode="before") - def _check_api_keys(cls, values): - dendrite_api_key = values.get("dendrite_api_key") - - if not dendrite_api_key: - raise MissingApiKeyError( - "A valid dendrite_api_key must be provided. Make sure you have set the DENDRITE_API_KEY environment variable or passed it to the AsyncDendrite." 
- ) - - return values diff --git a/dendrite/async_api/_core/models/authentication.py b/dendrite/async_api/_core/models/authentication.py deleted file mode 100644 index 3c2656e..0000000 --- a/dendrite/async_api/_core/models/authentication.py +++ /dev/null @@ -1,47 +0,0 @@ -from pydantic import BaseModel -from typing import List, Literal, Optional -from typing_extensions import TypedDict - - -class Cookie(TypedDict, total=False): - name: str - value: str - domain: str - path: str - expires: float - httpOnly: bool - secure: bool - sameSite: Literal["Lax", "None", "Strict"] - - -class LocalStorageEntry(TypedDict): - name: str - value: str - - -class OriginState(TypedDict): - origin: str - localStorage: List[LocalStorageEntry] - - -class StorageState(TypedDict, total=False): - cookies: List[Cookie] - origins: List[OriginState] - - -class DomainState(BaseModel): - domain: str - storage_state: StorageState - - -class AuthSession(BaseModel): - user_agent: Optional[str] - domain_states: List[DomainState] - - def to_storage_state(self) -> StorageState: - cookies = [] - origins = [] - for domain_state in self.domain_states: - cookies.extend(domain_state.storage_state.get("cookies", [])) - origins.extend(domain_state.storage_state.get("origins", [])) - return StorageState(cookies=cookies, origins=origins) diff --git a/dendrite/async_api/_core/models/page_diff_information.py b/dendrite/async_api/_core/models/page_diff_information.py deleted file mode 100644 index 786bbc3..0000000 --- a/dendrite/async_api/_core/models/page_diff_information.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import BaseModel -from dendrite.async_api._core.models.page_information import PageInformation - - -class PageDiffInformation(BaseModel): - page_before: PageInformation - page_after: PageInformation diff --git a/dendrite/async_api/_core/models/page_information.py b/dendrite/async_api/_core/models/page_information.py deleted file mode 100644 index 67e1909..0000000 --- a/dendrite/async_api/_core/models/page_information.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Dict, Optional -from typing_extensions import TypedDict -from pydantic import BaseModel - - -class InteractableElementInfo(TypedDict): - attrs: Optional[str] - text: Optional[str] - - -class PageInformation(BaseModel): - url: str - raw_html: str - screenshot_base64: str - time_since_frame_navigated: float diff --git a/dendrite/async_api/_core/models/response.py b/dendrite/async_api/_core/models/response.py deleted file mode 100644 index 79b216f..0000000 --- a/dendrite/async_api/_core/models/response.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import Dict, Iterator - -from dendrite.async_api._core.dendrite_element import AsyncElement - - -class AsyncElementsResponse: - """ - AsyncElementsResponse is a class that encapsulates a dictionary of Dendrite elements, - allowing for attribute-style access and other convenient interactions. - - This class is used to store and access the elements retrieved by the `get_elements` function. - The attributes of this class dynamically match the keys of the dictionary passed to the `get_elements` function, - allowing for direct attribute-style access to the corresponding `AsyncElement` objects. - - Attributes: - _data (Dict[str, AsyncElement]): A dictionary where keys are the names of elements and values are the corresponding `AsyncElement` objects. - - Args: - data (Dict[str, AsyncElement]): The dictionary of elements to be encapsulated by the class. 
- - Methods: - __getattr__(name: str) -> AsyncElement: - Allows attribute-style access to the elements in the dictionary. - - __getitem__(key: str) -> AsyncElement: - Enables dictionary-style access to the elements. - - __iter__() -> Iterator[str]: - Provides an iterator over the keys in the dictionary. - - __repr__() -> str: - Returns a string representation of the class instance. - """ - - _data: Dict[str, AsyncElement] - - def __init__(self, data: Dict[str, AsyncElement]): - self._data = data - - def __getattr__(self, name: str) -> AsyncElement: - try: - return self._data[name] - except KeyError: - raise AttributeError( - f"'{self.__class__.__name__}' object has no attribute '{name}'" - ) - - def __getitem__(self, key: str) -> AsyncElement: - return self._data[key] - - def __iter__(self) -> Iterator[str]: - return iter(self._data) - - def __repr__(self) -> str: - return f"{self.__class__.__name__}({self._data})" diff --git a/dendrite/async_api/_core/protocol/page_protocol.py b/dendrite/async_api/_core/protocol/page_protocol.py deleted file mode 100644 index 2aa9449..0000000 --- a/dendrite/async_api/_core/protocol/page_protocol.py +++ /dev/null @@ -1,20 +0,0 @@ -from typing import TYPE_CHECKING, Protocol - -from dendrite.async_api._api.browser_api_client import BrowserAPIClient - -if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_page import AsyncPage - from dendrite.async_api._core.dendrite_browser import AsyncDendrite - - -class DendritePageProtocol(Protocol): - """ - Protocol that specifies the required methods and attributes - for the `ExtractionMixin` to work. - """ - - def _get_dendrite_browser(self) -> "AsyncDendrite": ... - - def _get_browser_api_client(self) -> BrowserAPIClient: ... - - async def _get_page(self) -> "AsyncPage": ... diff --git a/dendrite/async_api/_dom/util/mild_strip.py b/dendrite/async_api/_dom/util/mild_strip.py deleted file mode 100644 index 54050fb..0000000 --- a/dendrite/async_api/_dom/util/mild_strip.py +++ /dev/null @@ -1,52 +0,0 @@ -from bs4 import BeautifulSoup, Doctype, Tag, Comment - - -def mild_strip(soup: Tag, keep_d_id: bool = True) -> BeautifulSoup: - new_soup = BeautifulSoup(str(soup), "html.parser") - _mild_strip(new_soup, keep_d_id) - return new_soup - - -def mild_strip_in_place(soup: BeautifulSoup, keep_d_id: bool = True) -> None: - _mild_strip(soup, keep_d_id) - - -def _mild_strip(soup: BeautifulSoup, keep_d_id: bool = True) -> None: - for element in soup(text=lambda text: isinstance(text, Comment)): - element.extract() - - # for text in soup.find_all(text=lambda text: isinstance(text, NavigableString)): - # if len(text) > 200: - # text.replace_with(text[:200] + f"... 
[{len(text)-200} more chars]") - - for tag in soup( - ["head", "script", "style", "path", "polygon", "defs", "svg", "br", "Doctype"] - ): - tag.extract() - - for element in soup.contents: - if isinstance(element, Doctype): - element.extract() - - # for tag in soup.find_all(True): - # tag.attrs = { - # attr: (value[:100] if isinstance(value, str) else value) - # for attr, value in tag.attrs.items() - # } - # if keep_d_id == False: - # del tag["d-id"] - for tag in soup.find_all(True): - if tag.attrs.get("is-interactable-d_id") == "true": - continue - - tag.attrs = { - attr: (value[:100] if isinstance(value, str) else value) - for attr, value in tag.attrs.items() - } - if keep_d_id == False: - del tag["d-id"] - - # if browser != None: - # for elem in list(soup.descendants): - # if isinstance(elem, Tag) and not browser.element_is_visible(elem): - # elem.extract() diff --git a/dendrite/async_api/_api/__init__.py b/dendrite/browser/__init__.py similarity index 100% rename from dendrite/async_api/_api/__init__.py rename to dendrite/browser/__init__.py diff --git a/dendrite/async_api/_api/dto/__init__.py b/dendrite/browser/_common/_exceptions/__init__.py similarity index 100% rename from dendrite/async_api/_api/dto/__init__.py rename to dendrite/browser/_common/_exceptions/__init__.py diff --git a/dendrite/_common/_exceptions/_constants.py b/dendrite/browser/_common/_exceptions/_constants.py similarity index 100% rename from dendrite/_common/_exceptions/_constants.py rename to dendrite/browser/_common/_exceptions/_constants.py diff --git a/dendrite/_common/_exceptions/dendrite_exception.py b/dendrite/browser/_common/_exceptions/dendrite_exception.py similarity index 98% rename from dendrite/_common/_exceptions/dendrite_exception.py rename to dendrite/browser/_common/_exceptions/dendrite_exception.py index 4d62481..ddfdeed 100644 --- a/dendrite/_common/_exceptions/dendrite_exception.py +++ b/dendrite/browser/_common/_exceptions/dendrite_exception.py @@ -5,7 +5,7 @@ from loguru import logger -from dendrite._common._exceptions._constants import INVALID_AUTH_SESSION_MSG +from dendrite.browser._common._exceptions._constants import INVALID_AUTH_SESSION_MSG class BaseDendriteException(Exception): @@ -110,8 +110,6 @@ class IncorrectOutcomeError(BaseDendriteException): Inherits from BaseDendriteException. 
""" - pass - class BrowserNotLaunchedError(BaseDendriteException): """ diff --git a/dendrite/async_api/_common/constants.py b/dendrite/browser/_common/constants.py similarity index 100% rename from dendrite/async_api/_common/constants.py rename to dendrite/browser/_common/constants.py diff --git a/dendrite/sync_api/_common/status.py b/dendrite/browser/_common/types.py similarity index 100% rename from dendrite/sync_api/_common/status.py rename to dendrite/browser/_common/types.py diff --git a/dendrite/browser/async_api/__init__.py b/dendrite/browser/async_api/__init__.py new file mode 100644 index 0000000..87168f2 --- /dev/null +++ b/dendrite/browser/async_api/__init__.py @@ -0,0 +1,11 @@ +from loguru import logger + +from .dendrite_browser import AsyncDendrite +from .dendrite_element import AsyncElement +from .dendrite_page import AsyncPage + +__all__ = [ + "AsyncDendrite", + "AsyncElement", + "AsyncPage", +] diff --git a/dendrite/async_api/_common/event_sync.py b/dendrite/browser/async_api/_event_sync.py similarity index 92% rename from dendrite/async_api/_common/event_sync.py rename to dendrite/browser/async_api/_event_sync.py index db93358..a953aee 100644 --- a/dendrite/async_api/_common/event_sync.py +++ b/dendrite/browser/async_api/_event_sync.py @@ -1,8 +1,8 @@ -import time import asyncio -from typing import Generic, Optional, Type, TypeVar, Union, cast -from playwright.async_api import Page, Download, FileChooser +import time +from typing import Generic, Optional, Type, TypeVar +from playwright.async_api import Download, FileChooser, Page Events = TypeVar("Events", Download, FileChooser) diff --git a/dendrite/browser/async_api/_utils.py b/dendrite/browser/async_api/_utils.py new file mode 100644 index 0000000..3ccf4f4 --- /dev/null +++ b/dendrite/browser/async_api/_utils.py @@ -0,0 +1,157 @@ +import inspect +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +import tldextract +from bs4 import BeautifulSoup +from loguru import logger +from playwright.async_api import Error, Frame +from pydantic import BaseModel + +from dendrite.models.selector import Selector + +from .dendrite_element import AsyncElement +from .types import PlaywrightPage, TypeSpec + +if TYPE_CHECKING: + from .dendrite_page import AsyncPage + +from dendrite.logic.dom.strip import mild_strip_in_place + +from .js import GENERATE_DENDRITE_IDS_IFRAME_SCRIPT + + +def get_domain_w_suffix(url: str) -> str: + parsed_url = tldextract.extract(url) + if parsed_url.suffix == "": + raise ValueError(f"Invalid URL: {url}") + + return f"{parsed_url.domain}.{parsed_url.suffix}" + + +async def expand_iframes( + page: PlaywrightPage, + page_soup: BeautifulSoup, +): + async def get_iframe_path(frame: Frame): + path_parts = [] + current_frame = frame + while current_frame.parent_frame is not None: + iframe_element = await current_frame.frame_element() + iframe_id = await iframe_element.get_attribute("d-id") + if iframe_id is None: + # If any iframe_id in the path is None, we cannot build the path + return None + path_parts.insert(0, iframe_id) + current_frame = current_frame.parent_frame + return "|".join(path_parts) + + for frame in page.frames: + if frame.parent_frame is None: + continue # Skip the main frame + try: + iframe_element = await frame.frame_element() + + iframe_id = await iframe_element.get_attribute("d-id") + if iframe_id is None: + continue + iframe_path = await get_iframe_path(frame) + except Error as e: + continue + + if iframe_path is None: + continue + + try: + await frame.evaluate( + 
GENERATE_DENDRITE_IDS_IFRAME_SCRIPT, {"frame_path": iframe_path} + ) + frame_content = await frame.content() + frame_tree = BeautifulSoup(frame_content, "lxml") + mild_strip_in_place(frame_tree) + merge_iframe_to_page(iframe_id, page_soup, frame_tree) + except Error as e: + continue + + +def merge_iframe_to_page( + iframe_id: str, + page: BeautifulSoup, + iframe: BeautifulSoup, +): + iframe_element = page.find("iframe", {"d-id": iframe_id}) + if iframe_element is None: + logger.debug(f"Could not find iframe with ID {iframe_id} in page soup") + return + + iframe_element.replace_with(iframe) + + +async def _get_all_elements_from_selector_soup( + selector: str, soup: BeautifulSoup, page: "AsyncPage" +) -> List[AsyncElement]: + dendrite_elements: List[AsyncElement] = [] + + elements = soup.select(selector) + + for element in elements: + frame = page._get_context(element) + d_id = element.get("d-id", "") + locator = frame.locator(f"xpath=//*[@d-id='{d_id}']") + + if not d_id: + continue + + if isinstance(d_id, list): + d_id = d_id[0] + dendrite_elements.append( + AsyncElement(d_id, locator, page.dendrite_browser, page._browser_api_client) + ) + + return dendrite_elements + + +async def get_elements_from_selectors_soup( + page: "AsyncPage", + soup: BeautifulSoup, + selectors: List[Selector], + only_one: bool, +) -> Union[Optional[AsyncElement], List[AsyncElement]]: + + for selector in reversed(selectors): + dendrite_elements = await _get_all_elements_from_selector_soup( + selector.selector, soup, page + ) + + if len(dendrite_elements) > 0: + return dendrite_elements[0] if only_one else dendrite_elements + + return None + + +def to_json_schema(type_spec: TypeSpec) -> Dict[str, Any]: + if isinstance(type_spec, dict): + # Assume it's already a JSON schema + return type_spec + if inspect.isclass(type_spec) and issubclass(type_spec, BaseModel): + # Convert Pydantic model to JSON schema + return type_spec.model_json_schema() + if type_spec in (bool, int, float, str): + # Convert basic Python types to JSON schema + type_map = {bool: "boolean", int: "integer", float: "number", str: "string"} + return {"type": type_map[type_spec]} + + raise ValueError(f"Unsupported type specification: {type_spec}") + + +def convert_to_type_spec(type_spec: TypeSpec, return_data: Any) -> TypeSpec: + if isinstance(type_spec, type): + if issubclass(type_spec, BaseModel): + return type_spec.model_validate(return_data) + if type_spec in (str, float, bool, int): + return type_spec(return_data) + + raise ValueError(f"Unsupported type: {type_spec}") + if isinstance(type_spec, dict): + return return_data + + raise ValueError(f"Unsupported type specification: {type_spec}") diff --git a/dendrite/async_api/_ext_impl/__init__.py b/dendrite/browser/async_api/browser_impl/__init__.py similarity index 100% rename from dendrite/async_api/_ext_impl/__init__.py rename to dendrite/browser/async_api/browser_impl/__init__.py diff --git a/dendrite/async_api/_ext_impl/browserbase/__init__.py b/dendrite/browser/async_api/browser_impl/browserbase/__init__.py similarity index 100% rename from dendrite/async_api/_ext_impl/browserbase/__init__.py rename to dendrite/browser/async_api/browser_impl/browserbase/__init__.py diff --git a/dendrite/async_api/_ext_impl/browserbase/_client.py b/dendrite/browser/async_api/browser_impl/browserbase/_client.py similarity index 97% rename from dendrite/async_api/_ext_impl/browserbase/_client.py rename to dendrite/browser/async_api/browser_impl/browserbase/_client.py index 0689641..b29b607 100644 --- 
a/dendrite/async_api/_ext_impl/browserbase/_client.py +++ b/dendrite/browser/async_api/browser_impl/browserbase/_client.py @@ -1,11 +1,12 @@ import asyncio -from pathlib import Path import time +from pathlib import Path from typing import Optional, Union + import httpx from loguru import logger -from dendrite._common._exceptions.dendrite_exception import DendriteException +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException class BrowserbaseClient: diff --git a/dendrite/async_api/_ext_impl/browserbase/_download.py b/dendrite/browser/async_api/browser_impl/browserbase/_download.py similarity index 92% rename from dendrite/async_api/_ext_impl/browserbase/_download.py rename to dendrite/browser/async_api/browser_impl/browserbase/_download.py index d18561c..7a92880 100644 --- a/dendrite/async_api/_ext_impl/browserbase/_download.py +++ b/dendrite/browser/async_api/browser_impl/browserbase/_download.py @@ -1,13 +1,16 @@ -from pathlib import Path import re import shutil -from typing import Union import zipfile +from pathlib import Path +from typing import Union + from loguru import logger from playwright.async_api import Download -from dendrite.async_api._core.models.download_interface import DownloadInterface -from dendrite.async_api._ext_impl.browserbase._client import BrowserbaseClient +from dendrite.browser.async_api.browser_impl.browserbase._client import ( + BrowserbaseClient, +) +from dendrite.browser.async_api.protocol.download_protocol import DownloadInterface class AsyncBrowserbaseDownload(DownloadInterface): diff --git a/dendrite/async_api/_ext_impl/browserbase/_impl.py b/dendrite/browser/async_api/browser_impl/browserbase/_impl.py similarity index 80% rename from dendrite/async_api/_ext_impl/browserbase/_impl.py rename to dendrite/browser/async_api/browser_impl/browserbase/_impl.py index c67846e..b44b219 100644 --- a/dendrite/async_api/_ext_impl/browserbase/_impl.py +++ b/dendrite/browser/async_api/browser_impl/browserbase/_impl.py @@ -1,21 +1,23 @@ from typing import TYPE_CHECKING, Optional -from dendrite._common._exceptions.dendrite_exception import BrowserNotLaunchedError -from dendrite.async_api._core._impl_browser import ImplBrowser -from dendrite.async_api._core._type_spec import PlaywrightPage -from dendrite.remote.browserbase_config import BrowserbaseConfig + +from dendrite.browser._common._exceptions.dendrite_exception import ( + BrowserNotLaunchedError, +) +from dendrite.browser.async_api.protocol.browser_protocol import BrowserProtocol +from dendrite.browser.async_api.types import PlaywrightPage +from dendrite.browser.remote.browserbase_config import BrowserbaseConfig if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_browser import AsyncDendrite -from dendrite.async_api._ext_impl.browserbase._client import BrowserbaseClient -from playwright.async_api import Playwright + from dendrite.browser.async_api.dendrite_browser import AsyncDendrite + from loguru import logger +from playwright.async_api import Playwright -from dendrite.async_api._ext_impl.browserbase._download import ( - AsyncBrowserbaseDownload, -) +from ._client import BrowserbaseClient +from ._download import AsyncBrowserbaseDownload -class BrowserBaseImpl(ImplBrowser): +class BrowserbaseImpl(BrowserProtocol): def __init__(self, settings: BrowserbaseConfig) -> None: self.settings = settings self._client = BrowserbaseClient( diff --git a/dendrite/async_api/_ext_impl/browserless/__init__.py b/dendrite/browser/async_api/browser_impl/browserless/__init__.py similarity index 
100% rename from dendrite/async_api/_ext_impl/browserless/__init__.py rename to dendrite/browser/async_api/browser_impl/browserless/__init__.py diff --git a/dendrite/async_api/_ext_impl/browserless/_impl.py b/dendrite/browser/async_api/browser_impl/browserless/_impl.py similarity index 71% rename from dendrite/async_api/_ext_impl/browserless/_impl.py rename to dendrite/browser/async_api/browser_impl/browserless/_impl.py index e5b87b4..698557d 100644 --- a/dendrite/async_api/_ext_impl/browserless/_impl.py +++ b/dendrite/browser/async_api/browser_impl/browserless/_impl.py @@ -1,23 +1,30 @@ import json from typing import TYPE_CHECKING, Optional -from dendrite._common._exceptions.dendrite_exception import BrowserNotLaunchedError -from dendrite.async_api._core._impl_browser import ImplBrowser -from dendrite.async_api._core._type_spec import PlaywrightPage -from dendrite.remote.browserless_config import BrowserlessConfig + +from dendrite.browser._common._exceptions.dendrite_exception import ( + BrowserNotLaunchedError, +) +from dendrite.browser.async_api.protocol.browser_protocol import BrowserProtocol +from dendrite.browser.async_api.types import PlaywrightPage +from dendrite.browser.remote.browserless_config import BrowserlessConfig if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_browser import AsyncDendrite -from dendrite.async_api._ext_impl.browserbase._client import BrowserbaseClient -from playwright.async_api import Playwright -from loguru import logger + from dendrite.browser.async_api.dendrite_browser import AsyncDendrite + import urllib.parse -from dendrite.async_api._ext_impl.browserbase._download import ( +from loguru import logger +from playwright.async_api import Playwright + +from dendrite.browser.async_api.browser_impl.browserbase._client import ( + BrowserbaseClient, +) +from dendrite.browser.async_api.browser_impl.browserbase._download import ( AsyncBrowserbaseDownload, ) -class BrowserlessImpl(ImplBrowser): +class BrowserlessImpl(BrowserProtocol): def __init__(self, settings: BrowserlessConfig) -> None: self.settings = settings self._session_id: Optional[str] = None diff --git a/dendrite/browser/async_api/browser_impl/impl_mapping.py b/dendrite/browser/async_api/browser_impl/impl_mapping.py new file mode 100644 index 0000000..d588769 --- /dev/null +++ b/dendrite/browser/async_api/browser_impl/impl_mapping.py @@ -0,0 +1,34 @@ +from typing import Dict, Optional, Type + +from dendrite.browser.remote import Providers +from dendrite.browser.remote.browserbase_config import BrowserbaseConfig +from dendrite.browser.remote.browserless_config import BrowserlessConfig + +from ..protocol.browser_protocol import BrowserProtocol +from .browserbase._impl import BrowserbaseImpl +from .browserless._impl import BrowserlessImpl +from .local._impl import LocalImpl + +IMPL_MAPPING: Dict[Type[Providers], Type[BrowserProtocol]] = { + BrowserbaseConfig: BrowserbaseImpl, + BrowserlessConfig: BrowserlessImpl, +} + +SETTINGS_CLASSES: Dict[str, Type[Providers]] = { + "browserbase": BrowserbaseConfig, + "browserless": BrowserlessConfig, +} + + +def get_impl(remote_provider: Optional[Providers]) -> BrowserProtocol: + if remote_provider is None: + return LocalImpl() + + try: + provider_class = IMPL_MAPPING[type(remote_provider)] + except KeyError: + raise ValueError( + f"No implementation for {type(remote_provider)}. 
Available providers: {', '.join(map(lambda x: x.__name__, IMPL_MAPPING.keys()))}" + ) + + return provider_class(remote_provider) diff --git a/dendrite/browser/async_api/browser_impl/local/_impl.py b/dendrite/browser/async_api/browser_impl/local/_impl.py new file mode 100644 index 0000000..ebc5010 --- /dev/null +++ b/dendrite/browser/async_api/browser_impl/local/_impl.py @@ -0,0 +1,52 @@ +from pathlib import Path +from typing import TYPE_CHECKING, Optional, Union, overload + +from loguru import logger +from typing_extensions import Literal + +from dendrite.browser._common.constants import STEALTH_ARGS + +if TYPE_CHECKING: + from dendrite.browser.async_api.dendrite_browser import AsyncDendrite + +import os +import shutil +import tempfile + +from playwright.async_api import ( + Browser, + BrowserContext, + Download, + Playwright, + StorageState, +) + +from dendrite.browser.async_api.protocol.browser_protocol import BrowserProtocol +from dendrite.browser.async_api.types import PlaywrightPage + + +class LocalImpl(BrowserProtocol): + def __init__(self) -> None: + pass + + async def start_browser( + self, + playwright: Playwright, + pw_options: dict, + storage_state: Optional[StorageState] = None, + ) -> Browser: + return await playwright.chromium.launch(**pw_options) + + async def get_download( + self, + dendrite_browser: "AsyncDendrite", + pw_page: PlaywrightPage, + timeout: float, + ) -> Download: + return await dendrite_browser._download_handler.get_data(pw_page, timeout) + + async def configure_context(self, browser: "AsyncDendrite"): + pass + + async def stop_session(self): + pass diff --git a/dendrite/async_api/_core/dendrite_browser.py b/dendrite/browser/async_api/dendrite_browser.py similarity index 66% rename from dendrite/async_api/_core/dendrite_browser.py rename to dendrite/browser/async_api/dendrite_browser.py index 07722ee..ca51778 100644 --- a/dendrite/async_api/_core/dendrite_browser.py +++ b/dendrite/browser/async_api/dendrite_browser.py @@ -1,53 +1,48 @@ -from abc import ABC, abstractmethod +import os import pathlib import re -from typing import Any, List, Literal, Optional, Sequence, Union +from abc import ABC +from typing import Any, List, Optional, Sequence, Union from uuid import uuid4 -import os + from loguru import logger from playwright.async_api import ( - async_playwright, - Playwright, - BrowserContext, - FileChooser, Download, Error, + FileChooser, FilePayload, + StorageState, + async_playwright, ) -from dendrite.async_api._api.dto.authenticate_dto import AuthenticateDTO -from dendrite.async_api._api.dto.upload_auth_session_dto import UploadAuthSessionDTO -from dendrite.async_api._common.event_sync import EventSync -from dendrite.async_api._core._impl_browser import ImplBrowser -from dendrite.async_api._core._impl_mapping import get_impl -from dendrite.async_api._core._managers.page_manager import ( - PageManager, -) - -from dendrite.async_api._core._type_spec import PlaywrightPage -from dendrite.async_api._core.dendrite_page import AsyncPage -from dendrite.async_api._common.constants import STEALTH_ARGS -from dendrite.async_api._core.mixin.ask import AskMixin -from dendrite.async_api._core.mixin.click import ClickMixin -from dendrite.async_api._core.mixin.extract import ExtractionMixin -from dendrite.async_api._core.mixin.fill_fields import FillFieldsMixin -from dendrite.async_api._core.mixin.get_element import GetElementMixin -from dendrite.async_api._core.mixin.keyboard import KeyboardMixin -from dendrite.async_api._core.mixin.screenshot import ScreenshotMixin 
-from dendrite.async_api._core.mixin.wait_for import WaitForMixin -from dendrite.async_api._core.mixin.markdown import MarkdownMixin -from dendrite.async_api._core.models.authentication import ( - AuthSession, -) - -from dendrite.async_api._core.models.api_config import APIConfig -from dendrite.async_api._api.browser_api_client import BrowserAPIClient -from dendrite._common._exceptions.dendrite_exception import ( +from dendrite.browser._common._exceptions.dendrite_exception import ( BrowserNotLaunchedError, DendriteException, IncorrectOutcomeError, ) -from dendrite.remote import Providers +from dendrite.browser._common.constants import STEALTH_ARGS +from dendrite.browser.async_api._utils import get_domain_w_suffix +from dendrite.browser.remote import Providers +from dendrite.logic.config import Config +from dendrite.logic import AsyncLogicEngine + +from ._event_sync import EventSync +from .browser_impl.impl_mapping import get_impl +from .dendrite_page import AsyncPage +from .manager.page_manager import PageManager +from .mixin import ( + AskMixin, + ClickMixin, + ExtractionMixin, + FillFieldsMixin, + GetElementMixin, + KeyboardMixin, + MarkdownMixin, + ScreenshotMixin, + WaitForMixin, +) +from .protocol.browser_protocol import BrowserProtocol +from .types import PlaywrightPage class AsyncDendrite( @@ -87,51 +82,36 @@ class AsyncDendrite( def __init__( self, - auth: Optional[Union[str, List[str]]] = None, - dendrite_api_key: Optional[str] = None, - openai_api_key: Optional[str] = None, - anthropic_api_key: Optional[str] = None, playwright_options: Any = { "headless": False, "args": STEALTH_ARGS, }, remote_config: Optional[Providers] = None, + config: Optional[Config] = None, + auth: Optional[Union[List[str], str]] = None, ): """ - Initializes AsyncDendrite with API keys and Playwright options. + Initialize AsyncDendrite with optional domain authentication. Args: - auth (Optional[Union[str, List[str]]]): The domains on which the browser should try and authenticate. - dendrite_api_key (Optional[str]): The Dendrite API key. If not provided, it's fetched from the environment variables. - openai_api_key (Optional[str]): Your own OpenAI API key, provide it, along with other custom API keys, if you wish to use Dendrite without paying for a license. - anthropic_api_key (Optional[str]): The own Anthropic API key, provide it, along with other custom API keys, if you wish to use Dendrite without paying for a license. - playwright_options (Any): Options for configuring Playwright. Defaults to running in non-headless mode with stealth arguments. - - Raises: - MissingApiKeyError: If the Dendrite API key is not provided or found in the environment variables. 
+ playwright_options: Options for configuring Playwright + remote_config: Remote browser provider configuration + config: Configuration object + auth: List of domains or single domain to load authentication state for """ - - api_config = APIConfig( - dendrite_api_key=dendrite_api_key or os.environ.get("DENDRITE_API_KEY"), - openai_api_key=openai_api_key, - anthropic_api_key=anthropic_api_key, - ) - self._impl = self._get_impl(remote_config) - - self.api_config = api_config - self.playwright: Optional[Playwright] = None - self.browser_context: Optional[BrowserContext] = None + self._playwright_options = playwright_options + self._config = config or Config() + auth_url = [auth] if isinstance(auth, str) else auth or [] + self._auth_domains = [get_domain_w_suffix(url) for url in auth_url] self._id = uuid4().hex - self._playwright_options = playwright_options self._active_page_manager: Optional[PageManager] = None self._user_id: Optional[str] = None self._upload_handler = EventSync(event_type=FileChooser) self._download_handler = EventSync(event_type=Download) self.closed = False - self._auth = auth - self._browser_api_client = BrowserAPIClient(api_config, self._id) + self._browser_api_client: AsyncLogicEngine = AsyncLogicEngine(self._config) @property def pages(self) -> List[AsyncPage]: @@ -150,10 +130,12 @@ async def _get_page(self) -> AsyncPage: active_page = await self.get_active_page() return active_page - def _get_browser_api_client(self) -> BrowserAPIClient: + @property + def logic_engine(self) -> AsyncLogicEngine: return self._browser_api_client - def _get_dendrite_browser(self) -> "AsyncDendrite": + @property + def dendrite_browser(self) -> "AsyncDendrite": return self async def __aenter__(self): @@ -163,15 +145,10 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): # Ensure cleanup is handled await self.close() - def _get_impl(self, remote_provider: Optional[Providers]) -> ImplBrowser: + def _get_impl(self, remote_provider: Optional[Providers]) -> BrowserProtocol: # if remote_provider is None:) return get_impl(remote_provider) - async def _get_auth_session(self, domains: Union[str, list[str]]): - dto = AuthenticateDTO(domains=domains) - auth_session: AuthSession = await self._browser_api_client.authenticate(dto) - return auth_session - async def get_active_page(self) -> AsyncPage: """ Retrieves the currently active page managed by the PageManager. 
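The new constructor above drops the per-key arguments in favor of a `Config` object plus `auth` domains whose saved storage state is loaded at launch. As a minimal usage sketch (the domain is illustrative, and it assumes authentication state for it was saved earlier with `setup_auth`):

```python
import asyncio

from dendrite import AsyncDendrite


async def main():
    # `auth` accepts a single domain or a list; the cached storage state for
    # each domain is merged into the browser context when the browser launches.
    async with AsyncDendrite(auth="github.com") as browser:
        await browser.goto("https://github.com")


asyncio.run(main())
```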
@@ -294,18 +271,23 @@ async def _launch(self): os.environ["PW_TEST_SCREENSHOT_NO_FONTS_READY"] = "1" self._playwright = await async_playwright().start() - # browser = await self._playwright.chromium.launch(**self._playwright_options) + # Get and merge storage states for authenticated domains + storage_states = [] + for domain in self._auth_domains: + state = await self._get_domain_storage_state(domain) + if state: + storage_states.append(state) + + # Launch browser browser = await self._impl.start_browser( self._playwright, self._playwright_options ) - if self._auth: - auth_session = await self._get_auth_session(self._auth) - self.browser_context = await browser.new_context( - storage_state=auth_session.to_storage_state(), - user_agent=auth_session.user_agent, - ) + # Create context with merged storage state if available + if storage_states: + merged_state = await self._merge_storage_states(storage_states) + self.browser_context = await browser.new_context(storage_state=merged_state) else: self.browser_context = ( browser.contexts[0] @@ -314,7 +296,6 @@ async def _launch(self): ) self._active_page_manager = PageManager(self, self.browser_context) - await self._impl.configure_context(self) return browser, self.browser_context, self._active_page_manager @@ -336,38 +317,34 @@ async def add_cookies(self, cookies): async def close(self): """ - Closes the browser and uploads authentication session data if available. + Closes the browser and updates storage states for authenticated domains before cleanup. - This method stops the Playwright instance, closes the browser context, and uploads any - stored authentication session data if applicable. + This method updates the storage states for authenticated domains, stops the Playwright + instance, and closes the browser context. Returns: None Raises: - Exception: If there is an issue closing the browser or uploading session data. + Exception: If there is an issue closing the browser or updating session data. """ - self.closed = True + try: - if self.browser_context: - if self._auth: - auth_session = await self._get_auth_session(self._auth) - storage_state = await self.browser_context.storage_state() - dto = UploadAuthSessionDTO( - auth_data=auth_session, storage_state=storage_state - ) - await self._browser_api_client.upload_auth_session(dto) + if self.browser_context and self._auth_domains: + # Update storage state for each authenticated domain + for domain in self._auth_domains: + await self.save_auth(domain) + await self._impl.stop_session() await self.browser_context.close() except Error: pass + try: if self._playwright: await self._playwright.stop() - except AttributeError: - pass - except Exception: + except (AttributeError, Exception): pass def _is_launched(self): @@ -464,3 +441,111 @@ async def _get_filechooser( Exception: If there is an issue uploading files. """ return await self._upload_handler.get_data(pw_page, timeout=timeout) + + async def save_auth(self, url: str) -> None: + """ + Save authentication state for a specific domain. 
+
+        Args:
+            url (str): URL or domain of the site to save authentication state for
+                (e.g., "github.com"); it is reduced to its registrable domain before caching.
+        """
+        if not self.browser_context:
+            raise DendriteException("Browser context not initialized")
+
+        domain = get_domain_w_suffix(url)
+
+        # Get current storage state
+        storage_state = await self.browser_context.storage_state()
+
+        # Filter storage state for specific domain
+        filtered_state = {
+            "origins": [
+                origin
+                for origin in storage_state.get("origins", [])
+                if domain in origin.get("origin", "")
+            ],
+            "cookies": [
+                cookie
+                for cookie in storage_state.get("cookies", [])
+                if domain in cookie.get("domain", "")
+            ],
+        }
+
+        # Save to cache
+        self._config.storage_cache.set(
+            {"domain": domain}, StorageState(**filtered_state)
+        )
+
+    async def setup_auth(
+        self,
+        url: str,
+        message: str = "Please log in to the website. Once done, press Enter to continue...",
+    ) -> None:
+        """
+        Set up authentication for a specific URL.
+
+        Args:
+            url (str): URL to navigate to for login
+            message (str): Message to show while waiting for user input
+        """
+        # Reduce the URL to its registrable domain
+        domain = get_domain_w_suffix(url)
+
+        try:
+            # Start Playwright
+            self._playwright = await async_playwright().start()
+
+            # Launch browser with normal context
+            browser = await self._impl.start_browser(
+                self._playwright, {**self._playwright_options, "headless": False}
+            )
+
+            self.browser_context = await browser.new_context()
+            self._active_page_manager = PageManager(self, self.browser_context)
+
+            # Navigate to login page
+            await self.goto(url)
+
+            # Wait for user to complete login
+            print(message)
+            input()
+
+            # Save the storage state for this domain
+            await self.save_auth(domain)
+
+        finally:
+            # Clean up
+            await self.close()
+
+    async def _get_domain_storage_state(self, domain: str) -> Optional[StorageState]:
+        """Get storage state for a specific domain"""
+        return self._config.storage_cache.get({"domain": domain}, index=0)
+
+    async def _merge_storage_states(self, states: List[StorageState]) -> StorageState:
+        """Merge multiple storage states into one"""
+        merged = {"origins": [], "cookies": []}
+        seen_origins = set()
+        seen_cookies = set()
+
+        for state in states:
+            # Merge origins
+            for origin in state.get("origins", []):
+                origin_key = origin.get("origin", "")
+                if origin_key not in seen_origins:
+                    merged["origins"].append(origin)
+                    seen_origins.add(origin_key)
+
+            # Merge cookies
+            for cookie in state.get("cookies", []):
+                cookie_key = (
+                    f"{cookie.get('name')}:{cookie.get('domain')}:{cookie.get('path')}"
+                )
+                if cookie_key not in seen_cookies:
+                    merged["cookies"].append(cookie)
+                    seen_cookies.add(cookie_key)
+
+        return StorageState(**merged)
diff --git a/dendrite/async_api/_core/dendrite_element.py b/dendrite/browser/async_api/dendrite_element.py
similarity index 86%
rename from dendrite/async_api/_core/dendrite_element.py
rename to dendrite/browser/async_api/dendrite_element.py
index e4e4fed..bca3159 100644
--- a/dendrite/async_api/_core/dendrite_element.py
+++ b/dendrite/browser/async_api/dendrite_element.py
@@ -1,4 +1,5 @@
 from __future__ import annotations
+
 import asyncio
 import base64
 import functools
@@ -8,20 +9,19 @@
 from loguru import logger
 from playwright.async_api import Locator

-from dendrite.async_api._api.browser_api_client import BrowserAPIClient
-from dendrite._common._exceptions.dendrite_exception import IncorrectOutcomeError
+from dendrite.browser._common._exceptions.dendrite_exception import (
IncorrectOutcomeError, +) +from dendrite.logic import AsyncLogicEngine if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_browser import AsyncDendrite -from dendrite.async_api._core._managers.navigation_tracker import NavigationTracker -from dendrite.async_api._core.models.page_diff_information import ( - PageDiffInformation, -) -from dendrite.async_api._core._type_spec import Interaction -from dendrite.async_api._api.response.interaction_response import ( - InteractionResponse, -) -from dendrite.async_api._api.dto.make_interaction_dto import MakeInteractionDTO + from .dendrite_browser import AsyncDendrite + +from dendrite.models.dto.make_interaction_dto import VerifyActionDTO +from dendrite.models.response.interaction_response import InteractionResponse + +from .manager.navigation_tracker import NavigationTracker +from .types import Interaction def perform_action(interaction_type: Interaction): @@ -51,11 +51,11 @@ async def wrapper( await func(self, *args, **kwargs) return InteractionResponse(status="success", message="") - api_config = self._dendrite_browser.api_config - page_before = await self._dendrite_browser.get_active_page() page_before_info = await page_before.get_page_information() - + soup = await page_before._get_previous_soup() + screenshot_before = page_before_info.screenshot_base64 + tag_name = soup.find(attrs={"d-id": self.dendrite_id}) # Call the original method here await func( self, @@ -67,25 +67,24 @@ async def wrapper( await self._wait_for_page_changes(page_before.url) page_after = await self._dendrite_browser.get_active_page() - page_after_info = await page_after.get_page_information() - page_delta_information = PageDiffInformation( - page_before=page_before_info, page_after=page_after_info + screenshot_after = ( + await page_after.screenshot_manager.take_full_page_screenshot() ) - dto = MakeInteractionDTO( + dto = VerifyActionDTO( url=page_before.url, dendrite_id=self.dendrite_id, interaction_type=interaction_type, expected_outcome=expected_outcome, - page_delta_information=page_delta_information, - api_config=api_config, + screenshot_before=screenshot_before, + screenshot_after=screenshot_after, + tag_name=str(tag_name), ) - res = await self._browser_api_client.make_interaction(dto) + res = await self._browser_api_client.verify_action(dto) if res.status == "failed": raise IncorrectOutcomeError( - message=res.message, - screenshot_base64=page_delta_information.page_after.screenshot_base64, + message=res.message, screenshot_base64=screenshot_after ) return res @@ -108,7 +107,7 @@ def __init__( dendrite_id: str, locator: Locator, dendrite_browser: AsyncDendrite, - browser_api_client: BrowserAPIClient, + browser_api_client: AsyncLogicEngine, ): """ Initialize a AsyncElement. 
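With the rewrite above, `perform_action` verifies an interaction from a before/after screenshot pair (sent as a `VerifyActionDTO` to `verify_action`) instead of uploading full page-delta information. A hedged sketch of how this surfaces for callers, assuming `get_element` returns an `AsyncElement` whose decorated `click` accepts `expected_outcome` (the prompts are illustrative):

```python
from dendrite.browser._common._exceptions.dendrite_exception import (
    IncorrectOutcomeError,
)


async def submit_and_verify(browser):
    element = await browser.get_element("The submit button")
    try:
        # Passing expected_outcome makes the perform_action wrapper capture a
        # screenshot before and after the click and ask the logic engine to
        # verify the result; a "failed" verdict raises IncorrectOutcomeError.
        await element.click(expected_outcome="A confirmation banner is shown")
    except IncorrectOutcomeError as exc:
        print(f"Outcome verification failed: {exc}")
```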
diff --git a/dendrite/async_api/_core/dendrite_page.py b/dendrite/browser/async_api/dendrite_page.py similarity index 88% rename from dendrite/async_api/_core/dendrite_page.py rename to dendrite/browser/async_api/dendrite_page.py index 7d45eeb..f2b78e2 100644 --- a/dendrite/async_api/_core/dendrite_page.py +++ b/dendrite/browser/async_api/dendrite_page.py @@ -1,57 +1,35 @@ -import re import asyncio import pathlib +import re import time - -from typing import ( - TYPE_CHECKING, - Any, - List, - Literal, - Optional, - Sequence, - Union, -) +from typing import TYPE_CHECKING, Any, List, Literal, Optional, Sequence, Union from bs4 import BeautifulSoup, Tag from loguru import logger - -from playwright.async_api import ( - FrameLocator, - Keyboard, - Download, - FilePayload, -) - - -from dendrite.async_api._api.browser_api_client import BrowserAPIClient -from dendrite.async_api._core._js import GENERATE_DENDRITE_IDS_SCRIPT -from dendrite.async_api._core._type_spec import PlaywrightPage -from dendrite.async_api._core.dendrite_element import AsyncElement -from dendrite.async_api._core.mixin.ask import AskMixin -from dendrite.async_api._core.mixin.click import ClickMixin -from dendrite.async_api._core.mixin.extract import ExtractionMixin -from dendrite.async_api._core.mixin.fill_fields import FillFieldsMixin -from dendrite.async_api._core.mixin.get_element import GetElementMixin -from dendrite.async_api._core.mixin.keyboard import KeyboardMixin -from dendrite.async_api._core.mixin.markdown import MarkdownMixin -from dendrite.async_api._core.mixin.wait_for import WaitForMixin -from dendrite.async_api._core.models.page_information import PageInformation - +from playwright.async_api import Download, FilePayload, FrameLocator, Keyboard + +from dendrite.logic import AsyncLogicEngine +from dendrite.models.page_information import PageInformation + +from .dendrite_element import AsyncElement +from .js import GENERATE_DENDRITE_IDS_SCRIPT +from .mixin.ask import AskMixin +from .mixin.click import ClickMixin +from .mixin.extract import ExtractionMixin +from .mixin.fill_fields import FillFieldsMixin +from .mixin.get_element import GetElementMixin +from .mixin.keyboard import KeyboardMixin +from .mixin.markdown import MarkdownMixin +from .mixin.wait_for import WaitForMixin +from .types import PlaywrightPage if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_browser import AsyncDendrite - + from .dendrite_browser import AsyncDendrite -from dendrite.async_api._core._managers.screenshot_manager import ScreenshotManager -from dendrite._common._exceptions.dendrite_exception import ( - DendriteException, -) +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException - -from dendrite.async_api._core._utils import ( - expand_iframes, -) +from ._utils import expand_iframes +from .manager.screenshot_manager import ScreenshotManager class AsyncPage( @@ -75,14 +53,14 @@ def __init__( self, page: PlaywrightPage, dendrite_browser: "AsyncDendrite", - browser_api_client: "BrowserAPIClient", + browser_api_client: AsyncLogicEngine, ): self.playwright_page = page self.screenshot_manager = ScreenshotManager(page) - self.dendrite_browser = dendrite_browser self._browser_api_client = browser_api_client self._last_main_frame_url = page.url self._last_frame_navigated_timestamp = time.time() + self._dendrite_browser = dendrite_browser self.playwright_page.on("framenavigated", self._on_frame_navigated) @@ -91,6 +69,10 @@ def _on_frame_navigated(self, frame): self._last_main_frame_url = frame.url 
self._last_frame_navigated_timestamp = time.time() + @property + def dendrite_browser(self) -> "AsyncDendrite": + return self._dendrite_browser + @property def url(self): """ @@ -114,10 +96,8 @@ def keyboard(self) -> Keyboard: async def _get_page(self) -> "AsyncPage": return self - def _get_dendrite_browser(self) -> "AsyncDendrite": - return self.dendrite_browser - - def _get_browser_api_client(self) -> BrowserAPIClient: + @property + def logic_engine(self) -> AsyncLogicEngine: return self._browser_api_client async def goto( @@ -292,7 +272,7 @@ async def _generate_dendrite_ids(self): await self.playwright_page.wait_for_load_state( state="load", timeout=3000 ) - logger.debug( + logger.exception( f"Failed to generate dendrite IDs: {e}, attempt {tries+1}/3" ) tries += 1 diff --git a/dendrite/async_api/_core/_js/__init__.py b/dendrite/browser/async_api/js/__init__.py similarity index 100% rename from dendrite/async_api/_core/_js/__init__.py rename to dendrite/browser/async_api/js/__init__.py diff --git a/dendrite/async_api/_core/_js/eventListenerPatch.js b/dendrite/browser/async_api/js/eventListenerPatch.js similarity index 100% rename from dendrite/async_api/_core/_js/eventListenerPatch.js rename to dendrite/browser/async_api/js/eventListenerPatch.js diff --git a/dendrite/sync_api/_core/_js/generateDendriteIDs.js b/dendrite/browser/async_api/js/generateDendriteIDs.js similarity index 97% rename from dendrite/sync_api/_core/_js/generateDendriteIDs.js rename to dendrite/browser/async_api/js/generateDendriteIDs.js index 1d4b348..d03b8cd 100644 --- a/dendrite/sync_api/_core/_js/generateDendriteIDs.js +++ b/dendrite/browser/async_api/js/generateDendriteIDs.js @@ -9,6 +9,7 @@ var hashCode = (str) => { return hash; } + const getElementIndex = (element) => { let index = 1; let sibling = element.previousElementSibling; @@ -42,7 +43,8 @@ const usedHashes = new Map(); var markHidden = (hidden_element) => { // Mark the hidden element itself - hidden + hidden_element.setAttribute('data-hidden', 'true'); + } document.querySelectorAll('*').forEach((element, index) => { diff --git a/dendrite/async_api/_core/_js/generateDendriteIDsIframe.js b/dendrite/browser/async_api/js/generateDendriteIDsIframe.js similarity index 100% rename from dendrite/async_api/_core/_js/generateDendriteIDsIframe.js rename to dendrite/browser/async_api/js/generateDendriteIDsIframe.js diff --git a/dendrite/async_api/_api/response/__init__.py b/dendrite/browser/async_api/manager/__init__.py similarity index 100% rename from dendrite/async_api/_api/response/__init__.py rename to dendrite/browser/async_api/manager/__init__.py diff --git a/dendrite/async_api/_core/_managers/navigation_tracker.py b/dendrite/browser/async_api/manager/navigation_tracker.py similarity index 97% rename from dendrite/async_api/_core/_managers/navigation_tracker.py rename to dendrite/browser/async_api/manager/navigation_tracker.py index dc80337..2ae51aa 100644 --- a/dendrite/async_api/_core/_managers/navigation_tracker.py +++ b/dendrite/browser/async_api/manager/navigation_tracker.py @@ -1,10 +1,9 @@ import asyncio import time - from typing import TYPE_CHECKING, Dict, Optional if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_page import AsyncPage + from ..dendrite_page import AsyncPage class NavigationTracker: diff --git a/dendrite/async_api/_core/_managers/page_manager.py b/dendrite/browser/async_api/manager/page_manager.py similarity index 79% rename from dendrite/async_api/_core/_managers/page_manager.py rename to 
dendrite/browser/async_api/manager/page_manager.py index 0d30cbf..e9069af 100644 --- a/dendrite/async_api/_core/_managers/page_manager.py +++ b/dendrite/browser/async_api/manager/page_manager.py @@ -1,12 +1,13 @@ -from typing import Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from loguru import logger from playwright.async_api import BrowserContext, Download, FileChooser if TYPE_CHECKING: - from dendrite.async_api._core.dendrite_browser import AsyncDendrite -from dendrite.async_api._core._type_spec import PlaywrightPage -from dendrite.async_api._core.dendrite_page import AsyncPage + from ..dendrite_browser import AsyncDendrite + +from ..dendrite_page import AsyncPage +from ..types import PlaywrightPage class PageManager: @@ -16,6 +17,17 @@ def __init__(self, dendrite_browser, browser_context: BrowserContext): self.browser_context = browser_context self.dendrite_browser: AsyncDendrite = dendrite_browser + # Handle existing pages in the context + existing_pages = browser_context.pages + if existing_pages: + for page in existing_pages: + client = self.dendrite_browser.logic_engine + dendrite_page = AsyncPage(page, self.dendrite_browser, client) + self.pages.append(dendrite_page) + # Set the first existing page as active + if self.active_page is None: + self.active_page = dendrite_page + browser_context.on("page", self._page_on_open_handler) async def new_page(self) -> AsyncPage: @@ -25,7 +37,7 @@ async def new_page(self) -> AsyncPage: if self.active_page and new_page == self.active_page.playwright_page: return self.active_page - client = self.dendrite_browser._get_browser_api_client() + client = self.dendrite_browser.logic_engine dendrite_page = AsyncPage(new_page, self.dendrite_browser, client) self.pages.append(dendrite_page) self.active_page = dendrite_page @@ -75,7 +87,7 @@ def _page_on_open_handler(self, page: PlaywrightPage): page.on("download", self._page_on_download_handler) page.on("filechooser", self._page_on_filechooser_handler) - client = self.dendrite_browser._get_browser_api_client() + client = self.dendrite_browser.logic_engine dendrite_page = AsyncPage(page, self.dendrite_browser, client) self.pages.append(dendrite_page) self.active_page = dendrite_page diff --git a/dendrite/async_api/_core/_managers/screenshot_manager.py b/dendrite/browser/async_api/manager/screenshot_manager.py similarity index 97% rename from dendrite/async_api/_core/_managers/screenshot_manager.py rename to dendrite/browser/async_api/manager/screenshot_manager.py index 6fce4b1..2c2613c 100644 --- a/dendrite/async_api/_core/_managers/screenshot_manager.py +++ b/dendrite/browser/async_api/manager/screenshot_manager.py @@ -2,7 +2,7 @@ import os from uuid import uuid4 -from dendrite.async_api._core._type_spec import PlaywrightPage +from ..types import PlaywrightPage class ScreenshotManager: diff --git a/dendrite/browser/async_api/mixin/__init__.py b/dendrite/browser/async_api/mixin/__init__.py new file mode 100644 index 0000000..046a61c --- /dev/null +++ b/dendrite/browser/async_api/mixin/__init__.py @@ -0,0 +1,21 @@ +from .ask import AskMixin +from .click import ClickMixin +from .extract import ExtractionMixin +from .fill_fields import FillFieldsMixin +from .get_element import GetElementMixin +from .keyboard import KeyboardMixin +from .markdown import MarkdownMixin +from .screenshot import ScreenshotMixin +from .wait_for import WaitForMixin + +__all__ = [ + "AskMixin", + "ClickMixin", + "ExtractionMixin", + "FillFieldsMixin", + "GetElementMixin", + "KeyboardMixin", + "MarkdownMixin", + 
"ScreenshotMixin", + "WaitForMixin", +] diff --git a/dendrite/async_api/_core/mixin/ask.py b/dendrite/browser/async_api/mixin/ask.py similarity index 93% rename from dendrite/async_api/_core/mixin/ask.py rename to dendrite/browser/async_api/mixin/ask.py index 05f6a04..b7efd7a 100644 --- a/dendrite/async_api/_core/mixin/ask.py +++ b/dendrite/browser/async_api/mixin/ask.py @@ -4,16 +4,12 @@ from loguru import logger -from dendrite.async_api._api.dto.ask_page_dto import AskPageDTO -from dendrite.async_api._core._type_spec import ( - JsonSchema, - PydanticModel, - TypeSpec, - convert_to_type_spec, - to_json_schema, -) -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import DendriteException +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException +from dendrite.browser.async_api._utils import convert_to_type_spec, to_json_schema +from dendrite.models.dto.ask_page_dto import AskPageDTO + +from ..protocol.page_protocol import DendritePageProtocol +from ..types import JsonSchema, PydanticModel, TypeSpec # The timeout interval between retries in milliseconds TIMEOUT_INTERVAL = [150, 450, 1000] @@ -135,7 +131,6 @@ async def ask( Raises: DendriteException: If the request fails, the exception includes the failure message and a screenshot. """ - api_config = self._get_dendrite_browser().api_config start_time = time.time() attempt_start = start_time attempt = -1 @@ -182,13 +177,12 @@ async def ask( dto = AskPageDTO( page_information=page_information, - api_config=api_config, prompt=entire_prompt, return_schema=schema, ) try: - res = await self._get_browser_api_client().ask_page(dto) + res = await self.logic_engine.ask_page(dto) logger.debug(f"Got response in {time.time() - attempt_start} seconds") if res.status == "error": diff --git a/dendrite/async_api/_core/mixin/click.py b/dendrite/browser/async_api/mixin/click.py similarity index 84% rename from dendrite/async_api/_core/mixin/click.py rename to dendrite/browser/async_api/mixin/click.py index e8b0370..d6460f2 100644 --- a/dendrite/async_api/_core/mixin/click.py +++ b/dendrite/browser/async_api/mixin/click.py @@ -1,11 +1,10 @@ -import asyncio -from typing import Any, Optional -from dendrite.async_api._api.response.interaction_response import ( - InteractionResponse, -) -from dendrite.async_api._core.mixin.get_element import GetElementMixin -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import DendriteException +from typing import Optional + +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException +from dendrite.models.response.interaction_response import InteractionResponse + +from ..mixin.get_element import GetElementMixin +from ..protocol.page_protocol import DendritePageProtocol class ClickMixin(GetElementMixin, DendritePageProtocol): diff --git a/dendrite/browser/async_api/mixin/extract.py b/dendrite/browser/async_api/mixin/extract.py new file mode 100644 index 0000000..b718567 --- /dev/null +++ b/dendrite/browser/async_api/mixin/extract.py @@ -0,0 +1,317 @@ +import asyncio +import time +from typing import Any, Callable, List, Optional, Type, overload + +from loguru import logger + +from dendrite.browser.async_api._utils import convert_to_type_spec, to_json_schema +from dendrite.logic.code.code_session import execute +from dendrite.models.dto.cached_extract_dto import CachedExtractDTO +from 
dendrite.models.dto.extract_dto import ExtractDTO
+from dendrite.models.response.extract_response import ExtractResponse
+from dendrite.models.scripts import Script
+
+from ..manager.navigation_tracker import NavigationTracker
+from ..protocol.page_protocol import DendritePageProtocol
+from ..types import JsonSchema, PydanticModel, TypeSpec
+
+CACHE_TIMEOUT = 5
+
+
+class ExtractionMixin(DendritePageProtocol):
+    """
+    Mixin that provides extraction functionality for web pages.
+
+    This mixin provides an overloaded `extract` method for extracting different
+    types of data (bool, int, float, str, Pydantic models, JSON schemas)
+    from a web page based on a given prompt.
+    """
+
+    @overload
+    async def extract(
+        self,
+        prompt: str,
+        type_spec: Type[bool],
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> bool: ...
+
+    @overload
+    async def extract(
+        self,
+        prompt: str,
+        type_spec: Type[int],
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> int: ...
+
+    @overload
+    async def extract(
+        self,
+        prompt: str,
+        type_spec: Type[float],
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> float: ...
+
+    @overload
+    async def extract(
+        self,
+        prompt: str,
+        type_spec: Type[str],
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> str: ...
+
+    @overload
+    async def extract(
+        self,
+        prompt: Optional[str],
+        type_spec: Type[PydanticModel],
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> PydanticModel: ...
+
+    @overload
+    async def extract(
+        self,
+        prompt: Optional[str],
+        type_spec: JsonSchema,
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> JsonSchema: ...
+
+    @overload
+    async def extract(
+        self,
+        prompt: str,
+        type_spec: None = None,
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> Any: ...
+
+    async def extract(
+        self,
+        prompt: Optional[str],
+        type_spec: Optional[TypeSpec] = None,
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> TypeSpec:
+        """
+        Extract data from a web page based on a prompt and an optional type specification.
+
+        Args:
+            prompt (Optional[str]): The prompt describing the information to extract.
+            type_spec (Optional[TypeSpec], optional): The type specification for the extracted data.
+            use_cache (bool, optional): Whether to try cached extraction scripts first. Defaults to True.
+            timeout (int, optional): Maximum time in seconds for the entire operation. If use_cache=True,
+                up to CACHE_TIMEOUT (5) seconds are spent attempting cached scripts before falling back to
+                the extraction agent, which uses the remaining time to generate a new script. Defaults to 180.
+
+        Returns:
+            TypeSpec: The extracted data, converted to the requested type when one is given,
+                or None if extraction fails within the timeout.
+ """ + logger.info(f"Starting extraction with prompt: {prompt}") + + json_schema = None + if type_spec: + json_schema = to_json_schema(type_spec) + logger.debug(f"Type specification converted to JSON schema: {json_schema}") + + if prompt is None: + prompt = "" + + start_time = time.time() + page = await self._get_page() + navigation_tracker = NavigationTracker(page) + navigation_tracker.start_nav_tracking() + + # First try using cached extraction if enabled + if use_cache: + logger.info("Testing cache") + cached_result = await self._try_cached_extraction(prompt, json_schema) + if cached_result: + return convert_and_return_result(cached_result, type_spec) + + # If cache failed or disabled, proceed with extraction agent + logger.info( + "Using extraction agent to perform extraction, since no cache was found or failed." + ) + result = await self._extract_with_agent( + prompt, + json_schema, + timeout - (time.time() - start_time), + ) + + if result: + return convert_and_return_result(result, type_spec) + + logger.error(f"Extraction failed after {time.time() - start_time:.2f} seconds") + return None + + async def _try_cached_extraction( + self, + prompt: str, + json_schema: Optional[JsonSchema], + ) -> Optional[ExtractResponse]: + """ + Attempts to extract data using cached scripts with exponential backoff. + Only tries up to 5 most recent scripts. + + Args: + prompt: The prompt describing what to extract + json_schema: Optional JSON schema for type validation + + Returns: + ExtractResponse if successful, None otherwise + """ + page = await self._get_page() + dto = CachedExtractDTO(url=page.url, prompt=prompt) + scripts = await self.logic_engine.get_cached_scripts(dto) + logger.debug(f"Found {len(scripts)} scripts in cache, {scripts}") + if len(scripts) == 0: + logger.debug( + f"No scripts found in cache for prompt: {prompt} in domain: {page.url}" + ) + return None + + async def try_cached_extract(): + page = await self._get_page() + soup = await page._get_soup() + # Take at most the last 5 scripts + recent_scripts = scripts[-min(5, len(scripts)) :] + for script in recent_scripts: + res = await test_script(script, str(soup), json_schema) + if res is not None: + return ExtractResponse( + status="success", + message="Re-used a preexisting script from cache with the same specifications.", + return_data=res, + created_script=script.script, + ) + + return None + + return await _attempt_with_backoff_helper( + "cached_extraction", + try_cached_extract, + CACHE_TIMEOUT, + ) + + async def _extract_with_agent( + self, + prompt: str, + json_schema: Optional[JsonSchema], + remaining_timeout: float, + ) -> Optional[ExtractResponse]: + """ + Attempts to extract data using the extraction agent with exponential backoff. + + Args: + prompt: The prompt describing what to extract + json_schema: Optional JSON schema for type validation + remaining_timeout: Maximum time to spend on extraction + + Returns: + ExtractResponse if successful, None otherwise + """ + + async def try_extract_with_agent(): + page = await self._get_page() + page_information = await page.get_page_information(include_screenshot=True) + extract_dto = ExtractDTO( + page_information=page_information, + prompt=prompt, + return_data_json_schema=json_schema, + use_screenshot=True, + ) + + res: ExtractResponse = await self.logic_engine.extract(extract_dto) + + if res.status == "impossible": + logger.error(f"Impossible to extract data. 
Reason: {res.message}") + return None + + if res.status == "success": + logger.success(f"Extraction successful: '{res.message}'") + return res + + return None + + return await _attempt_with_backoff_helper( + "extraction_agent", + try_extract_with_agent, + remaining_timeout, + ) + + +async def _attempt_with_backoff_helper( + operation_name: str, + operation: Callable, + timeout: float, + backoff_intervals: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0], +) -> Optional[Any]: + """ + Generic helper function that implements exponential backoff for operations. + + Args: + operation_name: Name of the operation for logging + operation: Async function to execute + timeout: Maximum time to spend attempting the operation + backoff_intervals: List of timeouts between attempts + + Returns: + The result of the operation if successful, None otherwise + """ + total_elapsed_time = 0 + start_time = time.time() + + for i, current_timeout in enumerate(backoff_intervals): + if total_elapsed_time >= timeout: + logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds") + return None + + request_start_time = time.time() + result = await operation() + request_duration = time.time() - request_start_time + + if result: + return result + + sleep_duration = max(0, current_timeout - request_duration) + logger.info( + f"{operation_name} attempt {i+1} failed. Sleeping for {sleep_duration:.2f} seconds" + ) + await asyncio.sleep(sleep_duration) + total_elapsed_time = time.time() - start_time + + logger.error( + f"All {operation_name} attempts failed after {total_elapsed_time:.2f} seconds" + ) + return None + + +def convert_and_return_result( + res: ExtractResponse, type_spec: Optional[TypeSpec] +) -> TypeSpec: + converted_res = res.return_data + if type_spec is not None: + logger.debug("Converting extraction result to specified type") + converted_res = convert_to_type_spec(type_spec, res.return_data) + + logger.info("Extraction process completed successfully") + return converted_res + + +async def test_script( + script: Script, + raw_html: str, + return_data_json_schema: Any, +) -> Optional[Any]: + + try: + res = execute(script.script, raw_html, return_data_json_schema) + return res + except Exception as e: + logger.debug(f"Script failed with error: {str(e)} ") diff --git a/dendrite/async_api/_core/mixin/fill_fields.py b/dendrite/browser/async_api/mixin/fill_fields.py similarity index 90% rename from dendrite/async_api/_core/mixin/fill_fields.py rename to dendrite/browser/async_api/mixin/fill_fields.py index 55d5760..fad759f 100644 --- a/dendrite/async_api/_core/mixin/fill_fields.py +++ b/dendrite/browser/async_api/mixin/fill_fields.py @@ -1,11 +1,11 @@ import asyncio from typing import Any, Dict, Optional -from dendrite.async_api._api.response.interaction_response import ( - InteractionResponse, -) -from dendrite.async_api._core.mixin.get_element import GetElementMixin -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import DendriteException + +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException +from dendrite.models.response.interaction_response import InteractionResponse + +from ..mixin.get_element import GetElementMixin +from ..protocol.page_protocol import DendritePageProtocol class FillFieldsMixin(GetElementMixin, DendritePageProtocol): diff --git a/dendrite/browser/async_api/mixin/get_element.py b/dendrite/browser/async_api/mixin/get_element.py new file mode 100644 index 
0000000..51f8235 --- /dev/null +++ b/dendrite/browser/async_api/mixin/get_element.py @@ -0,0 +1,304 @@ +import asyncio +import time +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Literal, + Optional, + Union, + overload, +) + +from bs4 import BeautifulSoup +from loguru import logger + +from .._utils import _get_all_elements_from_selector_soup +from ..dendrite_element import AsyncElement + +if TYPE_CHECKING: + from ..dendrite_page import AsyncPage + +from dendrite.models.dto.cached_selector_dto import CachedSelectorDTO +from dendrite.models.dto.get_elements_dto import GetElementsDTO + +from ..protocol.page_protocol import DendritePageProtocol + +CACHE_TIMEOUT = 5 + + +class GetElementMixin(DendritePageProtocol): + async def get_element( + self, + prompt: str, + use_cache=True, + timeout=15000, + ) -> Optional[AsyncElement]: + """ + Retrieves a single Dendrite element based on the provided prompt. + + Args: + prompt (str): The prompt describing the element to be retrieved. + use_cache (bool, optional): Whether to use cached results. Defaults to True. + timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, + up to 5000ms will be spent attempting to use cached selectors before falling back to the + find element agent for the remaining time. Defaults to 15000 (15 seconds). + + Returns: + Optional[AsyncElement]: The retrieved element, or None if no element could be found. + """ + return await self._get_element( + prompt, + only_one=True, + use_cache=use_cache, + timeout=timeout / 1000, + ) + + @overload + async def _get_element( + self, + prompt_or_elements: str, + only_one: Literal[True], + use_cache: bool, + timeout, + ) -> Optional[AsyncElement]: + """ + Retrieves a single Dendrite element based on the provided prompt. + + Args: + prompt_or_elements (str): The prompt describing the element to be retrieved. + only_one (Literal[True]): Indicates that only one element should be retrieved. + use_cache (bool): Whether to use cached results. + timeout (float): Maximum time in seconds for the entire operation. If use_cache=True, + up to 5 seconds will be spent attempting to use cached selectors before falling back to the + find element agent for the remaining time. + + Returns: + Optional[AsyncElement]: The retrieved element, or None. + """ + + @overload + async def _get_element( + self, + prompt_or_elements: str, + only_one: Literal[False], + use_cache: bool, + timeout, + ) -> List[AsyncElement]: + """ + Retrieves a list of Dendrite elements based on the provided prompt. + + Args: + prompt_or_elements (str): The prompt describing the elements to be retrieved. + only_one (Literal[False]): Indicates that multiple elements should be retrieved. + use_cache (bool): Whether to use cached results. + timeout (float): Maximum time in seconds for the entire operation. If use_cache=True, + up to 5 seconds will be spent attempting to use cached selectors before falling back to the + find element agent for the remaining time. + + Returns: + List[AsyncElement]: A list of retrieved elements. + """ + + async def _get_element( + self, + prompt_or_elements: str, + only_one: bool, + use_cache: bool, + timeout: float, + ) -> Union[ + Optional[AsyncElement], + List[AsyncElement], + ]: + """ + Retrieves Dendrite elements based on the provided prompt, either a single element or a list of elements. + + This method sends a request with the prompt and retrieves the elements based on the `only_one` flag.
+ + Args: + prompt_or_elements (str): The prompt describing the elements to retrieve. + only_one (bool): Whether to retrieve only one element or a list of elements. + use_cache (bool): Whether to use cached results. + timeout (float): Maximum time in seconds for the entire operation. If use_cache=True, + up to 5 seconds will be spent attempting to use cached selectors before falling back to the + find element agent for the remaining time. + + Returns: + Union[Optional[AsyncElement], List[AsyncElement]]: The retrieved element or list of elements, or None if nothing was found. + """ + + logger.info(f"Getting element for prompt: '{prompt_or_elements}'") + start_time = time.time() + page = await self._get_page() + soup = await page._get_soup() + + if use_cache: + cached_elements = await self._try_cached_selectors( + page, soup, prompt_or_elements, only_one + ) + if cached_elements: + return cached_elements + + # No cached selectors were found (or they failed repeatedly), so fall back to the find element agent + logger.info( + "Proceeding to use the find element agent to find the requested elements." + ) + res = await try_get_element( + self, + prompt_or_elements, + only_one, + remaining_timeout=timeout - (time.time() - start_time), + ) + if res: + return res + + logger.error( + f"Failed to retrieve elements within the specified timeout of {timeout} seconds" + ) + return None + + async def _try_cached_selectors( + self, + page: "AsyncPage", + soup: BeautifulSoup, + prompt: str, + only_one: bool, + ) -> Union[Optional[AsyncElement], List[AsyncElement]]: + """ + Attempts to retrieve elements using cached selectors with exponential backoff. + + Args: + page: The current page object + soup: The BeautifulSoup object of the current page + prompt: The prompt to search for + only_one: Whether to return only one element + + Returns: + The found elements if successful, None otherwise + """ + dto = CachedSelectorDTO(url=page.url, prompt=prompt) + selectors = await self.logic_engine.get_cached_selectors(dto) + + if len(selectors) == 0: + logger.debug("No cached selectors found") + return None + + logger.debug("Attempting to use cached selectors with backoff") + # Take at most the last 5 selectors + recent_selectors = selectors[-min(5, len(selectors)) :] + str_selectors = list(map(lambda x: x.selector, recent_selectors)) + + async def try_cached_selectors(): + return await get_elements_from_selectors_soup( + page, soup, str_selectors, only_one + ) + + return await _attempt_with_backoff_helper( + "cached_selectors", + try_cached_selectors, + timeout=CACHE_TIMEOUT, + ) + + +async def _attempt_with_backoff_helper( + operation_name: str, + operation: Callable, + timeout: float, + backoff_intervals: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0], +) -> Optional[Any]: + """ + Generic helper function that implements exponential backoff for operations.
+ + Args: + operation_name: Name of the operation for logging + operation: Async function to execute + timeout: Maximum time to spend attempting the operation + backoff_intervals: List of timeouts between attempts + + Returns: + The result of the operation if successful, None otherwise + """ + total_elapsed_time = 0 + start_time = time.time() + + for i, current_timeout in enumerate(backoff_intervals): + if total_elapsed_time >= timeout: + logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds") + return None + + request_start_time = time.time() + result = await operation() + request_duration = time.time() - request_start_time + + if result: + return result + + sleep_duration = max(0, current_timeout - request_duration) + logger.info( + f"{operation_name} attempt {i+1} failed. Sleeping for {sleep_duration:.2f} seconds" + ) + await asyncio.sleep(sleep_duration) + total_elapsed_time = time.time() - start_time + + logger.error( + f"All {operation_name} attempts failed after {total_elapsed_time:.2f} seconds" + ) + return None + + +async def try_get_element( + obj: DendritePageProtocol, + prompt_or_elements: Union[str, Dict[str, str]], + only_one: bool, + remaining_timeout: float, +) -> Union[Optional[AsyncElement], List[AsyncElement]]: + + async def _try_get_element(): + page = await obj._get_page() + page_information = await page.get_page_information() + dto = GetElementsDTO( + page_information=page_information, + prompt=prompt_or_elements, + only_one=only_one, + ) + res = await obj.logic_engine.get_element(dto) + + if res.status == "impossible": + logger.error( + f"Impossible to get elements for '{prompt_or_elements}'. Reason: {res.message}" + ) + return None + + if res.status == "success": + logger.success(f"d[id]: {res.d_id} Selectors:{res.selectors}") + if res.selectors is not None: + return await get_elements_from_selectors_soup( + page, await page._get_previous_soup(), res.selectors, only_one + ) + return None + + return await _attempt_with_backoff_helper( + "find_element_agent", + _try_get_element, + remaining_timeout, + ) + + +async def get_elements_from_selectors_soup( + page: "AsyncPage", + soup: BeautifulSoup, + selectors: List[str], + only_one: bool, +) -> Union[Optional[AsyncElement], List[AsyncElement]]: + + for selector in reversed(selectors): + dendrite_elements = await _get_all_elements_from_selector_soup( + selector, soup, page + ) + + if len(dendrite_elements) > 0: + return dendrite_elements[0] if only_one else dendrite_elements + + return None diff --git a/dendrite/async_api/_core/mixin/keyboard.py b/dendrite/browser/async_api/mixin/keyboard.py similarity index 90% rename from dendrite/async_api/_core/mixin/keyboard.py rename to dendrite/browser/async_api/mixin/keyboard.py index ee26559..bf4e145 100644 --- a/dendrite/async_api/_core/mixin/keyboard.py +++ b/dendrite/browser/async_api/mixin/keyboard.py @@ -1,6 +1,8 @@ -from typing import Any, Union, Literal -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import DendriteException +from typing import Literal, Union + +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException + +from ..protocol.page_protocol import DendritePageProtocol class KeyboardMixin(DendritePageProtocol): diff --git a/dendrite/async_api/_core/mixin/markdown.py b/dendrite/browser/async_api/mixin/markdown.py similarity index 90% rename from dendrite/async_api/_core/mixin/markdown.py rename to 
dendrite/browser/async_api/mixin/markdown.py index 01ada25..687db67 100644 --- a/dendrite/async_api/_core/mixin/markdown.py +++ b/dendrite/browser/async_api/mixin/markdown.py @@ -1,12 +1,12 @@ -from typing import Optional -from bs4 import BeautifulSoup import re +from typing import Optional -from dendrite.async_api._core.mixin.extract import ExtractionMixin -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol - +from bs4 import BeautifulSoup from markdownify import markdownify as md +from ..mixin.extract import ExtractionMixin +from ..protocol.page_protocol import DendritePageProtocol + class MarkdownMixin(ExtractionMixin, DendritePageProtocol): async def markdown(self, prompt: Optional[str] = None): diff --git a/dendrite/async_api/_core/mixin/screenshot.py b/dendrite/browser/async_api/mixin/screenshot.py similarity index 88% rename from dendrite/async_api/_core/mixin/screenshot.py rename to dendrite/browser/async_api/mixin/screenshot.py index c150eb4..200d4a1 100644 --- a/dendrite/async_api/_core/mixin/screenshot.py +++ b/dendrite/browser/async_api/mixin/screenshot.py @@ -1,4 +1,4 @@ -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol +from ..protocol.page_protocol import DendritePageProtocol class ScreenshotMixin(DendritePageProtocol): diff --git a/dendrite/async_api/_core/mixin/wait_for.py b/dendrite/browser/async_api/mixin/wait_for.py similarity index 88% rename from dendrite/async_api/_core/mixin/wait_for.py rename to dendrite/browser/async_api/mixin/wait_for.py index 6bd042e..7c60f88 100644 --- a/dendrite/async_api/_core/mixin/wait_for.py +++ b/dendrite/browser/async_api/mixin/wait_for.py @@ -1,13 +1,15 @@ import asyncio import time +from loguru import logger -from dendrite.async_api._core.mixin.ask import AskMixin -from dendrite.async_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import PageConditionNotMet -from dendrite._common._exceptions.dendrite_exception import DendriteException +from dendrite.browser._common._exceptions.dendrite_exception import ( + DendriteException, + PageConditionNotMet, +) -from loguru import logger +from ..mixin.ask import AskMixin +from ..protocol.page_protocol import DendritePageProtocol class WaitForMixin(AskMixin, DendritePageProtocol): diff --git a/dendrite/async_api/_common/__init__.py b/dendrite/browser/async_api/protocol/__init__.py similarity index 100% rename from dendrite/async_api/_common/__init__.py rename to dendrite/browser/async_api/protocol/__init__.py diff --git a/dendrite/browser/async_api/protocol/browser_protocol.py b/dendrite/browser/async_api/protocol/browser_protocol.py new file mode 100644 index 0000000..304064d --- /dev/null +++ b/dendrite/browser/async_api/protocol/browser_protocol.py @@ -0,0 +1,68 @@ +from typing import TYPE_CHECKING, Optional, Protocol, Union + +from typing_extensions import Literal + +from dendrite.browser.remote import Providers + +if TYPE_CHECKING: + from ..dendrite_browser import AsyncDendrite + +from playwright.async_api import Browser, Download, Playwright + +from ..types import PlaywrightPage + + +class BrowserProtocol(Protocol): + def __init__(self, settings: Providers) -> None: ... + + async def get_download( + self, dendrite_browser: "AsyncDendrite", pw_page: PlaywrightPage, timeout: float + ) -> Download: + """ + Retrieves the download event from the browser. + + Returns: + Download: The download event. 
+ + Raises: + Exception: If there is an issue retrieving the download event. + """ + ... + + async def start_browser( + self, + playwright: Playwright, + pw_options: dict, + ) -> Browser: + """ + Starts the browser session. + + Args: + playwright: The playwright instance + pw_options: Playwright launch options + + Returns: + Browser: A Browser instance + """ + ... + + async def configure_context(self, browser: "AsyncDendrite") -> None: + """ + Configures the browser context. + + Args: + browser (AsyncDendrite): The browser to configure. + + Raises: + Exception: If there is an issue configuring the browser context. + """ + ... + + async def stop_session(self) -> None: + """ + Stops the browser session. + + Raises: + Exception: If there is an issue stopping the browser session. + """ + ... diff --git a/dendrite/async_api/_core/models/download_interface.py b/dendrite/browser/async_api/protocol/download_protocol.py similarity index 99% rename from dendrite/async_api/_core/models/download_interface.py rename to dendrite/browser/async_api/protocol/download_protocol.py index c38a486..bdb7ba9 100644 --- a/dendrite/async_api/_core/models/download_interface.py +++ b/dendrite/browser/async_api/protocol/download_protocol.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from pathlib import Path from typing import Any, Union + from playwright.async_api import Download diff --git a/dendrite/browser/async_api/protocol/page_protocol.py b/dendrite/browser/async_api/protocol/page_protocol.py new file mode 100644 index 0000000..7716352 --- /dev/null +++ b/dendrite/browser/async_api/protocol/page_protocol.py @@ -0,0 +1,22 @@ +from typing import TYPE_CHECKING, Protocol + +from dendrite.logic import AsyncLogicEngine + +if TYPE_CHECKING: + from ..dendrite_browser import AsyncDendrite + from ..dendrite_page import AsyncPage + + +class DendritePageProtocol(Protocol): + """ + Protocol that specifies the required methods and attributes + that the page and browser mixins (e.g. `ExtractionMixin`, `GetElementMixin`) rely on. + """ + + @property + def logic_engine(self) -> AsyncLogicEngine: ... + + @property + def dendrite_browser(self) -> "AsyncDendrite": ... + + async def _get_page(self) -> "AsyncPage": ... 
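`DendritePageProtocol` above is the seam that keeps the mixins decoupled from both `AsyncPage` and `AsyncDendrite`: a mixin only ever touches the three members the protocol declares. As a rough illustration (not part of this patch), a custom mixin could look like the sketch below. The `page_title` helper is hypothetical, and `playwright_page` is assumed to be the attribute on `AsyncPage` that exposes the underlying Playwright page, mirroring the sync `Page` class later in this diff.

```python
from dendrite.browser.async_api.protocol.page_protocol import DendritePageProtocol


class TitleMixin(DendritePageProtocol):
    """Hypothetical mixin that relies only on the protocol's members."""

    async def page_title(self) -> str:
        page = await self._get_page()  # supplied by AsyncPage / AsyncDendrite
        # playwright_page is assumed to expose the raw Playwright page object
        return await page.playwright_page.title()
```
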
diff --git a/dendrite/browser/async_api/types.py b/dendrite/browser/async_api/types.py new file mode 100644 index 0000000..1703b8c --- /dev/null +++ b/dendrite/browser/async_api/types.py @@ -0,0 +1,15 @@ +import inspect +from typing import Any, Dict, Literal, Type, TypeVar, Union + +from playwright.async_api import Page +from pydantic import BaseModel + +Interaction = Literal["click", "fill", "hover"] + +T = TypeVar("T") +PydanticModel = TypeVar("PydanticModel", bound=BaseModel) +PrimitiveTypes = Union[Type[bool], Type[int], Type[float], Type[str]] +JsonSchema = Dict[str, Any] +TypeSpec = Union[PrimitiveTypes, PydanticModel, JsonSchema] + +PlaywrightPage = Page diff --git a/dendrite/browser/remote/__init__.py b/dendrite/browser/remote/__init__.py new file mode 100644 index 0000000..b37ef34 --- /dev/null +++ b/dendrite/browser/remote/__init__.py @@ -0,0 +1,8 @@ +from typing import Union + +from dendrite.browser.remote.browserbase_config import BrowserbaseConfig +from dendrite.browser.remote.browserless_config import BrowserlessConfig + +Providers = Union[BrowserbaseConfig, BrowserlessConfig] + +__all__ = ["Providers", "BrowserbaseConfig"] diff --git a/dendrite/remote/browserbase_config.py b/dendrite/browser/remote/browserbase_config.py similarity index 99% rename from dendrite/remote/browserbase_config.py rename to dendrite/browser/remote/browserbase_config.py index b526b52..f86b02c 100644 --- a/dendrite/remote/browserbase_config.py +++ b/dendrite/browser/remote/browserbase_config.py @@ -1,5 +1,6 @@ import os from typing import Optional + from dendrite.exceptions import MissingApiKeyError diff --git a/dendrite/remote/browserless_config.py b/dendrite/browser/remote/browserless_config.py similarity index 88% rename from dendrite/remote/browserless_config.py rename to dendrite/browser/remote/browserless_config.py index 88a4efe..7e3bcc8 100644 --- a/dendrite/remote/browserless_config.py +++ b/dendrite/browser/remote/browserless_config.py @@ -1,7 +1,7 @@ import os from typing import Optional -from dendrite._common._exceptions.dendrite_exception import MissingApiKeyError +from dendrite.browser._common._exceptions.dendrite_exception import MissingApiKeyError class BrowserlessConfig: diff --git a/dendrite/remote/provider.py b/dendrite/browser/remote/provider.py similarity index 92% rename from dendrite/remote/provider.py rename to dendrite/browser/remote/provider.py index 8a5135f..fd615b0 100644 --- a/dendrite/remote/provider.py +++ b/dendrite/browser/remote/provider.py @@ -1,10 +1,8 @@ from pathlib import Path from typing import Union - -from dendrite.remote import Providers -from dendrite.remote.browserbase_config import BrowserbaseConfig - +from dendrite.browser.remote import Providers +from dendrite.browser.remote.browserbase_config import BrowserbaseConfig try: import tomllib # type: ignore diff --git a/dendrite/browser/sync_api/__init__.py b/dendrite/browser/sync_api/__init__.py new file mode 100644 index 0000000..8beebcc --- /dev/null +++ b/dendrite/browser/sync_api/__init__.py @@ -0,0 +1,6 @@ +from loguru import logger +from .dendrite_browser import Dendrite +from .dendrite_element import Element +from .dendrite_page import Page + +__all__ = ["Dendrite", "Element", "Page"] diff --git a/dendrite/sync_api/_common/event_sync.py b/dendrite/browser/sync_api/_event_sync.py similarity index 91% rename from dendrite/sync_api/_common/event_sync.py rename to dendrite/browser/sync_api/_event_sync.py index 162bb8e..4351eee 100644 --- a/dendrite/sync_api/_common/event_sync.py +++ 
b/dendrite/browser/sync_api/_event_sync.py @@ -1,7 +1,7 @@ import time import time -from typing import Generic, Optional, Type, TypeVar, Union, cast -from playwright.sync_api import Page, Download, FileChooser +from typing import Generic, Optional, Type, TypeVar +from playwright.sync_api import Download, FileChooser, Page Events = TypeVar("Events", Download, FileChooser) mapping = {Download: "download", FileChooser: "filechooser"} diff --git a/dendrite/browser/sync_api/_utils.py b/dendrite/browser/sync_api/_utils.py new file mode 100644 index 0000000..24f2b8a --- /dev/null +++ b/dendrite/browser/sync_api/_utils.py @@ -0,0 +1,123 @@ +import inspect +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +import tldextract +from bs4 import BeautifulSoup +from loguru import logger +from playwright.sync_api import Error, Frame +from pydantic import BaseModel +from dendrite.models.selector import Selector +from .dendrite_element import Element +from .types import PlaywrightPage, TypeSpec + +if TYPE_CHECKING: + from .dendrite_page import Page +from dendrite.logic.dom.strip import mild_strip_in_place +from .js import GENERATE_DENDRITE_IDS_IFRAME_SCRIPT + + +def get_domain_w_suffix(url: str) -> str: + parsed_url = tldextract.extract(url) + if parsed_url.suffix == "": + raise ValueError(f"Invalid URL: {url}") + return f"{parsed_url.domain}.{parsed_url.suffix}" + + +def expand_iframes(page: PlaywrightPage, page_soup: BeautifulSoup): + + def get_iframe_path(frame: Frame): + path_parts = [] + current_frame = frame + while current_frame.parent_frame is not None: + iframe_element = current_frame.frame_element() + iframe_id = iframe_element.get_attribute("d-id") + if iframe_id is None: + return None + path_parts.insert(0, iframe_id) + current_frame = current_frame.parent_frame + return "|".join(path_parts) + + for frame in page.frames: + if frame.parent_frame is None: + continue + try: + iframe_element = frame.frame_element() + iframe_id = iframe_element.get_attribute("d-id") + if iframe_id is None: + continue + iframe_path = get_iframe_path(frame) + except Error as e: + continue + if iframe_path is None: + continue + try: + frame.evaluate( + GENERATE_DENDRITE_IDS_IFRAME_SCRIPT, {"frame_path": iframe_path} + ) + frame_content = frame.content() + frame_tree = BeautifulSoup(frame_content, "lxml") + mild_strip_in_place(frame_tree) + merge_iframe_to_page(iframe_id, page_soup, frame_tree) + except Error as e: + continue + + +def merge_iframe_to_page(iframe_id: str, page: BeautifulSoup, iframe: BeautifulSoup): + iframe_element = page.find("iframe", {"d-id": iframe_id}) + if iframe_element is None: + logger.debug(f"Could not find iframe with ID {iframe_id} in page soup") + return + iframe_element.replace_with(iframe) + + +def _get_all_elements_from_selector_soup( + selector: str, soup: BeautifulSoup, page: "Page" +) -> List[Element]: + dendrite_elements: List[Element] = [] + elements = soup.select(selector) + for element in elements: + frame = page._get_context(element) + d_id = element.get("d-id", "") + locator = frame.locator(f"xpath=//*[@d-id='{d_id}']") + if not d_id: + continue + if isinstance(d_id, list): + d_id = d_id[0] + dendrite_elements.append( + Element(d_id, locator, page.dendrite_browser, page._browser_api_client) + ) + return dendrite_elements + + +def get_elements_from_selectors_soup( + page: "Page", soup: BeautifulSoup, selectors: List[Selector], only_one: bool +) -> Union[Optional[Element], List[Element]]: + for selector in reversed(selectors): + dendrite_elements = 
_get_all_elements_from_selector_soup( + selector.selector, soup, page + ) + if len(dendrite_elements) > 0: + return dendrite_elements[0] if only_one else dendrite_elements + return None + + +def to_json_schema(type_spec: TypeSpec) -> Dict[str, Any]: + if isinstance(type_spec, dict): + return type_spec + if inspect.isclass(type_spec) and issubclass(type_spec, BaseModel): + return type_spec.model_json_schema() + if type_spec in (bool, int, float, str): + type_map = {bool: "boolean", int: "integer", float: "number", str: "string"} + return {"type": type_map[type_spec]} + raise ValueError(f"Unsupported type specification: {type_spec}") + + +def convert_to_type_spec(type_spec: TypeSpec, return_data: Any) -> TypeSpec: + if isinstance(type_spec, type): + if issubclass(type_spec, BaseModel): + return type_spec.model_validate(return_data) + if type_spec in (str, float, bool, int): + return type_spec(return_data) + raise ValueError(f"Unsupported type: {type_spec}") + if isinstance(type_spec, dict): + return return_data + raise ValueError(f"Unsupported type specification: {type_spec}") diff --git a/dendrite/sync_api/_ext_impl/__init__.py b/dendrite/browser/sync_api/browser_impl/__init__.py similarity index 100% rename from dendrite/sync_api/_ext_impl/__init__.py rename to dendrite/browser/sync_api/browser_impl/__init__.py diff --git a/dendrite/sync_api/_ext_impl/browserbase/__init__.py b/dendrite/browser/sync_api/browser_impl/browserbase/__init__.py similarity index 100% rename from dendrite/sync_api/_ext_impl/browserbase/__init__.py rename to dendrite/browser/sync_api/browser_impl/browserbase/__init__.py diff --git a/dendrite/sync_api/_ext_impl/browserbase/_client.py b/dendrite/browser/sync_api/browser_impl/browserbase/_client.py similarity index 96% rename from dendrite/sync_api/_ext_impl/browserbase/_client.py rename to dendrite/browser/sync_api/browser_impl/browserbase/_client.py index 5d862e2..ddc6831 100644 --- a/dendrite/sync_api/_ext_impl/browserbase/_client.py +++ b/dendrite/browser/sync_api/browser_impl/browserbase/_client.py @@ -1,10 +1,10 @@ import time -from pathlib import Path import time +from pathlib import Path from typing import Optional, Union import httpx from loguru import logger -from dendrite._common._exceptions.dendrite_exception import DendriteException +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException class BrowserbaseClient: diff --git a/dendrite/sync_api/_ext_impl/browserbase/_download.py b/dendrite/browser/sync_api/browser_impl/browserbase/_download.py similarity index 91% rename from dendrite/sync_api/_ext_impl/browserbase/_download.py rename to dendrite/browser/sync_api/browser_impl/browserbase/_download.py index e669ba1..c464c81 100644 --- a/dendrite/sync_api/_ext_impl/browserbase/_download.py +++ b/dendrite/browser/sync_api/browser_impl/browserbase/_download.py @@ -1,12 +1,12 @@ -from pathlib import Path import re import shutil -from typing import Union import zipfile +from pathlib import Path +from typing import Union from loguru import logger from playwright.sync_api import Download -from dendrite.sync_api._core.models.download_interface import DownloadInterface -from dendrite.sync_api._ext_impl.browserbase._client import BrowserbaseClient +from dendrite.browser.sync_api.browser_impl.browserbase._client import BrowserbaseClient +from dendrite.browser.sync_api.protocol.download_protocol import DownloadInterface class BrowserbaseDownload(DownloadInterface): diff --git a/dendrite/sync_api/_ext_impl/browserbase/_impl.py 
b/dendrite/browser/sync_api/browser_impl/browserbase/_impl.py similarity index 78% rename from dendrite/sync_api/_ext_impl/browserbase/_impl.py rename to dendrite/browser/sync_api/browser_impl/browserbase/_impl.py index 453c6b6..60ceaf3 100644 --- a/dendrite/sync_api/_ext_impl/browserbase/_impl.py +++ b/dendrite/browser/sync_api/browser_impl/browserbase/_impl.py @@ -1,18 +1,20 @@ from typing import TYPE_CHECKING, Optional -from dendrite._common._exceptions.dendrite_exception import BrowserNotLaunchedError -from dendrite.sync_api._core._impl_browser import ImplBrowser -from dendrite.sync_api._core._type_spec import PlaywrightPage -from dendrite.remote.browserbase_config import BrowserbaseConfig +from dendrite.browser._common._exceptions.dendrite_exception import ( + BrowserNotLaunchedError, +) +from dendrite.browser.sync_api.protocol.browser_protocol import BrowserProtocol +from dendrite.browser.sync_api.types import PlaywrightPage +from dendrite.browser.remote.browserbase_config import BrowserbaseConfig if TYPE_CHECKING: - from dendrite.sync_api._core.dendrite_browser import Dendrite -from dendrite.sync_api._ext_impl.browserbase._client import BrowserbaseClient -from playwright.sync_api import Playwright + from dendrite.browser.sync_api.dendrite_browser import Dendrite from loguru import logger -from dendrite.sync_api._ext_impl.browserbase._download import BrowserbaseDownload +from playwright.sync_api import Playwright +from ._client import BrowserbaseClient +from ._download import BrowserbaseDownload -class BrowserBaseImpl(ImplBrowser): +class BrowserbaseImpl(BrowserProtocol): def __init__(self, settings: BrowserbaseConfig) -> None: self.settings = settings diff --git a/dendrite/async_api/_core/__init__.py b/dendrite/browser/sync_api/browser_impl/browserless/__init__.py similarity index 100% rename from dendrite/async_api/_core/__init__.py rename to dendrite/browser/sync_api/browser_impl/browserless/__init__.py diff --git a/dendrite/sync_api/_ext_impl/browserless/_impl.py b/dendrite/browser/sync_api/browser_impl/browserless/_impl.py similarity index 70% rename from dendrite/sync_api/_ext_impl/browserless/_impl.py rename to dendrite/browser/sync_api/browser_impl/browserless/_impl.py index 5d888e6..822ed48 100644 --- a/dendrite/sync_api/_ext_impl/browserless/_impl.py +++ b/dendrite/browser/sync_api/browser_impl/browserless/_impl.py @@ -1,20 +1,24 @@ import json from typing import TYPE_CHECKING, Optional -from dendrite._common._exceptions.dendrite_exception import BrowserNotLaunchedError -from dendrite.sync_api._core._impl_browser import ImplBrowser -from dendrite.sync_api._core._type_spec import PlaywrightPage -from dendrite.remote.browserless_config import BrowserlessConfig +from dendrite.browser._common._exceptions.dendrite_exception import ( + BrowserNotLaunchedError, +) +from dendrite.browser.sync_api.protocol.browser_protocol import BrowserProtocol +from dendrite.browser.sync_api.types import PlaywrightPage +from dendrite.browser.remote.browserless_config import BrowserlessConfig if TYPE_CHECKING: - from dendrite.sync_api._core.dendrite_browser import Dendrite -from dendrite.sync_api._ext_impl.browserbase._client import BrowserbaseClient -from playwright.sync_api import Playwright -from loguru import logger + from dendrite.browser.sync_api.dendrite_browser import Dendrite import urllib.parse -from dendrite.sync_api._ext_impl.browserbase._download import BrowserbaseDownload +from loguru import logger +from playwright.sync_api import Playwright +from 
dendrite.browser.sync_api.browser_impl.browserbase._client import BrowserbaseClient +from dendrite.browser.sync_api.browser_impl.browserbase._download import ( + BrowserbaseDownload, +) -class BrowserlessImpl(ImplBrowser): +class BrowserlessImpl(BrowserProtocol): def __init__(self, settings: BrowserlessConfig) -> None: self.settings = settings diff --git a/dendrite/browser/sync_api/browser_impl/impl_mapping.py b/dendrite/browser/sync_api/browser_impl/impl_mapping.py new file mode 100644 index 0000000..d1e3d65 --- /dev/null +++ b/dendrite/browser/sync_api/browser_impl/impl_mapping.py @@ -0,0 +1,29 @@ +from typing import Dict, Optional, Type +from dendrite.browser.remote import Providers +from dendrite.browser.remote.browserbase_config import BrowserbaseConfig +from dendrite.browser.remote.browserless_config import BrowserlessConfig +from ..protocol.browser_protocol import BrowserProtocol +from .browserbase._impl import BrowserbaseImpl +from .browserless._impl import BrowserlessImpl +from .local._impl import LocalImpl + +IMPL_MAPPING: Dict[Type[Providers], Type[BrowserProtocol]] = { + BrowserbaseConfig: BrowserbaseImpl, + BrowserlessConfig: BrowserlessImpl, +} +SETTINGS_CLASSES: Dict[str, Type[Providers]] = { + "browserbase": BrowserbaseConfig, + "browserless": BrowserlessConfig, +} + + +def get_impl(remote_provider: Optional[Providers]) -> BrowserProtocol: + if remote_provider is None: + return LocalImpl() + try: + provider_class = IMPL_MAPPING[type(remote_provider)] + except KeyError: + raise ValueError( + f"No implementation for {type(remote_provider)}. Available providers: {', '.join(map(lambda x: x.__name__, IMPL_MAPPING.keys()))}" + ) + return provider_class(remote_provider) diff --git a/dendrite/browser/sync_api/browser_impl/local/_impl.py b/dendrite/browser/sync_api/browser_impl/local/_impl.py new file mode 100644 index 0000000..e995cc1 --- /dev/null +++ b/dendrite/browser/sync_api/browser_impl/local/_impl.py @@ -0,0 +1,45 @@ +from pathlib import Path +from typing import TYPE_CHECKING, Optional, Union, overload +from loguru import logger +from typing_extensions import Literal +from dendrite.browser._common.constants import STEALTH_ARGS + +if TYPE_CHECKING: + from dendrite.browser.sync_api.dendrite_browser import Dendrite +import os +import shutil +import tempfile +from playwright.sync_api import ( + Browser, + BrowserContext, + Download, + Playwright, + StorageState, +) +from dendrite.browser.sync_api.protocol.browser_protocol import BrowserProtocol +from dendrite.browser.sync_api.types import PlaywrightPage + + +class LocalImpl(BrowserProtocol): + + def __init__(self) -> None: + pass + + def start_browser( + self, + playwright: Playwright, + pw_options: dict, + storage_state: Optional[StorageState] = None, + ) -> Browser: + return playwright.chromium.launch(**pw_options) + + def get_download( + self, dendrite_browser: "Dendrite", pw_page: PlaywrightPage, timeout: float + ) -> Download: + return dendrite_browser._download_handler.get_data(pw_page, timeout) + + def configure_context(self, browser: "Dendrite"): + pass + + def stop_session(self): + pass diff --git a/dendrite/sync_api/_core/dendrite_browser.py b/dendrite/browser/sync_api/dendrite_browser.py similarity index 69% rename from dendrite/sync_api/_core/dendrite_browser.py rename to dendrite/browser/sync_api/dendrite_browser.py index 259841e..6747a19 100644 --- a/dendrite/sync_api/_core/dendrite_browser.py +++ b/dendrite/browser/sync_api/dendrite_browser.py @@ -1,46 +1,45 @@ -from abc import ABC, abstractmethod +import os 
import pathlib import re -from typing import Any, List, Literal, Optional, Sequence, Union +from abc import ABC +from typing import Any, List, Optional, Sequence, Union from uuid import uuid4 -import os from loguru import logger from playwright.sync_api import ( - sync_playwright, - Playwright, - BrowserContext, - FileChooser, Download, Error, + FileChooser, FilePayload, + StorageState, + sync_playwright, ) -from dendrite.sync_api._api.dto.authenticate_dto import AuthenticateDTO -from dendrite.sync_api._api.dto.upload_auth_session_dto import UploadAuthSessionDTO -from dendrite.sync_api._common.event_sync import EventSync -from dendrite.sync_api._core._impl_browser import ImplBrowser -from dendrite.sync_api._core._impl_mapping import get_impl -from dendrite.sync_api._core._managers.page_manager import PageManager -from dendrite.sync_api._core._type_spec import PlaywrightPage -from dendrite.sync_api._core.dendrite_page import Page -from dendrite.sync_api._common.constants import STEALTH_ARGS -from dendrite.sync_api._core.mixin.ask import AskMixin -from dendrite.sync_api._core.mixin.click import ClickMixin -from dendrite.sync_api._core.mixin.extract import ExtractionMixin -from dendrite.sync_api._core.mixin.fill_fields import FillFieldsMixin -from dendrite.sync_api._core.mixin.get_element import GetElementMixin -from dendrite.sync_api._core.mixin.keyboard import KeyboardMixin -from dendrite.sync_api._core.mixin.screenshot import ScreenshotMixin -from dendrite.sync_api._core.mixin.wait_for import WaitForMixin -from dendrite.sync_api._core.mixin.markdown import MarkdownMixin -from dendrite.sync_api._core.models.authentication import AuthSession -from dendrite.sync_api._core.models.api_config import APIConfig -from dendrite.sync_api._api.browser_api_client import BrowserAPIClient -from dendrite._common._exceptions.dendrite_exception import ( +from dendrite.browser._common._exceptions.dendrite_exception import ( BrowserNotLaunchedError, DendriteException, IncorrectOutcomeError, ) -from dendrite.remote import Providers +from dendrite.browser._common.constants import STEALTH_ARGS +from dendrite.browser.sync_api._utils import get_domain_w_suffix +from dendrite.browser.remote import Providers +from dendrite.logic.config import Config +from dendrite.logic import LogicEngine +from ._event_sync import EventSync +from .browser_impl.impl_mapping import get_impl +from .dendrite_page import Page +from .manager.page_manager import PageManager +from .mixin import ( + AskMixin, + ClickMixin, + ExtractionMixin, + FillFieldsMixin, + GetElementMixin, + KeyboardMixin, + MarkdownMixin, + ScreenshotMixin, + WaitForMixin, +) +from .protocol.browser_protocol import BrowserProtocol +from .types import PlaywrightPage class Dendrite( @@ -80,44 +79,32 @@ class Dendrite( def __init__( self, - auth: Optional[Union[str, List[str]]] = None, - dendrite_api_key: Optional[str] = None, - openai_api_key: Optional[str] = None, - anthropic_api_key: Optional[str] = None, playwright_options: Any = {"headless": False, "args": STEALTH_ARGS}, remote_config: Optional[Providers] = None, + config: Optional[Config] = None, + auth: Optional[Union[List[str], str]] = None, ): """ - Initializes Dendrite with API keys and Playwright options. + Initialize Dendrite with optional domain authentication. Args: - auth (Optional[Union[str, List[str]]]): The domains on which the browser should try and authenticate. - dendrite_api_key (Optional[str]): The Dendrite API key. If not provided, it's fetched from the environment variables. 
- openai_api_key (Optional[str]): Your own OpenAI API key, provide it, along with other custom API keys, if you wish to use Dendrite without paying for a license. - anthropic_api_key (Optional[str]): The own Anthropic API key, provide it, along with other custom API keys, if you wish to use Dendrite without paying for a license. - playwright_options (Any): Options for configuring Playwright. Defaults to running in non-headless mode with stealth arguments. - - Raises: - MissingApiKeyError: If the Dendrite API key is not provided or found in the environment variables. + playwright_options: Options for configuring Playwright + remote_config: Remote browser provider configuration + config: Configuration object + auth: List of domains or single domain to load authentication state for """ - api_config = APIConfig( - dendrite_api_key=dendrite_api_key or os.environ.get("DENDRITE_API_KEY"), - openai_api_key=openai_api_key, - anthropic_api_key=anthropic_api_key, - ) self._impl = self._get_impl(remote_config) - self.api_config = api_config - self.playwright: Optional[Playwright] = None - self.browser_context: Optional[BrowserContext] = None - self._id = uuid4().hex self._playwright_options = playwright_options + self._config = config or Config() + auth_url = [auth] if isinstance(auth, str) else auth or [] + self._auth_domains = [get_domain_w_suffix(url) for url in auth_url] + self._id = uuid4().hex self._active_page_manager: Optional[PageManager] = None self._user_id: Optional[str] = None self._upload_handler = EventSync(event_type=FileChooser) self._download_handler = EventSync(event_type=Download) self.closed = False - self._auth = auth - self._browser_api_client = BrowserAPIClient(api_config, self._id) + self._browser_api_client: LogicEngine = LogicEngine(self._config) @property def pages(self) -> List[Page]: @@ -136,10 +123,12 @@ def _get_page(self) -> Page: active_page = self.get_active_page() return active_page - def _get_browser_api_client(self) -> BrowserAPIClient: + @property + def logic_engine(self) -> LogicEngine: return self._browser_api_client - def _get_dendrite_browser(self) -> "Dendrite": + @property + def dendrite_browser(self) -> "Dendrite": return self def __enter__(self): @@ -148,14 +137,9 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): self.close() - def _get_impl(self, remote_provider: Optional[Providers]) -> ImplBrowser: + def _get_impl(self, remote_provider: Optional[Providers]) -> BrowserProtocol: return get_impl(remote_provider) - def _get_auth_session(self, domains: Union[str, list[str]]): - dto = AuthenticateDTO(domains=domains) - auth_session: AuthSession = self._browser_api_client.authenticate(dto) - return auth_session - def get_active_page(self) -> Page: """ Retrieves the currently active page managed by the PageManager. 
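The rewritten sync constructor above drops the Dendrite API key entirely: "authentication" now means loading locally cached storage state for the listed domains, and the local `LogicEngine` replaces the remote `BrowserAPIClient`. A minimal usage sketch (assuming `Dendrite` is re-exported from the package root, and that auth state for the domain was previously saved with `setup_auth` or the `dendrite auth` CLI shown in the README):

```python
from dendrite import Dendrite  # assumed root re-export of the sync client

# Cached cookies/origins for github.com are merged into the browser
# context at launch and written back to the storage cache on close().
with Dendrite(auth="github.com") as client:
    client.goto("https://github.com/notifications")
```
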
@@ -268,13 +252,15 @@ def _launch(self): """ os.environ["PW_TEST_SCREENSHOT_NO_FONTS_READY"] = "1" self._playwright = sync_playwright().start() + storage_states = [] + for domain in self._auth_domains: + state = self._get_domain_storage_state(domain) + if state: + storage_states.append(state) browser = self._impl.start_browser(self._playwright, self._playwright_options) - if self._auth: - auth_session = self._get_auth_session(self._auth) - self.browser_context = browser.new_context( - storage_state=auth_session.to_storage_state(), - user_agent=auth_session.user_agent, - ) + if storage_states: + merged_state = self._merge_storage_states(storage_states) + self.browser_context = browser.new_context(storage_state=merged_state) else: self.browser_context = ( browser.contexts[0] @@ -301,27 +287,22 @@ def add_cookies(self, cookies): def close(self): """ - Closes the browser and uploads authentication session data if available. + Closes the browser and updates storage states for authenticated domains before cleanup. - This method stops the Playwright instance, closes the browser context, and uploads any - stored authentication session data if applicable. + This method updates the storage states for authenticated domains, stops the Playwright + instance, and closes the browser context. Returns: None Raises: - Exception: If there is an issue closing the browser or uploading session data. + Exception: If there is an issue closing the browser or updating session data. """ self.closed = True try: - if self.browser_context: - if self._auth: - auth_session = self._get_auth_session(self._auth) - storage_state = self.browser_context.storage_state() - dto = UploadAuthSessionDTO( - auth_data=auth_session, storage_state=storage_state - ) - self._browser_api_client.upload_auth_session(dto) + if self.browser_context and self._auth_domains: + for domain in self._auth_domains: + self.save_auth(domain) self._impl.stop_session() self.browser_context.close() except Error: @@ -329,9 +310,7 @@ def close(self): try: if self._playwright: self._playwright.stop() - except AttributeError: - pass - except Exception: + except Exception: pass def _is_launched(self): @@ -426,3 +405,81 @@ def _get_filechooser( Exception: If there is an issue uploading files. """ return self._upload_handler.get_data(pw_page, timeout=timeout) + + def save_auth(self, url: str) -> None: + """ + Save authentication state for a specific domain. + + Args: + url (str): URL whose domain the authentication state should be saved for (e.g., "github.com") + """ + if not self.browser_context: + raise DendriteException("Browser context not initialized") + domain = get_domain_w_suffix(url) + storage_state = self.browser_context.storage_state() + filtered_state = { + "origins": [ + origin + for origin in storage_state.get("origins", []) + if domain in origin.get("origin", "") + ], + "cookies": [ + cookie + for cookie in storage_state.get("cookies", []) + if domain in cookie.get("domain", "") + ], + } + self._config.storage_cache.set( + {"domain": domain}, StorageState(**filtered_state) + ) + + def setup_auth( + self, + url: str, + message: str = "Please log in to the website. Once done, press Enter to continue...", + ) -> None: + """ + Set up authentication for a specific URL. 
+ + Args: + url (str): URL to navigate to for login + message (str): Message to show while waiting for user input + """ + domain = get_domain_w_suffix(url) + try: + self._playwright = sync_playwright().start() + browser = self._impl.start_browser( + self._playwright, {**self._playwright_options, "headless": False} + ) + self.browser_context = browser.new_context() + self._active_page_manager = PageManager(self, self.browser_context) + self.goto(url) + print(message) + input() + self.save_auth(domain) + finally: + self.close() + + def _get_domain_storage_state(self, domain: str) -> Optional[StorageState]: + """Get storage state for a specific domain""" + return self._config.storage_cache.get({"domain": domain}, index=0) + + def _merge_storage_states(self, states: List[StorageState]) -> StorageState: + """Merge multiple storage states into one""" + merged = {"origins": [], "cookies": []} + seen_origins = set() + seen_cookies = set() + for state in states: + for origin in state.get("origins", []): + origin_key = origin.get("origin", "") + if origin_key not in seen_origins: + merged["origins"].append(origin) + seen_origins.add(origin_key) + for cookie in state.get("cookies", []): + cookie_key = ( + f"{cookie.get('name')}:{cookie.get('domain')}:{cookie.get('path')}" + ) + if cookie_key not in seen_cookies: + merged["cookies"].append(cookie) + seen_cookies.add(cookie_key) + return StorageState(**merged) diff --git a/dendrite/sync_api/_core/dendrite_element.py b/dendrite/browser/sync_api/dendrite_element.py similarity index 85% rename from dendrite/sync_api/_core/dendrite_element.py rename to dendrite/browser/sync_api/dendrite_element.py index d73e788..2ef67a6 100644 --- a/dendrite/sync_api/_core/dendrite_element.py +++ b/dendrite/browser/sync_api/dendrite_element.py @@ -6,16 +6,17 @@ from typing import TYPE_CHECKING, Optional from loguru import logger from playwright.sync_api import Locator -from dendrite.sync_api._api.browser_api_client import BrowserAPIClient -from dendrite._common._exceptions.dendrite_exception import IncorrectOutcomeError +from dendrite.browser._common._exceptions.dendrite_exception import ( + IncorrectOutcomeError, +) +from dendrite.logic import LogicEngine if TYPE_CHECKING: - from dendrite.sync_api._core.dendrite_browser import Dendrite -from dendrite.sync_api._core._managers.navigation_tracker import NavigationTracker -from dendrite.sync_api._core.models.page_diff_information import PageDiffInformation -from dendrite.sync_api._core._type_spec import Interaction -from dendrite.sync_api._api.response.interaction_response import InteractionResponse -from dendrite.sync_api._api.dto.make_interaction_dto import MakeInteractionDTO + from .dendrite_browser import Dendrite +from dendrite.models.dto.make_interaction_dto import VerifyActionDTO +from dendrite.models.response.interaction_response import InteractionResponse +from .manager.navigation_tracker import NavigationTracker +from .types import Interaction def perform_action(interaction_type: Interaction): @@ -40,29 +41,28 @@ def wrapper(self: Element, *args, **kwargs) -> InteractionResponse: if not expected_outcome: func(self, *args, **kwargs) return InteractionResponse(status="success", message="") - api_config = self._dendrite_browser.api_config page_before = self._dendrite_browser.get_active_page() page_before_info = page_before.get_page_information() + soup = page_before._get_previous_soup() + screenshot_before = page_before_info.screenshot_base64 + tag_name = soup.find(attrs={"d-id": self.dendrite_id}) func(self, *args, 
expected_outcome=expected_outcome, **kwargs) self._wait_for_page_changes(page_before.url) page_after = self._dendrite_browser.get_active_page() - page_after_info = page_after.get_page_information() - page_delta_information = PageDiffInformation( - page_before=page_before_info, page_after=page_after_info - ) - dto = MakeInteractionDTO( + screenshot_after = page_after.screenshot_manager.take_full_page_screenshot() + dto = VerifyActionDTO( url=page_before.url, dendrite_id=self.dendrite_id, interaction_type=interaction_type, expected_outcome=expected_outcome, - page_delta_information=page_delta_information, - api_config=api_config, + screenshot_before=screenshot_before, + screenshot_after=screenshot_after, + tag_name=str(tag_name), ) - res = self._browser_api_client.make_interaction(dto) + res = self._browser_api_client.verify_action(dto) if res.status == "failed": raise IncorrectOutcomeError( - message=res.message, - screenshot_base64=page_delta_information.page_after.screenshot_base64, + message=res.message, screenshot_base64=screenshot_after ) return res @@ -84,7 +84,7 @@ def __init__( dendrite_id: str, locator: Locator, dendrite_browser: Dendrite, - browser_api_client: BrowserAPIClient, + browser_api_client: LogicEngine, ): """ Initialize a Element. diff --git a/dendrite/sync_api/_core/dendrite_page.py b/dendrite/browser/sync_api/dendrite_page.py similarity index 88% rename from dendrite/sync_api/_core/dendrite_page.py rename to dendrite/browser/sync_api/dendrite_page.py index b9cd048..d6f2d01 100644 --- a/dendrite/sync_api/_core/dendrite_page.py +++ b/dendrite/browser/sync_api/dendrite_page.py @@ -1,30 +1,30 @@ -import re import time import pathlib +import re import time from typing import TYPE_CHECKING, Any, List, Literal, Optional, Sequence, Union from bs4 import BeautifulSoup, Tag from loguru import logger -from playwright.sync_api import FrameLocator, Keyboard, Download, FilePayload -from dendrite.sync_api._api.browser_api_client import BrowserAPIClient -from dendrite.sync_api._core._js import GENERATE_DENDRITE_IDS_SCRIPT -from dendrite.sync_api._core._type_spec import PlaywrightPage -from dendrite.sync_api._core.dendrite_element import Element -from dendrite.sync_api._core.mixin.ask import AskMixin -from dendrite.sync_api._core.mixin.click import ClickMixin -from dendrite.sync_api._core.mixin.extract import ExtractionMixin -from dendrite.sync_api._core.mixin.fill_fields import FillFieldsMixin -from dendrite.sync_api._core.mixin.get_element import GetElementMixin -from dendrite.sync_api._core.mixin.keyboard import KeyboardMixin -from dendrite.sync_api._core.mixin.markdown import MarkdownMixin -from dendrite.sync_api._core.mixin.wait_for import WaitForMixin -from dendrite.sync_api._core.models.page_information import PageInformation +from playwright.sync_api import Download, FilePayload, FrameLocator, Keyboard +from dendrite.logic import LogicEngine +from dendrite.models.page_information import PageInformation +from .dendrite_element import Element +from .js import GENERATE_DENDRITE_IDS_SCRIPT +from .mixin.ask import AskMixin +from .mixin.click import ClickMixin +from .mixin.extract import ExtractionMixin +from .mixin.fill_fields import FillFieldsMixin +from .mixin.get_element import GetElementMixin +from .mixin.keyboard import KeyboardMixin +from .mixin.markdown import MarkdownMixin +from .mixin.wait_for import WaitForMixin +from .types import PlaywrightPage if TYPE_CHECKING: - from dendrite.sync_api._core.dendrite_browser import Dendrite -from 
dendrite.sync_api._core._managers.screenshot_manager import ScreenshotManager -from dendrite._common._exceptions.dendrite_exception import DendriteException -from dendrite.sync_api._core._utils import expand_iframes + from .dendrite_browser import Dendrite +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException +from ._utils import expand_iframes +from .manager.screenshot_manager import ScreenshotManager class Page( @@ -48,14 +48,14 @@ def __init__( self, page: PlaywrightPage, dendrite_browser: "Dendrite", - browser_api_client: "BrowserAPIClient", + browser_api_client: LogicEngine, ): self.playwright_page = page self.screenshot_manager = ScreenshotManager(page) - self.dendrite_browser = dendrite_browser self._browser_api_client = browser_api_client self._last_main_frame_url = page.url self._last_frame_navigated_timestamp = time.time() + self._dendrite_browser = dendrite_browser self.playwright_page.on("framenavigated", self._on_frame_navigated) def _on_frame_navigated(self, frame): @@ -63,6 +63,10 @@ def _on_frame_navigated(self, frame): self._last_main_frame_url = frame.url self._last_frame_navigated_timestamp = time.time() + @property + def dendrite_browser(self) -> "Dendrite": + return self._dendrite_browser + @property def url(self): """ @@ -86,10 +90,8 @@ def keyboard(self) -> Keyboard: def _get_page(self) -> "Page": return self - def _get_dendrite_browser(self) -> "Dendrite": - return self.dendrite_browser - - def _get_browser_api_client(self) -> BrowserAPIClient: + @property + def logic_engine(self) -> LogicEngine: return self._browser_api_client def goto( @@ -236,7 +238,7 @@ def _generate_dendrite_ids(self): return except Exception as e: self.playwright_page.wait_for_load_state(state="load", timeout=3000) - logger.debug( + logger.exception( f"Failed to generate dendrite IDs: {e}, attempt {tries + 1}/3" ) tries += 1 diff --git a/dendrite/sync_api/_core/_js/__init__.py b/dendrite/browser/sync_api/js/__init__.py similarity index 100% rename from dendrite/sync_api/_core/_js/__init__.py rename to dendrite/browser/sync_api/js/__init__.py diff --git a/dendrite/sync_api/_core/_js/eventListenerPatch.js b/dendrite/browser/sync_api/js/eventListenerPatch.js similarity index 100% rename from dendrite/sync_api/_core/_js/eventListenerPatch.js rename to dendrite/browser/sync_api/js/eventListenerPatch.js diff --git a/dendrite/async_api/_core/_js/generateDendriteIDs.js b/dendrite/browser/sync_api/js/generateDendriteIDs.js similarity index 97% rename from dendrite/async_api/_core/_js/generateDendriteIDs.js rename to dendrite/browser/sync_api/js/generateDendriteIDs.js index 1d4b348..d03b8cd 100644 --- a/dendrite/async_api/_core/_js/generateDendriteIDs.js +++ b/dendrite/browser/sync_api/js/generateDendriteIDs.js @@ -9,6 +9,7 @@ var hashCode = (str) => { return hash; } + const getElementIndex = (element) => { let index = 1; let sibling = element.previousElementSibling; @@ -42,7 +43,8 @@ const usedHashes = new Map(); var markHidden = (hidden_element) => { // Mark the hidden element itself - hidden + hidden_element.setAttribute('data-hidden', 'true'); + } document.querySelectorAll('*').forEach((element, index) => { diff --git a/dendrite/sync_api/_core/_js/generateDendriteIDsIframe.js b/dendrite/browser/sync_api/js/generateDendriteIDsIframe.js similarity index 100% rename from dendrite/sync_api/_core/_js/generateDendriteIDsIframe.js rename to dendrite/browser/sync_api/js/generateDendriteIDsIframe.js diff --git a/dendrite/async_api/_core/_managers/__init__.py 
b/dendrite/browser/sync_api/manager/__init__.py similarity index 100% rename from dendrite/async_api/_core/_managers/__init__.py rename to dendrite/browser/sync_api/manager/__init__.py diff --git a/dendrite/sync_api/_core/_managers/navigation_tracker.py b/dendrite/browser/sync_api/manager/navigation_tracker.py similarity index 97% rename from dendrite/sync_api/_core/_managers/navigation_tracker.py rename to dendrite/browser/sync_api/manager/navigation_tracker.py index 8735d05..d789796 100644 --- a/dendrite/sync_api/_core/_managers/navigation_tracker.py +++ b/dendrite/browser/sync_api/manager/navigation_tracker.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING, Dict, Optional if TYPE_CHECKING: - from dendrite.sync_api._core.dendrite_page import Page + from ..dendrite_page import Page class NavigationTracker: diff --git a/dendrite/sync_api/_core/_managers/page_manager.py b/dendrite/browser/sync_api/manager/page_manager.py similarity index 81% rename from dendrite/sync_api/_core/_managers/page_manager.py rename to dendrite/browser/sync_api/manager/page_manager.py index b8e77d8..52b5782 100644 --- a/dendrite/sync_api/_core/_managers/page_manager.py +++ b/dendrite/browser/sync_api/manager/page_manager.py @@ -1,11 +1,11 @@ -from typing import Optional, TYPE_CHECKING +from typing import TYPE_CHECKING, Optional from loguru import logger from playwright.sync_api import BrowserContext, Download, FileChooser if TYPE_CHECKING: - from dendrite.sync_api._core.dendrite_browser import Dendrite -from dendrite.sync_api._core._type_spec import PlaywrightPage -from dendrite.sync_api._core.dendrite_page import Page + from ..dendrite_browser import Dendrite +from ..dendrite_page import Page +from ..types import PlaywrightPage class PageManager: @@ -15,13 +15,21 @@ def __init__(self, dendrite_browser, browser_context: BrowserContext): self.active_page: Optional[Page] = None self.browser_context = browser_context self.dendrite_browser: Dendrite = dendrite_browser + existing_pages = browser_context.pages + if existing_pages: + for page in existing_pages: + client = self.dendrite_browser.logic_engine + dendrite_page = Page(page, self.dendrite_browser, client) + self.pages.append(dendrite_page) + if self.active_page is None: + self.active_page = dendrite_page browser_context.on("page", self._page_on_open_handler) def new_page(self) -> Page: new_page = self.browser_context.new_page() if self.active_page and new_page == self.active_page.playwright_page: return self.active_page - client = self.dendrite_browser._get_browser_api_client() + client = self.dendrite_browser.logic_engine dendrite_page = Page(new_page, self.dendrite_browser, client) self.pages.append(dendrite_page) self.active_page = dendrite_page @@ -68,7 +76,7 @@ def _page_on_open_handler(self, page: PlaywrightPage): page.on("crash", self._page_on_crash_handler) page.on("download", self._page_on_download_handler) page.on("filechooser", self._page_on_filechooser_handler) - client = self.dendrite_browser._get_browser_api_client() + client = self.dendrite_browser.logic_engine dendrite_page = Page(page, self.dendrite_browser, client) self.pages.append(dendrite_page) self.active_page = dendrite_page diff --git a/dendrite/sync_api/_core/_managers/screenshot_manager.py b/dendrite/browser/sync_api/manager/screenshot_manager.py similarity index 96% rename from dendrite/sync_api/_core/_managers/screenshot_manager.py rename to dendrite/browser/sync_api/manager/screenshot_manager.py index a6f36b1..7f4fd33 100644 --- 
a/dendrite/sync_api/_core/_managers/screenshot_manager.py +++ b/dendrite/browser/sync_api/manager/screenshot_manager.py @@ -1,7 +1,7 @@ import base64 import os from uuid import uuid4 -from dendrite.sync_api._core._type_spec import PlaywrightPage +from ..types import PlaywrightPage class ScreenshotManager: diff --git a/dendrite/browser/sync_api/mixin/__init__.py b/dendrite/browser/sync_api/mixin/__init__.py new file mode 100644 index 0000000..046a61c --- /dev/null +++ b/dendrite/browser/sync_api/mixin/__init__.py @@ -0,0 +1,21 @@ +from .ask import AskMixin +from .click import ClickMixin +from .extract import ExtractionMixin +from .fill_fields import FillFieldsMixin +from .get_element import GetElementMixin +from .keyboard import KeyboardMixin +from .markdown import MarkdownMixin +from .screenshot import ScreenshotMixin +from .wait_for import WaitForMixin + +__all__ = [ + "AskMixin", + "ClickMixin", + "ExtractionMixin", + "FillFieldsMixin", + "GetElementMixin", + "KeyboardMixin", + "MarkdownMixin", + "ScreenshotMixin", + "WaitForMixin", +] diff --git a/dendrite/sync_api/_core/mixin/ask.py b/dendrite/browser/sync_api/mixin/ask.py similarity index 93% rename from dendrite/sync_api/_core/mixin/ask.py rename to dendrite/browser/sync_api/mixin/ask.py index ca028f8..57f4a56 100644 --- a/dendrite/sync_api/_core/mixin/ask.py +++ b/dendrite/browser/sync_api/mixin/ask.py @@ -2,16 +2,11 @@ import time from typing import Optional, Type, overload from loguru import logger -from dendrite.sync_api._api.dto.ask_page_dto import AskPageDTO -from dendrite.sync_api._core._type_spec import ( - JsonSchema, - PydanticModel, - TypeSpec, - convert_to_type_spec, - to_json_schema, -) -from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import DendriteException +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException +from dendrite.browser.sync_api._utils import convert_to_type_spec, to_json_schema +from dendrite.models.dto.ask_page_dto import AskPageDTO +from ..protocol.page_protocol import DendritePageProtocol +from ..types import JsonSchema, PydanticModel, TypeSpec TIMEOUT_INTERVAL = [150, 450, 1000] @@ -129,7 +124,6 @@ def ask( Raises: DendriteException: If the request fails, the exception includes the failure message and a screenshot. 
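+
+        Example (illustrative; the prompt and return type are placeholder
+        assumptions):
+            >>> has_results = page.ask("Did the search return any results?", bool)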
""" - api_config = self._get_dendrite_browser().api_config start_time = time.time() attempt_start = start_time attempt = -1 @@ -165,12 +159,11 @@ def ask( entire_prompt = prompt + time_prompt dto = AskPageDTO( page_information=page_information, - api_config=api_config, prompt=entire_prompt, return_schema=schema, ) try: - res = self._get_browser_api_client().ask_page(dto) + res = self.logic_engine.ask_page(dto) logger.debug(f"Got response in {time.time() - attempt_start} seconds") if res.status == "error": logger.warning( diff --git a/dendrite/sync_api/_core/mixin/click.py b/dendrite/browser/sync_api/mixin/click.py similarity index 84% rename from dendrite/sync_api/_core/mixin/click.py rename to dendrite/browser/sync_api/mixin/click.py index 097eccb..2f8461b 100644 --- a/dendrite/sync_api/_core/mixin/click.py +++ b/dendrite/browser/sync_api/mixin/click.py @@ -1,9 +1,8 @@ -import time -from typing import Any, Optional -from dendrite.sync_api._api.response.interaction_response import InteractionResponse -from dendrite.sync_api._core.mixin.get_element import GetElementMixin -from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import DendriteException +from typing import Optional +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException +from dendrite.models.response.interaction_response import InteractionResponse +from ..mixin.get_element import GetElementMixin +from ..protocol.page_protocol import DendritePageProtocol class ClickMixin(GetElementMixin, DendritePageProtocol): diff --git a/dendrite/browser/sync_api/mixin/extract.py b/dendrite/browser/sync_api/mixin/extract.py new file mode 100644 index 0000000..e5bf411 --- /dev/null +++ b/dendrite/browser/sync_api/mixin/extract.py @@ -0,0 +1,279 @@ +import time +import time +from typing import Any, Callable, List, Optional, Type, overload +from loguru import logger +from dendrite.browser.sync_api._utils import convert_to_type_spec, to_json_schema +from dendrite.logic.code.code_session import execute +from dendrite.models.dto.cached_extract_dto import CachedExtractDTO +from dendrite.models.dto.extract_dto import ExtractDTO +from dendrite.models.response.extract_response import ExtractResponse +from dendrite.models.scripts import Script +from ..manager.navigation_tracker import NavigationTracker +from ..protocol.page_protocol import DendritePageProtocol +from ..types import JsonSchema, PydanticModel, TypeSpec + +CACHE_TIMEOUT = 5 + + +class ExtractionMixin(DendritePageProtocol): + """ + Mixin that provides extraction functionality for web pages. + + This mixin provides various `extract` methods that allow extracting + different types of data (e.g., bool, int, float, string, Pydantic models, etc.) + from a web page based on a given prompt. + """ + + @overload + def extract( + self, + prompt: str, + type_spec: Type[bool], + use_cache: bool = True, + timeout: int = 180, + ) -> bool: ... + + @overload + def extract( + self, + prompt: str, + type_spec: Type[int], + use_cache: bool = True, + timeout: int = 180, + ) -> int: ... + + @overload + def extract( + self, + prompt: str, + type_spec: Type[float], + use_cache: bool = True, + timeout: int = 180, + ) -> float: ... + + @overload + def extract( + self, + prompt: str, + type_spec: Type[str], + use_cache: bool = True, + timeout: int = 180, + ) -> str: ... 
+
+    @overload
+    def extract(
+        self,
+        prompt: Optional[str],
+        type_spec: Type[PydanticModel],
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> PydanticModel: ...
+
+    @overload
+    def extract(
+        self,
+        prompt: Optional[str],
+        type_spec: JsonSchema,
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> JsonSchema: ...
+
+    @overload
+    def extract(
+        self,
+        prompt: str,
+        type_spec: None = None,
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> Any: ...
+
+    def extract(
+        self,
+        prompt: Optional[str],
+        type_spec: Optional[TypeSpec] = None,
+        use_cache: bool = True,
+        timeout: int = 180,
+    ) -> TypeSpec:
+        """
+        Extract data from a web page based on a prompt and optional type specification.
+
+        Args:
+            prompt (Optional[str]): The prompt describing the information to extract.
+            type_spec (Optional[TypeSpec], optional): The type specification for the extracted data.
+            use_cache (bool, optional): Whether to use cached results. Defaults to True.
+            timeout (int, optional): Maximum time in seconds for the entire operation. If use_cache=True,
+                up to 5 seconds are spent attempting cached scripts before falling back to the
+                extraction agent, which uses the remaining time to generate a new script. Defaults to 180.
+
+        Returns:
+            TypeSpec: The extracted data, converted to the requested type specification if one
+                is provided. Returns None if extraction does not succeed within the timeout.
+        """
+        logger.info(f"Starting extraction with prompt: {prompt}")
+        json_schema = None
+        if type_spec:
+            json_schema = to_json_schema(type_spec)
+            logger.debug(f"Type specification converted to JSON schema: {json_schema}")
+        if prompt is None:
+            prompt = ""
+        start_time = time.time()
+        page = self._get_page()
+        navigation_tracker = NavigationTracker(page)
+        navigation_tracker.start_nav_tracking()
+        if use_cache:
+            logger.info("Trying cached extraction scripts")
+            cached_result = self._try_cached_extraction(prompt, json_schema)
+            if cached_result:
+                return convert_and_return_result(cached_result, type_spec)
+        logger.info(
+            "No cached script was found (or none succeeded); falling back to the extraction agent."
+        )
+        result = self._extract_with_agent(
+            prompt, json_schema, timeout - (time.time() - start_time)
+        )
+        if result:
+            return convert_and_return_result(result, type_spec)
+        logger.error(f"Extraction failed after {time.time() - start_time:.2f} seconds")
+        return None
+
+    def _try_cached_extraction(
+        self, prompt: str, json_schema: Optional[JsonSchema]
+    ) -> Optional[ExtractResponse]:
+        """
+        Attempts to extract data using cached scripts with exponential backoff.
+        Only tries up to the 5 most recent scripts.
+
+        Args:
+            prompt: The prompt describing what to extract
+            json_schema: Optional JSON schema for type validation
+
+        Returns:
+            ExtractResponse if successful, None otherwise
+        """
+        page = self._get_page()
+        dto = CachedExtractDTO(url=page.url, prompt=prompt)
+        scripts = self.logic_engine.get_cached_scripts(dto)
+        logger.debug(f"Found {len(scripts)} scripts in cache, {scripts}")
+        if len(scripts) == 0:
+            logger.debug(
+                f"No scripts found in cache for prompt: {prompt} in domain: {page.url}"
+            )
+            return None
+
+        def try_cached_extract():
+            page = self._get_page()
+            soup = page._get_soup()
+            recent_scripts = scripts[-min(5, len(scripts)) :]
+            for script in recent_scripts:
+                res = test_script(script, str(soup), json_schema)
+                if res is not None:
+                    return ExtractResponse(
+                        status="success",
+                        message="Re-used a preexisting script from cache with the same specifications.",
+                        return_data=res,
+                        created_script=script.script,
+                    )
+            return None
+
+        return _attempt_with_backoff_helper(
+            "cached_extraction", try_cached_extract, CACHE_TIMEOUT
+        )
+
+    def _extract_with_agent(
+        self, prompt: str, json_schema: Optional[JsonSchema], remaining_timeout: float
+    ) -> Optional[ExtractResponse]:
+        """
+        Attempts to extract data using the extraction agent with exponential backoff.
+
+        Args:
+            prompt: The prompt describing what to extract
+            json_schema: Optional JSON schema for type validation
+            remaining_timeout: Maximum time to spend on extraction
+
+        Returns:
+            ExtractResponse if successful, None otherwise
+        """
+
+        def try_extract_with_agent():
+            page = self._get_page()
+            page_information = page.get_page_information(include_screenshot=True)
+            extract_dto = ExtractDTO(
+                page_information=page_information,
+                prompt=prompt,
+                return_data_json_schema=json_schema,
+                use_screenshot=True,
+            )
+            res: ExtractResponse = self.logic_engine.extract(extract_dto)
+            if res.status == "impossible":
+                logger.error(f"Impossible to extract data. Reason: {res.message}")
+                return None
+            if res.status == "success":
+                logger.success(f"Extraction successful: '{res.message}'")
+                return res
+            return None
+
+        return _attempt_with_backoff_helper(
+            "extraction_agent", try_extract_with_agent, remaining_timeout
+        )
+
+
+def _attempt_with_backoff_helper(
+    operation_name: str,
+    operation: Callable,
+    timeout: float,
+    backoff_intervals: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0],
+) -> Optional[Any]:
+    """
+    Generic helper function that implements exponential backoff for operations.
+
+    Args:
+        operation_name: Name of the operation for logging
+        operation: Function to execute on each attempt
+        timeout: Maximum time to spend attempting the operation
+        backoff_intervals: Sleep intervals between attempts
+
+    Returns:
+        The result of the operation if successful, None otherwise
+    """
+    total_elapsed_time = 0
+    start_time = time.time()
+    for i, current_timeout in enumerate(backoff_intervals):
+        if total_elapsed_time >= timeout:
+            logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds")
+            return None
+        request_start_time = time.time()
+        result = operation()
+        request_duration = time.time() - request_start_time
+        if result:
+            return result
+        sleep_duration = max(0, current_timeout - request_duration)
+        logger.info(
+            f"{operation_name} attempt {i + 1} failed. Sleeping for {sleep_duration:.2f} seconds"
+        )
+        time.sleep(sleep_duration)
+        total_elapsed_time = time.time() - start_time
+    logger.error(
+        f"All {operation_name} attempts failed after {total_elapsed_time:.2f} seconds"
+    )
+    return None
+
+
+def convert_and_return_result(
+    res: ExtractResponse, type_spec: Optional[TypeSpec]
+) -> TypeSpec:
+    converted_res = res.return_data
+    if type_spec is not None:
+        logger.debug("Converting extraction result to specified type")
+        converted_res = convert_to_type_spec(type_spec, res.return_data)
+    logger.info("Extraction process completed successfully")
+    return converted_res
+
+
+def test_script(
+    script: Script, raw_html: str, return_data_json_schema: Any
+) -> Optional[Any]:
+    try:
+        res = execute(script.script, raw_html, return_data_json_schema)
+        return res
+    except Exception as e:
+        logger.debug(f"Script failed with error: {str(e)} ")
+        return None
diff --git a/dendrite/sync_api/_core/mixin/fill_fields.py b/dendrite/browser/sync_api/mixin/fill_fields.py
similarity index 90%
rename from dendrite/sync_api/_core/mixin/fill_fields.py
rename to dendrite/browser/sync_api/mixin/fill_fields.py
index 792ab24..4a4880f 100644
--- a/dendrite/sync_api/_core/mixin/fill_fields.py
+++ b/dendrite/browser/sync_api/mixin/fill_fields.py
@@ -1,9 +1,9 @@
 import time
 from typing import Any, Dict, Optional
-from dendrite.sync_api._api.response.interaction_response import InteractionResponse
-from dendrite.sync_api._core.mixin.get_element import GetElementMixin
-from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol
-from dendrite._common._exceptions.dendrite_exception import DendriteException
+from dendrite.browser._common._exceptions.dendrite_exception import DendriteException
+from dendrite.models.response.interaction_response import InteractionResponse
+from ..mixin.get_element import GetElementMixin
+from ..protocol.page_protocol import DendritePageProtocol
 class FillFieldsMixin(GetElementMixin, DendritePageProtocol):
diff --git a/dendrite/browser/sync_api/mixin/get_element.py b/dendrite/browser/sync_api/mixin/get_element.py
new file mode 100644
index 0000000..84e2b37
--- /dev/null
+++ b/dendrite/browser/sync_api/mixin/get_element.py
@@ -0,0 +1,251 @@
+import time
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    Union,
+    overload,
+)
+from bs4 import BeautifulSoup
+from loguru import logger
+from .._utils import _get_all_elements_from_selector_soup
+from ..dendrite_element import Element
+
+if TYPE_CHECKING:
+    from ..dendrite_page import Page
+from dendrite.models.dto.cached_selector_dto import CachedSelectorDTO
+from dendrite.models.dto.get_elements_dto import GetElementsDTO
+from ..protocol.page_protocol import DendritePageProtocol
+
+CACHE_TIMEOUT = 5
+
+
+class GetElementMixin(DendritePageProtocol):
+
+    def get_element(
+        self, prompt: str, use_cache: bool = True, timeout: int = 15000
+    ) -> Optional[Element]:
+        """
+        Retrieves a single Dendrite element based on the provided prompt.
+
+        Args:
+            prompt (str): The prompt describing the element to be retrieved.
+            use_cache (bool, optional): Whether to use cached results. Defaults to True.
+            timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True,
+                up to 5000ms will be spent attempting to use cached selectors before falling back to the
+                find element agent for the remaining time. Defaults to 15000 (15 seconds).
+
+        Returns:
+            Optional[Element]: The retrieved element, or None if no element was found in time.
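+
+        Example (illustrative; the prompt and page object are assumptions):
+            >>> button = page.get_element("The blue submit button")
+            >>> if button:
+            ...     button.click()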
+ """ + return self._get_element( + prompt, only_one=True, use_cache=use_cache, timeout=timeout / 1000 + ) + + @overload + def _get_element( + self, prompt_or_elements: str, only_one: Literal[True], use_cache: bool, timeout + ) -> Optional[Element]: + """ + Retrieves a single Dendrite element based on the provided prompt. + + Args: + prompt (Union[str, Dict[str, str]]): The prompt describing the element to be retrieved. + only_one (Literal[True]): Indicates that only one element should be retrieved. + use_cache (bool): Whether to use cached results. + timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, + up to 5000ms will be spent attempting to use cached selectors before falling back to the + find element agent for the remaining time. Defaults to 15000 (15 seconds). + + Returns: + Element: The retrieved element. + """ + + @overload + def _get_element( + self, + prompt_or_elements: str, + only_one: Literal[False], + use_cache: bool, + timeout, + ) -> List[Element]: + """ + Retrieves a list of Dendrite elements based on the provided prompt. + + Args: + prompt (str): The prompt describing the elements to be retrieved. + only_one (Literal[False]): Indicates that multiple elements should be retrieved. + use_cache (bool): Whether to use cached results. + timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, + up to 5000ms will be spent attempting to use cached selectors before falling back to the + find element agent for the remaining time. Defaults to 15000 (15 seconds). + + Returns: + List[Element]: A list of retrieved elements. + """ + + def _get_element( + self, prompt_or_elements: str, only_one: bool, use_cache: bool, timeout: float + ) -> Union[Optional[Element], List[Element]]: + """ + Retrieves Dendrite elements based on the provided prompt, either a single element or a list of elements. + + This method sends a request with the prompt and retrieves the elements based on the `only_one` flag. + + Args: + prompt_or_elements (Union[str, Dict[str, str]]): The prompt or dictionary of prompts for element retrieval. + only_one (bool): Whether to retrieve only one element or a list of elements. + use_cache (bool): Whether to use cached results. + timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, + up to 5000ms will be spent attempting to use cached selectors before falling back to the + find element agent for the remaining time. Defaults to 15000 (15 seconds). + + Returns: + Union[Element, List[Element], ElementsResponse]: The retrieved element, list of elements, or response object. + """ + logger.info(f"Getting element for prompt: '{prompt_or_elements}'") + start_time = time.time() + page = self._get_page() + soup = page._get_soup() + if use_cache: + cached_elements = self._try_cached_selectors( + page, soup, prompt_or_elements, only_one + ) + if cached_elements: + return cached_elements + logger.info( + "Proceeding to use the find element agent to find the requested elements." 
+        )
+        res = try_get_element(
+            self,
+            prompt_or_elements,
+            only_one,
+            remaining_timeout=timeout - (time.time() - start_time),
+        )
+        if res:
+            return res
+        logger.error(
+            f"Failed to retrieve elements within the specified timeout of {timeout} seconds"
+        )
+        return None
+
+    def _try_cached_selectors(
+        self, page: "Page", soup: BeautifulSoup, prompt: str, only_one: bool
+    ) -> Union[Optional[Element], List[Element]]:
+        """
+        Attempts to retrieve elements using cached selectors with exponential backoff.
+
+        Args:
+            page: The current page object
+            soup: The BeautifulSoup object of the current page
+            prompt: The prompt to search for
+            only_one: Whether to return only one element
+
+        Returns:
+            The found elements if successful, None otherwise
+        """
+        dto = CachedSelectorDTO(url=page.url, prompt=prompt)
+        selectors = self.logic_engine.get_cached_selectors(dto)
+        if len(selectors) == 0:
+            logger.debug("No cached selectors found")
+            return None
+        logger.debug("Attempting to use cached selectors with backoff")
+        recent_selectors = selectors[-min(5, len(selectors)) :]
+        str_selectors = list(map(lambda x: x.selector, recent_selectors))
+
+        def try_cached_selectors():
+            return get_elements_from_selectors_soup(page, soup, str_selectors, only_one)
+
+        return _attempt_with_backoff_helper(
+            "cached_selectors", try_cached_selectors, timeout=CACHE_TIMEOUT
+        )
+
+
+def _attempt_with_backoff_helper(
+    operation_name: str,
+    operation: Callable,
+    timeout: float,
+    backoff_intervals: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0],
+) -> Optional[Any]:
+    """
+    Generic helper function that implements exponential backoff for operations.
+
+    Args:
+        operation_name: Name of the operation for logging
+        operation: Function to execute on each attempt
+        timeout: Maximum time to spend attempting the operation
+        backoff_intervals: Sleep intervals between attempts
+
+    Returns:
+        The result of the operation if successful, None otherwise
+    """
+    total_elapsed_time = 0
+    start_time = time.time()
+    for i, current_timeout in enumerate(backoff_intervals):
+        if total_elapsed_time >= timeout:
+            logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds")
+            return None
+        request_start_time = time.time()
+        result = operation()
+        request_duration = time.time() - request_start_time
+        if result:
+            return result
+        sleep_duration = max(0, current_timeout - request_duration)
+        logger.info(
+            f"{operation_name} attempt {i + 1} failed. Sleeping for {sleep_duration:.2f} seconds"
+        )
+        time.sleep(sleep_duration)
+        total_elapsed_time = time.time() - start_time
+    logger.error(
+        f"All {operation_name} attempts failed after {total_elapsed_time:.2f} seconds"
+    )
+    return None
+
+
+def try_get_element(
+    obj: DendritePageProtocol,
+    prompt_or_elements: Union[str, Dict[str, str]],
+    only_one: bool,
+    remaining_timeout: float,
+) -> Union[Optional[Element], List[Element]]:
+
+    def _try_get_element():
+        page = obj._get_page()
+        page_information = page.get_page_information()
+        dto = GetElementsDTO(
+            page_information=page_information,
+            prompt=prompt_or_elements,
+            only_one=only_one,
+        )
+        res = obj.logic_engine.get_element(dto)
+        if res.status == "impossible":
+            logger.error(
+                f"Impossible to get elements for '{prompt_or_elements}'. 
Reason: {res.message}" + ) + return None + if res.status == "success": + logger.success(f"d[id]: {res.d_id} Selectors:{res.selectors}") + if res.selectors is not None: + return get_elements_from_selectors_soup( + page, page._get_previous_soup(), res.selectors, only_one + ) + return None + + return _attempt_with_backoff_helper( + "find_element_agent", _try_get_element, remaining_timeout + ) + + +def get_elements_from_selectors_soup( + page: "Page", soup: BeautifulSoup, selectors: List[str], only_one: bool +) -> Union[Optional[Element], List[Element]]: + for selector in reversed(selectors): + dendrite_elements = _get_all_elements_from_selector_soup(selector, soup, page) + if len(dendrite_elements) > 0: + return dendrite_elements[0] if only_one else dendrite_elements + return None diff --git a/dendrite/sync_api/_core/mixin/keyboard.py b/dendrite/browser/sync_api/mixin/keyboard.py similarity index 90% rename from dendrite/sync_api/_core/mixin/keyboard.py rename to dendrite/browser/sync_api/mixin/keyboard.py index e3ed73a..2f1c882 100644 --- a/dendrite/sync_api/_core/mixin/keyboard.py +++ b/dendrite/browser/sync_api/mixin/keyboard.py @@ -1,6 +1,6 @@ -from typing import Any, Union, Literal -from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite._common._exceptions.dendrite_exception import DendriteException +from typing import Literal, Union +from dendrite.browser._common._exceptions.dendrite_exception import DendriteException +from ..protocol.page_protocol import DendritePageProtocol class KeyboardMixin(DendritePageProtocol): diff --git a/dendrite/sync_api/_core/mixin/markdown.py b/dendrite/browser/sync_api/mixin/markdown.py similarity index 88% rename from dendrite/sync_api/_core/mixin/markdown.py rename to dendrite/browser/sync_api/mixin/markdown.py index f094330..193a1bb 100644 --- a/dendrite/sync_api/_core/mixin/markdown.py +++ b/dendrite/browser/sync_api/mixin/markdown.py @@ -1,9 +1,9 @@ +import re from typing import Optional from bs4 import BeautifulSoup -import re -from dendrite.sync_api._core.mixin.extract import ExtractionMixin -from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol from markdownify import markdownify as md +from ..mixin.extract import ExtractionMixin +from ..protocol.page_protocol import DendritePageProtocol class MarkdownMixin(ExtractionMixin, DendritePageProtocol): diff --git a/dendrite/sync_api/_core/mixin/screenshot.py b/dendrite/browser/sync_api/mixin/screenshot.py similarity index 88% rename from dendrite/sync_api/_core/mixin/screenshot.py rename to dendrite/browser/sync_api/mixin/screenshot.py index 3495b4c..5cc621e 100644 --- a/dendrite/sync_api/_core/mixin/screenshot.py +++ b/dendrite/browser/sync_api/mixin/screenshot.py @@ -1,4 +1,4 @@ -from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol +from ..protocol.page_protocol import DendritePageProtocol class ScreenshotMixin(DendritePageProtocol): diff --git a/dendrite/sync_api/_core/mixin/wait_for.py b/dendrite/browser/sync_api/mixin/wait_for.py similarity index 87% rename from dendrite/sync_api/_core/mixin/wait_for.py rename to dendrite/browser/sync_api/mixin/wait_for.py index 76cac15..ccc5dfd 100644 --- a/dendrite/sync_api/_core/mixin/wait_for.py +++ b/dendrite/browser/sync_api/mixin/wait_for.py @@ -1,10 +1,12 @@ import time import time -from dendrite.sync_api._core.mixin.ask import AskMixin -from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol -from 
dendrite._common._exceptions.dendrite_exception import PageConditionNotMet -from dendrite._common._exceptions.dendrite_exception import DendriteException from loguru import logger +from dendrite.browser._common._exceptions.dendrite_exception import ( + DendriteException, + PageConditionNotMet, +) +from ..mixin.ask import AskMixin +from ..protocol.page_protocol import DendritePageProtocol class WaitForMixin(AskMixin, DendritePageProtocol): diff --git a/dendrite/async_api/_core/models/__init__.py b/dendrite/browser/sync_api/protocol/__init__.py similarity index 100% rename from dendrite/async_api/_core/models/__init__.py rename to dendrite/browser/sync_api/protocol/__init__.py diff --git a/dendrite/browser/sync_api/protocol/browser_protocol.py b/dendrite/browser/sync_api/protocol/browser_protocol.py new file mode 100644 index 0000000..f708e61 --- /dev/null +++ b/dendrite/browser/sync_api/protocol/browser_protocol.py @@ -0,0 +1,61 @@ +from typing import TYPE_CHECKING, Optional, Protocol, Union +from typing_extensions import Literal +from dendrite.browser.remote import Providers + +if TYPE_CHECKING: + from ..dendrite_browser import Dendrite +from playwright.sync_api import Browser, Download, Playwright +from ..types import PlaywrightPage + + +class BrowserProtocol(Protocol): + + def __init__(self, settings: Providers) -> None: ... + + def get_download( + self, dendrite_browser: "Dendrite", pw_page: PlaywrightPage, timeout: float + ) -> Download: + """ + Retrieves the download event from the browser. + + Returns: + Download: The download event. + + Raises: + Exception: If there is an issue retrieving the download event. + """ + ... + + def start_browser(self, playwright: Playwright, pw_options: dict) -> Browser: + """ + Starts the browser session. + + Args: + playwright: The playwright instance + pw_options: Playwright launch options + + Returns: + Browser: A Browser instance + """ + ... + + def configure_context(self, browser: "Dendrite") -> None: + """ + Configures the browser context. + + Args: + browser (Dendrite): The browser to configure. + + Raises: + Exception: If there is an issue configuring the browser context. + """ + ... + + def stop_session(self) -> None: + """ + Stops the browser session. + + Raises: + Exception: If there is an issue stopping the browser session. + """ + ... diff --git a/dendrite/sync_api/_core/models/download_interface.py b/dendrite/browser/sync_api/protocol/download_protocol.py similarity index 100% rename from dendrite/sync_api/_core/models/download_interface.py rename to dendrite/browser/sync_api/protocol/download_protocol.py diff --git a/dendrite/browser/sync_api/protocol/page_protocol.py b/dendrite/browser/sync_api/protocol/page_protocol.py new file mode 100644 index 0000000..d12b839 --- /dev/null +++ b/dendrite/browser/sync_api/protocol/page_protocol.py @@ -0,0 +1,21 @@ +from typing import TYPE_CHECKING, Protocol +from dendrite.logic import LogicEngine + +if TYPE_CHECKING: + from ..dendrite_browser import Dendrite + from ..dendrite_page import Page + + +class DendritePageProtocol(Protocol): + """ + Protocol that specifies the required methods and attributes + for the `ExtractionMixin` to work. + """ + + @property + def logic_engine(self) -> LogicEngine: ... + + @property + def dendrite_browser(self) -> "Dendrite": ... + + def _get_page(self) -> "Page": ... 
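+
+
+# Illustrative sketch (an assumption, not part of the original module): a mixin
+# written against this protocol can reach the page and logic engine without
+# importing Page or Dendrite directly, e.g.:
+#
+#     class UrlMixin(DendritePageProtocol):
+#         def current_url(self) -> str:
+#             return self._get_page().url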
diff --git a/dendrite/browser/sync_api/types.py b/dendrite/browser/sync_api/types.py
new file mode 100644
index 0000000..de26bef
--- /dev/null
+++ b/dendrite/browser/sync_api/types.py
@@ -0,0 +1,12 @@
+from typing import Any, Dict, Literal, Type, TypeVar, Union
+from playwright.sync_api import Page
+from pydantic import BaseModel
+
+Interaction = Literal["click", "fill", "hover"]
+T = TypeVar("T")
+PydanticModel = TypeVar("PydanticModel", bound=BaseModel)
+PrimitiveTypes = Union[Type[bool], Type[int], Type[float], Type[str]]
+JsonSchema = Dict[str, Any]
+TypeSpec = Union[PrimitiveTypes, PydanticModel, JsonSchema]
+PlaywrightPage = Page
diff --git a/dendrite/exceptions/__init__.py b/dendrite/exceptions/__init__.py
index fa5ff25..ad0fbf7 100644
--- a/dendrite/exceptions/__init__.py
+++ b/dendrite/exceptions/__init__.py
@@ -1,11 +1,11 @@
-from .._common._exceptions.dendrite_exception import (
+from ..browser._common._exceptions.dendrite_exception import (
     BaseDendriteException,
+    BrowserNotLaunchedError,
     DendriteException,
     IncorrectOutcomeError,
     InvalidAuthSessionError,
     MissingApiKeyError,
     PageConditionNotMet,
-    BrowserNotLaunchedError,
 )
 __all__ = [
diff --git a/dendrite/logic/__init__.py b/dendrite/logic/__init__.py
new file mode 100644
index 0000000..4c2737c
--- /dev/null
+++ b/dendrite/logic/__init__.py
@@ -0,0 +1,4 @@
+from .async_logic_engine import AsyncLogicEngine
+from .sync_logic_engine import LogicEngine
+
+__all__ = ["LogicEngine", "AsyncLogicEngine"]
diff --git a/dendrite/async_api/_dom/__init__.py b/dendrite/logic/ask/__init__.py
similarity index 100%
rename from dendrite/async_api/_dom/__init__.py
rename to dendrite/logic/ask/__init__.py
diff --git a/dendrite/logic/ask/ask.py b/dendrite/logic/ask/ask.py
new file mode 100644
index 0000000..af7e71e
--- /dev/null
+++ b/dendrite/logic/ask/ask.py
@@ -0,0 +1,231 @@
+import re
+from typing import List
+
+import json_repair
+from jsonschema import validate
+from openai.types.chat.chat_completion_content_part_param import (
+    ChatCompletionContentPartParam,
+)
+
+from dendrite.logic.config import Config
+from dendrite.logic.llm.agent import Agent, Message
+from dendrite.models.dto.ask_page_dto import AskPageDTO
+from dendrite.models.response.ask_page_response import AskPageResponse
+
+from .image import segment_image
+
+
+async def ask_page_action(ask_page_dto: AskPageDTO, config: Config) -> AskPageResponse:
+    image_segments = segment_image(
+        ask_page_dto.page_information.screenshot_base64, segment_height=2000
+    )
+
+    agent = Agent(config.llm_config.get("ask_page_agent"))
+    scrolled_to_segment_i = 0
+    content = generate_ask_page_prompt(ask_page_dto, image_segments)
+    messages: List[Message] = [
+        {"role": "user", "content": content},
+    ]
+
+    max_iterations = len(image_segments) + 5
+    iteration = 0
+    while iteration < max_iterations:
+        iteration += 1
+
+        text = await agent.call_llm(messages)
+        messages.append(
+            {
+                "role": "assistant",
+                "content": text,
+            }
+        )
+
+        json_pattern = r"```json(.*?)```"
+
+        if not text:
+            continue
+
+        json_matches = re.findall(json_pattern, text, re.DOTALL)
+
+        if len(json_matches) == 0:
+            continue
+
+        extracted_json = json_matches[0].strip()
+        data_dict = json_repair.loads(extracted_json)
+
+        if not isinstance(data_dict, dict):
+            content = "Your message doesn't contain a correctly formatted json object, try again."
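+            # Feed the formatting error back to the model and retry.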
+            messages.append({"role": "user", "content": content})
+            continue
+
+        if "scroll_down" in data_dict:
+            next_segment = scrolled_to_segment_i + 1
+            if next_segment < len(image_segments):
+                content = generate_scroll_prompt(image_segments, next_segment)
+                scrolled_to_segment_i = next_segment
+            else:
+                content = "You cannot scroll any further."
+            messages.append({"role": "user", "content": content})
+            continue
+
+        elif "return_data" in data_dict and "description" in data_dict:
+            return_data = data_dict["return_data"]
+            try:
+                if ask_page_dto.return_schema:
+                    validate(instance=return_data, schema=ask_page_dto.return_schema)
+            except Exception as e:
+                err_message = f"Your return data doesn't match the requested return json schema, try again. Exception: {e}"
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": err_message,
+                    }
+                )
+                continue
+
+            return AskPageResponse(
+                status="success",
+                return_data=data_dict["return_data"],
+                description=data_dict["description"],
+            )
+
+        elif "error" in data_dict:
+            was_blocked = data_dict.get("was_blocked_by_recaptcha", False)
+            return AskPageResponse(
+                status="error",
+                return_data=data_dict["error"],
+                description=f'{data_dict["error"]}, was_blocked_by_recaptcha: {was_blocked}',
+            )
+
+        else:
+            err_message = (
+                "Your message doesn't contain a correctly formatted action, try again."
+            )
+            messages.append(
+                {
+                    "role": "user",
+                    "content": err_message,
+                }
+            )
+
+    return AskPageResponse(
+        status="error",
+        return_data="Scrolled through the entire page without finding the requested data.",
+        description="",
+    )
+
+
+def generate_ask_page_prompt(
+    ask_page_dto: AskPageDTO, image_segments: list, scrolled_to_segment_i: int = 0
+) -> List[ChatCompletionContentPartParam]:
+    # Generate scroll down hint based on number of segments
+    scroll_down_hint = (
+        ""
+        if len(image_segments) == 1
+        else """
+
+If you think you need to scroll further down, output an object with the key `scroll_down` and nothing else:
+
+Action Message:
+[Short reasoning first]
+```json
+{
+    "scroll_down": true
+}
+```
+
+You can keep scrolling down, noting important details, until you are ready to return the requested data, which you would do in a separate message."""
+    )
+
+    # Get return schema prompt
+    return_schema_prompt = (
+        str(ask_page_dto.return_schema)
+        if ask_page_dto.return_schema
+        else "No schema specified by the user"
+    )
+
+    # Construct the main prompt content
+    content: List[ChatCompletionContentPartParam] = [
+        {
+            "type": "text",
+            "text": f"""Please look at the page and return data that matches the requested schema and prompt.
+
+
+{ask_page_dto.prompt}
+
+
+
+{return_schema_prompt}
+
+
+Look at the viewport and decide on the next action:
+
+If you can solve the prompt and return the requested data from the viewport, output a message with triple backticks and 'json' like in the example below. Make sure `return_data` matches the requested return schema:
+
+Action Message:
+[Short reasoning first]
+```json
+{{
+    "description": "E.g. There is a red button with the text 'get started' positioned underneath the title 'welcome!'",
+    "return_data": {{"element_exists": true, "foo": "bar"}}
+}}
+```
+
+Remember, `return_data` should be json that matches the structure of the requested json schema if available. 
Don't forget to include a description.{scroll_down_hint} + +In case you think the data is not available on the current page and the task does not describe how to handle the non-available data, or the page is blocked by a captcha puzzle or similar, output a json with a short error message, like this: + +Action Message: +[Short reasoning first.] +```json +{{ + "error": "reason why the task cannot be completed here", + "was_blocked_by_recaptcha": true/false +}} +``` + +Here is a screenshot of the viewport:""", + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_segments[scrolled_to_segment_i]}" + }, + }, + ] + + return content + + +def generate_scroll_prompt( + image_segments: list, next_segment: int +) -> List[ChatCompletionContentPartParam]: + """ + Generates the prompt for scrolling to next segment. + + Args: + image_segments: List of image segments + next_segment: Index of next segment + + Returns: + List of message content blocks + """ + last_segment_reminder = ( + " You won't be able to scroll further now." + if next_segment == len(image_segments) - 1 + else "" + ) + + content = [ + { + "type": "text", + "text": f"""You have scrolled down. You are viewing segment {next_segment+1}/{len(image_segments)}.{last_segment_reminder} Here is the new viewport:""", + }, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_segments[next_segment]}" + }, + }, + ] + + return content diff --git a/dendrite/logic/ask/image.py b/dendrite/logic/ask/image.py new file mode 100644 index 0000000..6a61566 --- /dev/null +++ b/dendrite/logic/ask/image.py @@ -0,0 +1,35 @@ +import base64 +import io +from typing import List + +from loguru import logger +from PIL import Image + + +def segment_image( + base64_image: str, + segment_height: int = 7900, +) -> List[str]: + if len(base64_image) < 100: + raise Exception("Failed to segment image since it is too small / glitched.") + + image_data = base64.b64decode(base64_image) + image = Image.open(io.BytesIO(image_data)) + width, height = image.size + segments = [] + + for i in range(0, height, segment_height): + # Define the box for cropping (left, upper, right, lower) + box = (0, i, width, min(i + segment_height, height)) + segment = image.crop(box) + + # Convert RGBA to RGB if necessary + if segment.mode == "RGBA": + segment = segment.convert("RGB") + + buffer = io.BytesIO() + segment.save(buffer, format="JPEG") + segment_data = buffer.getvalue() + segments.append(base64.b64encode(segment_data).decode()) + + return segments diff --git a/dendrite/logic/async_logic_engine.py b/dendrite/logic/async_logic_engine.py new file mode 100644 index 0000000..38915bd --- /dev/null +++ b/dendrite/logic/async_logic_engine.py @@ -0,0 +1,42 @@ +from typing import List, Optional, Protocol + +from dendrite.logic.ask import ask +from dendrite.logic.config import Config +from dendrite.logic.extract import extract +from dendrite.logic.get_element import get_element +from dendrite.logic.verify_interaction import verify_interaction +from dendrite.models.dto.ask_page_dto import AskPageDTO +from dendrite.models.dto.cached_extract_dto import CachedExtractDTO +from dendrite.models.dto.cached_selector_dto import CachedSelectorDTO +from dendrite.models.dto.extract_dto import ExtractDTO +from dendrite.models.dto.get_elements_dto import GetElementsDTO +from dendrite.models.dto.make_interaction_dto import VerifyActionDTO +from dendrite.models.response.ask_page_response import AskPageResponse +from 
dendrite.models.response.extract_response import ExtractResponse +from dendrite.models.response.get_element_response import GetElementResponse +from dendrite.models.response.interaction_response import InteractionResponse +from dendrite.models.scripts import Script +from dendrite.models.selector import Selector + + +class AsyncLogicEngine: + def __init__(self, config: Config): + self._config = config + + async def get_element(self, dto: GetElementsDTO) -> GetElementResponse: + return await get_element.get_element(dto, self._config) + + async def get_cached_selectors(self, dto: CachedSelectorDTO) -> List[Selector]: + return await get_element.get_cached_selector(dto, self._config) + + async def get_cached_scripts(self, dto: CachedExtractDTO) -> List[Script]: + return await extract.get_cached_scripts(dto, self._config) + + async def extract(self, dto: ExtractDTO) -> ExtractResponse: + return await extract.extract(dto, self._config) + + async def verify_action(self, dto: VerifyActionDTO) -> InteractionResponse: + return await verify_interaction.verify_action(dto, self._config) + + async def ask_page(self, dto: AskPageDTO) -> AskPageResponse: + return await ask.ask_page_action(dto, self._config) diff --git a/dendrite/sync_api/_api/__init__.py b/dendrite/logic/cache/__init__.py similarity index 100% rename from dendrite/sync_api/_api/__init__.py rename to dendrite/logic/cache/__init__.py diff --git a/dendrite/logic/cache/file_cache.py b/dendrite/logic/cache/file_cache.py new file mode 100644 index 0000000..b56bc18 --- /dev/null +++ b/dendrite/logic/cache/file_cache.py @@ -0,0 +1,179 @@ +import json +import threading +from hashlib import md5 +from pathlib import Path +from typing import ( + Any, + Dict, + Generic, + List, + Mapping, + Type, + TypeVar, + Union, + Optional, + overload, +) + +from pydantic import BaseModel + +T = TypeVar("T", bound=Union[BaseModel, Mapping[Any, Any]]) + + +class FileCache(Generic[T]): + def __init__( + self, model_class: Type[T], filepath: Union[str, Path] = "./cache.json" + ): + self.filepath = Path(filepath) + self.model_class = model_class + self.lock = threading.RLock() + self.cache: Dict[str, List[T]] = {} + + # Create file if it doesn't exist + if not self.filepath.exists(): + self.filepath.parent.mkdir(parents=True, exist_ok=True) + self._save_cache({}) + else: + self._load_cache() + + def _load_cache(self) -> None: + """Load cache from file into memory""" + with self.lock: + try: + json_string = self.filepath.read_text() + raw_dict = json.loads(json_string) + + # Convert each entry based on model_class type + self.cache = {} + for k, v_list in raw_dict.items(): + if not isinstance(v_list, list): + v_list = [v_list] # Convert old single-value format to list + + self.cache[k] = [] + for v in v_list: + if issubclass(self.model_class, BaseModel): + self.cache[k].append( + self.model_class.model_validate_json(json.dumps(v)) + ) + else: + # For any Mapping type (dict, TypedDict, etc) + self.cache[k].append(v) + except (json.JSONDecodeError, FileNotFoundError): + self.cache = {} + + def _save_cache(self, cache_dict: Dict[str, List[T]]) -> None: + """Save cache to file""" + with self.lock: + # Convert entries based on their type + serializable_dict = {} + for k, v_list in cache_dict.items(): + serializable_dict[k] = [] + for v in v_list: + if isinstance(v, BaseModel): + serializable_dict[k].append(json.loads(v.model_dump_json())) + elif isinstance(v, Mapping): + serializable_dict[k].append( + dict(v) + ) # Convert any Mapping to dict + else: + raise 
ValueError(f"Unsupported type for cache value: {type(v)}") + + self.filepath.write_text(json.dumps(serializable_dict, indent=2)) + + @overload + def get( + self, key: Union[str, Dict[str, str]], index: None = None + ) -> Optional[List[T]]: ... + + @overload + def get(self, key: Union[str, Dict[str, str]], index: int) -> Optional[T]: ... + + def get( + self, key: Union[str, Dict[str, str]], index: Optional[int] = None + ) -> Union[T, List[T], None]: + """ + Get cached values for a key. If index is provided, returns that specific item. + If index is None, returns the full list of items. + Returns None if key doesn't exist or index is out of range. + """ + hashed_key = self.hash(key) + values = self.cache.get(hashed_key, []) + + if index is not None: + return values[index] if 0 <= index < len(values) else None + return values if values else None + + def set(self, key: Union[str, Dict[str, str]], values: Union[T, List[T]]) -> None: + """ + Replace all values for a key with new value(s). + If a single value is provided, it will be wrapped in a list. + """ + hashed_key = self.hash(key) + with self.lock: + if isinstance(values, list): + self.cache[hashed_key] = values + else: + self.cache[hashed_key] = [values] + self._save_cache(self.cache) + + def append(self, key: Union[str, Dict[str, str]], value: T) -> None: + """ + Append a single value to the list of values for a key. + Creates a new list if the key doesn't exist. + """ + hashed_key = self.hash(key) + with self.lock: + if hashed_key not in self.cache: + self.cache[hashed_key] = [] + self.cache[hashed_key].append(value) + self._save_cache(self.cache) + + def delete(self, key: str, index: Optional[int] = None) -> None: + """ + Delete cached value(s). If index is provided, only that item is deleted. + If index is None, all items for the key are deleted. + """ + hashed_key = self.hash(key) + with self.lock: + if hashed_key in self.cache: + if index is not None and 0 <= index < len(self.cache[hashed_key]): + del self.cache[hashed_key][index] + if not self.cache[hashed_key]: # Remove key if list is empty + del self.cache[hashed_key] + else: + del self.cache[hashed_key] + self._save_cache(self.cache) + + def hash(self, key: Union[str, Dict]) -> str: + """ + Create a deterministic hash from a string or dictionary. + Handles nested structures and different value types. 
+ """ + + def normalize_value(v): + if isinstance(v, dict): + return self.hash(v) + elif isinstance(v, (list, tuple)): + return "[" + ",".join(normalize_value(x) for x in v) + "]" + elif v is None: + return "null" + elif isinstance(v, bool): + return str(v).lower() + else: + return str(v).strip() + + if isinstance(key, dict): + try: + # Sort by normalized string keys + sorted_pairs = [ + f"{str(k).strip()}∴{normalize_value(v)}" # Using a rare Unicode character as delimiter + for k, v in sorted(key.items(), key=lambda x: str(x[0]).strip()) + ] + key = "❘".join(sorted_pairs) # Using another rare Unicode character + except Exception as e: + raise ValueError(f"Failed to process dictionary key: {e}") + + try: + return md5(str(key).encode("utf-8")).hexdigest() + except Exception as e: + raise ValueError(f"Failed to create hash: {e}") diff --git a/dendrite/sync_api/_api/dto/__init__.py b/dendrite/logic/code/__init__.py similarity index 100% rename from dendrite/sync_api/_api/dto/__init__.py rename to dendrite/logic/code/__init__.py diff --git a/dendrite/logic/code/code_session.py b/dendrite/logic/code/code_session.py new file mode 100644 index 0000000..fcb7300 --- /dev/null +++ b/dendrite/logic/code/code_session.py @@ -0,0 +1,166 @@ +import json # Important to keep since it is used inside the scripts +import re # Important to keep since it is used inside the scripts +import sys +import traceback +from datetime import datetime # Important to keep since it is used inside the scripts +from typing import Any, List, Optional + +from bs4 import BeautifulSoup +from jsonschema import validate +from loguru import logger + +from ..dom.truncate import truncate_long_string + + +class InterpreterError(Exception): + pass + + +def custom_exec( + cmd, + globals=None, + locals=None, +): + try: + exec(cmd, globals, locals) + except SyntaxError as err: + error_class = err.__class__.__name__ + detail = err.args[0] + line_number = err.lineno + except Exception as err: + error_class = err.__class__.__name__ + detail = err.args[0] + cl, exc, tb = sys.exc_info() + line_number = traceback.extract_tb(tb)[-1][1] + else: + return + + traceback_desc = traceback.format_exc() + raise InterpreterError( + f"{error_class} at line {line_number}. Detail: {detail}. Exception: {traceback_desc}" + ) + + +class CodeSession: + def __init__(self): + self.local_vars = {"soup": None, "html_string": "", "datetime": datetime} + + def get_local_var(self, name: str) -> Any: + try: + return self.local_vars[name] + except Exception as e: + return f"Error: Couldn't get local var with name {name}. 
Exception: {e}" + + def add_variable(self, name: str, value: Any): + self.local_vars[name] = value + + def exec_code( + self, + code: str, + soup: Optional[BeautifulSoup] = None, + html_string: Optional[str] = None, + ): + try: + self.local_vars["soup"] = soup + self.local_vars["html_string"] = html_string + self.local_vars["datetime"] = datetime + + copied_vars = self.local_vars.copy() + + try: + exec(code, globals(), copied_vars) + except SyntaxError as err: + error_class = err.__class__.__name__ + detail = err.args[0] + line_number = err.lineno + raise InterpreterError( + "%s at line %d, detail: %s" % (error_class, line_number, detail) + ) + except Exception as err: + error_class = err.__class__.__name__ + detail = err.args[0] + _, _, tb = sys.exc_info() + line_number = traceback.extract_tb(tb)[-1][1] + traceback_desc = traceback.format_exc() + raise InterpreterError( + "%s at line %d, detail: %s" + % (error_class, line_number, traceback_desc) + ) + + created_vars = { + k: v for k, v in copied_vars.items() if k not in self.local_vars + } + + self.local_vars = copied_vars + return created_vars + + except Exception as e: + raise Exception(f"Code failed to run. Exception: {e}") + + def validate_response(self, return_data_json_schema: Any, response_data: Any): + if return_data_json_schema != None: + try: + validate( + instance=response_data, + schema=return_data_json_schema, + ) + except Exception as e: + raise e + + def llm_readable_exec_res( + self, variables, prompt: str, attempts: int, max_attempts: int + ): + response = "Code executed.\n\n" + + if len(variables) == 0: + response += "No new variables were created." + else: + response += "Newly created variables:" + for var_name, var_value in variables.items(): + show_length = 600 if var_name == "response_data" else 300 + + try: + if var_value is None: + str_value = "None" + else: + str_value = str(var_value) + + except Exception as e: + logger.error( + f"Error converting to string for display: {e},\nvar_name: {var_name} | var_value{var_value}" + ) + str_value = "" + + truncated = truncate_long_string( + str_value, max_len_end=show_length, max_len_start=show_length + ) + extra_info = "" + if isinstance(var_value, List): + extra_info = f"\n{var_name}'s length is {len(var_value)}." + response += f"\n\n`{var_name}={truncated}`{extra_info}" + + response += f"\n\nDo these variables match the expected values? Remember, this is what the user asked for:\n\n{prompt}\n\nIf not, try again and remember, if one approach fails several times you might need to reinspect the DOM and try a different approach. You have {max_attempts - attempts} attempts left to try and complete the task. If you are happy with the results, output a success message." + + return response + + +def execute(script: str, raw_html: str, return_data_json_schema) -> Any: + code_session = CodeSession() + soup = BeautifulSoup(raw_html, "lxml") + try: + + created_variables = code_session.exec_code(script, soup, raw_html) + + if "response_data" in created_variables: + response_data = created_variables["response_data"] + + try: + code_session.validate_response(return_data_json_schema, response_data) + except Exception as e: + raise Exception(f"Failed to validate response data. 
Exception: {e}") + + return response_data + else: + raise Exception("No return data available for this script.") + except Exception as e: + raise e diff --git a/dendrite/logic/config.py b/dendrite/logic/config.py new file mode 100644 index 0000000..b69daf7 --- /dev/null +++ b/dendrite/logic/config.py @@ -0,0 +1,27 @@ +from pathlib import Path +from typing import Optional, Union + +from playwright.async_api import StorageState + +from dendrite.logic.cache.file_cache import FileCache +from dendrite.logic.llm.config import LLMConfig +from dendrite.models.scripts import Script +from dendrite.models.selector import Selector + + +class Config: + def __init__( + self, + root_path: Union[str, Path] = ".dendrite", + cache_path: Union[str, Path] = "cache", + auth_session_path: Union[str, Path] = "auth", + llm_config: Optional[LLMConfig] = None, + ): + self.cache_path = root_path / Path(cache_path) + self.llm_config = llm_config or LLMConfig() + self.extract_cache = FileCache(Script, self.cache_path / "extract.json") + self.element_cache = FileCache(Selector, self.cache_path / "get_element.json") + self.storage_cache = FileCache( + StorageState, self.cache_path / "storage_state.json" + ) + self.auth_session_path = root_path / Path(auth_session_path) diff --git a/dendrite/sync_api/_api/response/__init__.py b/dendrite/logic/dom/__init__.py similarity index 100% rename from dendrite/sync_api/_api/response/__init__.py rename to dendrite/logic/dom/__init__.py diff --git a/dendrite/logic/dom/css.py b/dendrite/logic/dom/css.py new file mode 100644 index 0000000..8555df4 --- /dev/null +++ b/dendrite/logic/dom/css.py @@ -0,0 +1,185 @@ +from typing import Optional + +from bs4 import BeautifulSoup, Tag +from loguru import logger + + +def find_css_selector(ele: Tag, soup: BeautifulSoup) -> str: + logger.debug(f"Finding selector for element: {ele.name} with attrs: {ele.attrs}") + + # Add this debug block + final_selector = "" # Track the selector being built + matches = [] # Track matching elements + + def debug_selector(selector: str) -> None: + nonlocal matches + try: + matches = soup.select(selector) + logger.debug(f"Selector '{selector}' matched {len(matches)} elements") + except Exception as e: + logger.error(f"Invalid selector '{selector}': {e}") + + # Check for inherently unique elements + if ele.name in ["html", "head", "body"]: + return ele.name + + # List of attributes to check for unique selectors + priority_attrs = [ + "id", + "name", + "data-testid", + "data-cy", + "data-qa", + "aria-label", + "aria-labelledby", + "for", + "href", + "alt", + "title", + "role", + "placeholder", + ] + + # Try attrs + for attr in priority_attrs: + if attr_selector := check_unique_attribute(ele, soup, attr, ele.name): + return attr_selector + + # Try class combinations + if class_selector := find_unique_class_combination(ele, soup): + return class_selector + + # If still not unique, use parent selector with nth-child + parent_selector = find_selector_with_parent(ele, soup) + + return parent_selector + + +def check_unique_attribute( + ele: Tag, soup: BeautifulSoup, attr: str, tag_name: str +) -> str: + attr_value = ele.get(attr) + if attr_value: + attr_value = css_escape(attr_value) + attr = css_escape(attr) + selector = f'{css_escape(tag_name)}[{attr}="{attr_value}"]' + if check_if_selector_successful(selector, soup, True): + return selector + return "" + + +def find_unique_class_combination(ele: Tag, soup: BeautifulSoup) -> str: + classes = ele.get("class", []) + + if isinstance(classes, str): + classes = [classes] + + 
if not classes: + return "" + + tag_name = css_escape(ele.name) + + # Try single classes first + for cls in classes: + selector = f"{tag_name}.{css_escape(cls)}" + if check_if_selector_successful(selector, soup, True): + return selector + + # If single classes don't work, try the full combination + full_selector = f"{tag_name}{'.'.join([''] + [css_escape(c) for c in classes])}" + if check_if_selector_successful(full_selector, soup, True): + return full_selector + + return "" + + +def find_selector_with_parent(ele: Tag, soup: BeautifulSoup) -> str: + parent = ele.find_parent() + if parent is None or parent == soup: + return f"{css_escape(ele.name)}" + + parent_selector = find_css_selector(parent, soup) + siblings_of_same_type = parent.find_all(ele.name, recursive=False) + + if len(siblings_of_same_type) == 1: + return f"{parent_selector} > {css_escape(ele.name)}" + else: + index = position_in_node_list(ele, parent) + return f"{parent_selector} > {css_escape(ele.name)}:nth-child({index})" + + +def position_in_node_list(element: Tag, parent: Tag): + for index, child in enumerate(parent.find_all(recursive=False)): + if child == element: + return index + 1 + return -1 + + +# https://github.com/mathiasbynens/CSS.escape +def css_escape(value): + if len(str(value)) == 0: + raise TypeError("`CSS.escape` requires an argument.") + + string = str(value) + length = len(string) + result = "" + first_code_unit = ord(string[0]) if length > 0 else None + + if length == 1 and first_code_unit == 0x002D: + return "\\" + string + + for index in range(length): + code_unit = ord(string[index]) + + if code_unit == 0x0000: + result += "\uFFFD" + continue + + if ( + (0x0001 <= code_unit <= 0x001F) + or code_unit == 0x007F + or (index == 0 and 0x0030 <= code_unit <= 0x0039) + or ( + index == 1 + and 0x0030 <= code_unit <= 0x0039 + and first_code_unit == 0x002D + ) + ): + result += "\\" + format(code_unit, "x") + " " + continue + + if ( + code_unit >= 0x0080 + or code_unit == 0x002D + or code_unit == 0x005F + or 0x0030 <= code_unit <= 0x0039 + or 0x0041 <= code_unit <= 0x005A + or 0x0061 <= code_unit <= 0x007A + ): + result += string[index] + continue + + result += "\\" + string[index] + + return result + + +def check_if_selector_successful( + selector: str, + bs4: BeautifulSoup, + only_one: bool, +) -> Optional[str]: + + els = None + try: + els = bs4.select(selector) + except Exception as e: + logger.warning(f"Error selecting {selector}: {e}") + + if els: + if only_one and len(els) == 1: + return selector + elif not only_one and len(els) >= 1: + return selector + + return None diff --git a/dendrite/logic/dom/strip.py b/dendrite/logic/dom/strip.py new file mode 100644 index 0000000..fb4dc43 --- /dev/null +++ b/dendrite/logic/dom/strip.py @@ -0,0 +1,158 @@ +import copy +from typing import List, Union, overload + +from bs4 import BeautifulSoup, Comment, Doctype, Tag + + +def mild_strip(soup: Tag, keep_d_id: bool = True) -> BeautifulSoup: + new_soup = BeautifulSoup(str(soup), "html.parser") + _mild_strip(new_soup, keep_d_id) + return new_soup + + +def mild_strip_in_place(soup: BeautifulSoup, keep_d_id: bool = True) -> None: + _mild_strip(soup, keep_d_id) + + +def _mild_strip(soup: BeautifulSoup, keep_d_id: bool = True) -> None: + for element in soup(text=lambda text: isinstance(text, Comment)): + element.extract() + + # for text in soup.find_all(text=lambda text: isinstance(text, NavigableString)): + # if len(text) > 200: + # text.replace_with(text[:200] + f"... 
[{len(text)-200} more chars]") + + for tag in soup( + ["head", "script", "style", "path", "polygon", "defs", "svg", "br", "Doctype"] + ): + tag.extract() + + for element in soup.contents: + if isinstance(element, Doctype): + element.extract() + + # for tag in soup.find_all(True): + # tag.attrs = { + # attr: (value[:100] if isinstance(value, str) else value) + # for attr, value in tag.attrs.items() + # } + # if keep_d_id == False: + # del tag["d-id"] + for tag in soup.find_all(True): + if tag.attrs.get("is-interactable-d_id") == "true": + continue + + tag.attrs = { + attr: (value[:100] if isinstance(value, str) else value) + for attr, value in tag.attrs.items() + } + if keep_d_id == False: + del tag["d-id"] + + # if browser != None: + # for elem in list(soup.descendants): + # if isinstance(elem, Tag) and not browser.element_is_visible(elem): + # elem.extract() + + +@overload +def shorten_attr_val(value: str, limit: int = 50) -> str: ... + + +@overload +def shorten_attr_val(value: List[str], limit: int = 50) -> List[str]: ... + + +def shorten_attr_val( + value: Union[str, List[str]], limit: int = 50 +) -> Union[str, List[str]]: + if isinstance(value, str): + return value[:limit] + + char_count = sum(map(len, value)) + if char_count <= limit: + return value + + while len(value) > 1 and char_count > limit: + char_count -= len(value.pop()) + + if len(value) == 1: + return value[0][:limit] + + return value + + +def clear_attrs(element: Tag): + + salient_attributes = [ + "d-id", + "class", + "id", + "type", + "alt", + "aria-describedby", + "aria-label", + "contenteditable", + "aria-role", + "input-checked", + "label", + "name", + "option_selected", + "placeholder", + "readonly", + "text-value", + "title", + "value", + "href", + "role", + "action", + "method", + ] + attrs = { + attr: shorten_attr_val(value, limit=200) + for attr, value in element.attrs.items() + if attr in salient_attributes + } + element.attrs = attrs + + +def strip_soup(soup: BeautifulSoup) -> BeautifulSoup: + # Create a copy of the soup to avoid modifying the original + stripped_soup = BeautifulSoup(str(soup), "html.parser") + + for tag in stripped_soup( + [ + "head", + "script", + "style", + "path", + "polygon", + "defs", + "br", + "Doctype", + ] # add noscript? + ): + tag.extract() + + # Remove comments + comments = stripped_soup.find_all(text=lambda text: isinstance(text, Comment)) + for comment in comments: + comment.extract() + + # Clear non-salient attributes + for element in stripped_soup.find_all(True): + if isinstance(element, Doctype): + element.extract() + else: + clear_attrs(element) + + return stripped_soup + + +def remove_hidden_elements(soup: BeautifulSoup): + # data-hidden is added by DendriteBrowser when an element is not visible + new_soup = copy.copy(soup) + elems = new_soup.find_all(attrs={"data-hidden": True}) + for elem in elems: + elem.extract() + return new_soup diff --git a/dendrite/logic/dom/truncate.py b/dendrite/logic/dom/truncate.py new file mode 100644 index 0000000..fa1bfd8 --- /dev/null +++ b/dendrite/logic/dom/truncate.py @@ -0,0 +1,73 @@ +import re + + +def truncate_long_string( + val: str, + max_len_start: int = 150, + max_len_end: int = 150, + trucate_desc: str = "chars truncated for readability", +): + return ( + val + if len(val) < max_len_start + max_len_end + else val[:max_len_start] + + f"... [{len(val)-max_len_start-max_len_end} {trucate_desc}] ..." 
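+        # The midsection of the string is replaced by a "[N chars truncated]"
+        # marker; the line below re-attaches the tail so the end stays visible.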
+ + val[-max_len_end:] + ) + + +def truncate_long_string_w_words( + val: str, + max_len_start: int = 150, + max_len_end: int = 150, + trucate_desc: str = "words truncated for readability", + show_more_words_for_longer_val: bool = True, +): + if len(val) < max_len_start + max_len_end: + return val + else: + if show_more_words_for_longer_val: + max_len_end += int(len(val) / 100) + max_len_end += int(len(val) / 100) + + truncate_start_pos = max_len_start + steps_taken_start = 0 + while ( + truncate_start_pos > 0 + and val[truncate_start_pos] not in [" ", "\n"] + and steps_taken_start < 20 + ): + truncate_start_pos -= 1 + steps_taken_start += 1 + + truncate_end_pos = len(val) - max_len_end + steps_taken_end = 0 + while ( + truncate_end_pos < len(val) + and val[truncate_end_pos] not in [" ", "\n"] + and steps_taken_end < 20 + ): + truncate_end_pos += 1 + steps_taken_end += 1 + + if steps_taken_start >= 20 or steps_taken_end >= 20: + # Return simple truncation if we've looped further than 20 chars + return truncate_long_string(val, max_len_start, max_len_end, trucate_desc) + else: + return ( + val[:truncate_start_pos] + + f" [...{len(val[truncate_start_pos:truncate_end_pos].split())} {trucate_desc}...] " + + val[truncate_end_pos:] + ) + + +def remove_excessive_whitespace(text: str, max_whitespaces=1): + return re.sub(r"\s{2,}", " " * max_whitespaces, text) + + +def truncate_and_remove_whitespace(text, max_len_start=100, max_len_end=100): + return truncate_long_string_w_words( + remove_excessive_whitespace(text), + max_len_start=max_len_start, + max_len_end=max_len_end, + ) diff --git a/dendrite/sync_api/_common/__init__.py b/dendrite/logic/extract/__init__.py similarity index 100% rename from dendrite/sync_api/_common/__init__.py rename to dendrite/logic/extract/__init__.py diff --git a/dendrite/logic/extract/cache.py b/dendrite/logic/extract/cache.py new file mode 100644 index 0000000..36aab75 --- /dev/null +++ b/dendrite/logic/extract/cache.py @@ -0,0 +1,55 @@ +from datetime import datetime +from typing import Any, List, Optional, Tuple +from urllib.parse import urlparse + +from loguru import logger + +from dendrite.logic.cache.file_cache import FileCache +from dendrite.logic.code.code_session import execute +from dendrite.logic.config import Config +from dendrite.models.dto.cached_extract_dto import CachedExtractDTO +from dendrite.models.scripts import Script + + +def save_script(code: str, prompt: str, url: str, cache: FileCache[Script]): + domain = urlparse(url).netloc + script = Script( + url=url, domain=domain, script=code, created_at=datetime.now().isoformat() + ) + cache.append({"prompt": prompt, "domain": domain}, script) + + +def get_scripts( + prompt: str, url: str, cache: FileCache[Script] +) -> Optional[List[Script]]: + domain = urlparse(url).netloc + return cache.get({"prompt": prompt, "domain": domain}) + + +async def get_working_cached_script( + prompt: str, raw_html: str, url: str, return_data_json_schema: Any, config: Config +) -> Optional[Tuple[Script, Any]]: + + if len(url) == 0: + raise Exception("Domain must be specified") + + scripts = get_scripts(prompt, url, config.extract_cache) + if scripts is None or len(scripts) == 0: + return None + logger.debug( + f"Found {len(scripts)} scripts in cache | Prompt: {prompt} in domain: {url}" + ) + + for script in scripts: + try: + res = execute(script.script, raw_html, return_data_json_schema) + return script, res + except Exception as e: + logger.debug( + f"Script failed with error: {str(e)} | Prompt: {prompt} in domain: {url}" + ) 
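+            # A cached script that breaks (e.g. after a site redesign) is not
+            # fatal: move on to the next candidate and only raise once every
+            # cached script has failed.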
+ continue + + raise Exception( + f"No working script found in cache even though {len(scripts)} scripts were available | Prompt: '{prompt}' in domain: '{url}'" + ) diff --git a/dendrite/logic/extract/compress_html.py b/dendrite/logic/extract/compress_html.py new file mode 100644 index 0000000..f7fb3fc --- /dev/null +++ b/dendrite/logic/extract/compress_html.py @@ -0,0 +1,490 @@ +import re +import time +from collections import Counter +from typing import List, Optional, Tuple, TypedDict, Union + +from bs4 import BeautifulSoup, NavigableString, PageElement +from bs4.element import Tag + +from dendrite.logic.dom.truncate import ( + truncate_and_remove_whitespace, + truncate_long_string_w_words, +) +from dendrite.logic.llm.token_count import token_count + +MAX_REPEATING_ELEMENT_AMOUNT = 6 + + +class FollowableListInfo(TypedDict): + expanded_elements: List[Tag] + amount: int + parent_element_d_id: str + first_element_d_id: str + + +class CompressHTML: + def __init__( + self, + root_soup: Union[BeautifulSoup, Tag], + ids_to_expand: List[str] = [], + compression_multiplier: float = 1, + exclude_dendrite_ids=False, + max_token_size: int = 80000, + max_size_per_element: int = 6000, + focus_on_text=False, + ) -> None: + if exclude_dendrite_ids == True: + for tag in root_soup.find_all(): + if "d-id" in tag.attrs: + del tag["d-id"] + + self.orginal_size = len(str(root_soup)) + self.root = BeautifulSoup(str(root_soup), "html.parser") + self.original_root = BeautifulSoup(str(root_soup), "html.parser") + self.ids_to_expand = ids_to_expand + self.expand_crawlable_list = False + self.compression_multiplier = compression_multiplier + self.lists_with_followable_urls: List[FollowableListInfo] = [] + self.max_token_size = max_token_size + self.max_size_per_element = max_size_per_element + self.focus_on_text = focus_on_text + self.search_terms = [] + + def get_lists_with_followable_urls(self): + return self.lists_with_followable_urls + + def _remove_consecutive_newlines(self, text: str, max_newlines=1): + cleaned_text = re.sub(r"\n{2,}", "\n" * max_newlines, text) + return cleaned_text + + def _parent_is_explicitly_expanded(self, tag: Tag) -> bool: + for tag in tag.parents: + if tag.get("d-id", None) in self.ids_to_expand: + return True + return False + + def _should_expand_anyways(self, tag: Tag) -> bool: + curr_id = tag.get("d-id", None) + + if curr_id in self.ids_to_expand: + return True + + tag_descendants = [ + descendant for descendant in tag.descendants if isinstance(descendant, Tag) + ] + for tag in tag_descendants: + id = tag.get("d-id", None) + if id in self.ids_to_expand: + return True + + for parent in tag.parents: + id = parent.get("d-id", None) + if id in self.ids_to_expand: + return True + # Expand the children of expanded elements if the expanded element isn't too big + if len(str(parent)) > 4000: + return False + + return False + + def clear_attrs(self, element: Tag, unique_class_names: List[str]): + attrs = {} + class_attr = element.get("class", []) + salient_attributes = [ + "type" "alt", + "aria-describedby", + "aria-label", + "aria-role", + "input-checked", + "label", + "name", + "option_selected", + "placeholder", + "readonly", + "text-value", + "title", + "value", + "href", + ] + + attrs = { + attr: (str(value)[:100] if len(str(value)) > 100 else str(value)) + for attr, value in element.attrs.items() + if attr in salient_attributes + } + + if class_attr: + if isinstance(class_attr, str): + class_attr = class_attr.split(" ") + + class_name_len = 0 + class_max_len = 200 + classes_to_show = 
[] + for class_name in class_attr: + if class_name_len + len(class_name) < class_max_len: + classes_to_show.append(class_name) + class_name_len += len(class_name) + + if len(classes_to_show) > 0: + attrs = {**attrs, "class": " ".join(classes_to_show)} + + id = element.get("id") + d_id = element.get("d-id") + + if isinstance(id, str): + attrs = {**attrs, "id": id} + + if d_id: + attrs = {**attrs, "d-id": d_id} + + element.attrs = attrs + + def extract_crawlable_list( + self, repeating_element_sequence_ids: List[str], amount_repeating_left: int + ): + items: List[Tag] = [] + parent_element_d_id: str = "" + first_element_d_id = repeating_element_sequence_ids[0] + + for d_id in repeating_element_sequence_ids: + + el = self.original_root.find(attrs={"d-id": str(d_id)}) + if ( + parent_element_d_id == "" + and isinstance(el, Tag) + and isinstance(el.parent, Tag) + ): + parent_element_d_id = str(el.parent.get("d-id", "")) + + original = BeautifulSoup(str(el), "html.parser") + link = original.find("a") + if link and isinstance(original, Tag): + items.append(original) + + if ( + len(items) == len(repeating_element_sequence_ids) + and len(items) >= MAX_REPEATING_ELEMENT_AMOUNT + and parent_element_d_id != "" + ): + self.lists_with_followable_urls.append( + { + "amount": len(items) + amount_repeating_left, + "expanded_elements": items, + "parent_element_d_id": parent_element_d_id, + "first_element_d_id": first_element_d_id, + } + ) + + def get_html_display(self) -> str: + def collapse(element: PageElement) -> str: + chars_to_keep = 2000 if self.focus_on_text else 100 + + if isinstance(element, Tag): + if element.get("d-id", "") == "-1": + return "" + + text = element.get_text() + if text: + element.attrs["is-compressed"] = "true" + element.attrs["d-id"] = str(element.get("d-id", "")) + element.clear() + element.append( + truncate_and_remove_whitespace( + text, max_len_start=chars_to_keep, max_len_end=chars_to_keep + ) + ) + return str(element) + else: + return "" + elif isinstance(element, NavigableString): + return truncate_and_remove_whitespace( + element, max_len_start=chars_to_keep, max_len_end=chars_to_keep + ) + else: + return "" + + start_time = time.time() + class_names = [ + name for tag in self.root.find_all() for name in tag.get("class", []) + ] + + counts = Counter(class_names) + unique_class_names = [name for name, count in counts.items() if count == 1] + + def get_repeating_element_info(el: Tag) -> Tuple[str, List[str]]: + return ( + el.name, + [el.name for el in el.children if isinstance(el, Tag)], + ) + + def is_repeating_element( + previous_element_info: Optional[Tuple[str, List[str]]], element: Tag + ) -> bool: + if previous_element_info: + repeat_element_info = get_repeating_element_info(element) + return ( + previous_element_info == repeat_element_info + and element.name != "div" + ) + + return False + + # children_size += token_count(str(child)) + # if children_size > 400: + # children_left = {} + # for c in child.next_siblings: + # if isinstance(c, Tag): + # if c.name in children_left: + # children_left[c.name] += 1 + # else: + # children_left[c.name] = 0 + # desc = "" + # for c_name in children_left.keys(): + # desc = f"{children_left[c_name]} {c_name} tag(s) truncated for readability" + # child.replace_with(f"[...{desc}...]") + # break + + def traverse(tag: Union[BeautifulSoup, Tag]): + previous_element_info: Optional[Tuple[str, List[str]]] = None + repeating_element_sequence_ids = [] + has_placed_truncation = False + same_element_repeat_amount: int = 0 + + tag_children = 
(child for child in tag.children if isinstance(child, Tag)) + + total_token_size = 0 + for index, child in enumerate(tag_children): + + total_token_size += len(str(child)) + # if total_token_size > self.max_size_per_element * 4 and index > 60: + # names = {} + # for next_sibling in child.next_siblings: + # if isinstance(next_sibling, Tag): + # if next_sibling.name in names: + # names[next_sibling.name] += 1 + # else: + # names[next_sibling.name] = 1 + + # removable = [sib for sib in child.next_siblings] + # for sib in removable: + # try: + # sib.replace_with("") + # except: + # print("failed to replace sib: ", str(sib)) + + # truncation_message = [] + # for element_name, amount_hidden in names.items(): + # truncation_message.append( + # f"{amount_hidden} `{element_name}` element(s)" + # ) + + # child.replace_with( + # f"[...{','.join(truncation_message)} hidden for readablity ...]" + # ) + # break + + repeating_element_sequence_ids.append(child.get("d-id", "None")) + + if is_repeating_element(previous_element_info, child): + same_element_repeat_amount += 1 + + if ( + same_element_repeat_amount > MAX_REPEATING_ELEMENT_AMOUNT + and self._parent_is_explicitly_expanded(child) == False + ): + amount_repeating = 0 + if isinstance(child, Tag): + for sibling in child.next_siblings: + if isinstance(sibling, Tag) and is_repeating_element( + previous_element_info, sibling + ): + amount_repeating += 1 + + if has_placed_truncation == False and amount_repeating >= 1: + child.replace_with( + f"[...{amount_repeating} repeating `{child.name}` elements collapsed for readability...]" + ) + has_placed_truncation = True + + self.extract_crawlable_list( + repeating_element_sequence_ids, amount_repeating + ) + + if self.expand_crawlable_list == True: + for d_id in repeating_element_sequence_ids: + sequence_element = self.root.find( + attrs={"d-id": str(d_id)} + ) + + if isinstance(sequence_element, Tag): + original = BeautifulSoup( + str( + self.original_root.find( + attrs={"d-id": str(d_id)} + ) + ), + "html.parser", + ) + links = original.find_all("a") + for link in links: + + self.ids_to_expand.append( + str(link.get("d-id", "None")) + ) + sequence_element.replace_with(original) + traverse(sequence_element) + + repeating_element_sequence_ids = [] + else: + child.replace_with("") + continue + + else: + has_placed_truncation = False + previous_element_info = get_repeating_element_info(child) + same_element_repeat_amount = 0 + + # If a parent is expanded, allow larger element until collapsing + compression_mod = self.compression_multiplier + if self._parent_is_explicitly_expanded(child): + compression_mod = 0.5 + + if len(str(child)) < self.orginal_size // 300 * compression_mod: + if self._should_expand_anyways(child): + traverse(child) + else: + chars_to_keep = 2000 if self.focus_on_text else 80 + truncated_text = truncate_long_string_w_words( + child.get_text().replace("\n", ""), + max_len_start=chars_to_keep, + max_len_end=chars_to_keep, + ) + if truncated_text.strip(): + child.attrs = { + "is-compressed": "true", + "d-id": str(child.get("d-id", "")), + } + child.string = truncated_text + else: + child.replace_with("") + elif len(str(child)) > self.orginal_size // 10 * compression_mod: + traverse(child) + else: + if self._should_expand_anyways(child): + traverse(child) + else: + replacement = collapse(child) + child.replace_with(BeautifulSoup(replacement, "html.parser")) + + # total_token_size += len(str(child)) + # print("total_token_size: ", total_token_size) + + # if total_token_size > 2000: + # 
next_element_tags = [ + # sibling.name for sibling in child.next_siblings if isinstance(sibling, Tag)] + # child.replace_with( + # f"[...{', '.join(next_element_tags)} tags collapsed for readability...]") + + def remove_double_nested(soup): + for tag in soup.find_all(True): + # If a tag only contains a single child of the same type + if len(tag.find_all(True, recursive=False)) == 1 and isinstance( + tag.contents[0], Tag + ): + child_tag = tag.contents[0] + # move the contents of the child tag up to the parent + tag.clear() + tag.extend(child_tag.contents) + if len(tag.find_all(True, recursive=False)) == 1 and isinstance( + tag.contents[0], Tag + ): + remove_double_nested(tag) + + return soup + + def is_effectively_empty(element): + if element.name and not element.attrs: + if not element.contents or all( + isinstance(child, NavigableString) and len(child.strip()) < 3 + for child in element.contents + ): + return True + return False + + start_time = time.time() + for i in range(10): + for element in self.root.find_all(is_effectively_empty): + element.decompose() + + for tag in self.root.find_all(): + self.clear_attrs(tag, unique_class_names) + + if len(str(self.root)) < 1500: + return self.root.prettify() + + # print("time: ", end_time - start_time) + + # remove_double_nested(self.root) + # clean_attributes(root, keep_dendrite_id=False) + traverse(self.root) + # print("traverse time: ", end_time - start_time) + + return self.root.prettify() + + def get_compression_level(self) -> Tuple[str, int]: + if self.orginal_size > 100000: + return "4/4 (Extremely compressed)", 4 + elif self.orginal_size > 40000: + return "3/4 (Very compressed)", 3 + elif self.orginal_size > 4000: + return "2/4 (Slightly compressed)", 2 + elif self.orginal_size > 400: + return "1/4 (Very mild compression)", 1 + else: + return "0/4 (no compression)", 0 + + async def compress(self, search_terms: List[str] = []) -> str: + iterations = 0 + pretty = "" + self.search_terms = search_terms + + while token_count(pretty) > self.max_token_size or pretty == "": + iterations += 1 + if iterations > 5: + break + compression_level_desc, _ = self.get_compression_level() + # Show elements with relevant search terms more + if len(self.search_terms) > 0: + + def contains_text(element): + if element: + # Check only direct text content, not including nested elements + direct_text = "".join( + child + for child in element.children + if isinstance(child, NavigableString) + ).lower() + return any( + term.lower() in direct_text for term in self.search_terms + ) + return False + + matching_elements = self.original_root.find_all(contains_text) + for element in matching_elements: + print(f"Element contains search word: {str(element)[:400]}") + d_id = element.get("d-id") + if d_id: + self.ids_to_expand.append(d_id) + + # print("old: ", self.orginal_size) + md = self.get_html_display() + md = self._remove_consecutive_newlines(md) + pretty = BeautifulSoup(md, "html.parser").prettify() + end = time.time() + # print("pretty: ", pretty) + # print("new: ", token_count(pretty)) + # print("took: ", end - start) + # print("compression_level: ", compression_level_desc) + self.compression_multiplier *= 2 + + return pretty diff --git a/dendrite/logic/extract/extract.py b/dendrite/logic/extract/extract.py new file mode 100644 index 0000000..e44664f --- /dev/null +++ b/dendrite/logic/extract/extract.py @@ -0,0 +1,156 @@ +import asyncio +import hashlib +from typing import List, Optional +from urllib.parse import urlparse + +from loguru import logger + +from 
dendrite.logic.config import Config +from dendrite.logic.extract.cache import get_scripts, get_working_cached_script +from dendrite.logic.extract.extract_agent import ExtractAgent +from dendrite.models.dto.cached_extract_dto import CachedExtractDTO +from dendrite.models.dto.extract_dto import ExtractDTO +from dendrite.models.response.extract_response import ExtractResponse +from dendrite.models.scripts import Script + + +async def get_cached_scripts(dto: CachedExtractDTO, config: Config) -> List[Script]: + return get_scripts(dto.prompt, dto.url, config.extract_cache) or [] + + +async def test_cache( + extract_dto: ExtractDTO, config: Config +) -> Optional[ExtractResponse]: + try: + + cached_script_res = await get_working_cached_script( + extract_dto.combined_prompt, + extract_dto.page_information.raw_html, + extract_dto.page_information.url, + extract_dto.return_data_json_schema, + config, + ) + + if cached_script_res is None: + return None + + script, script_exec_res = cached_script_res + return ExtractResponse( + status="success", + message="Re-used a preexisting script from cache with the same specifications.", + return_data=script_exec_res, + created_script=script.script, + ) + + except Exception as e: + return ExtractResponse( + status="failed", + message=str(e), + ) + + +class InMemoryLockManager: + # Class-level dictionaries to keep track of locks and events + locks = {} + events = {} + global_lock = asyncio.Lock() + + def __init__( + self, + extract_page_dto: ExtractDTO, + ): + self.key = self.generate_key(extract_page_dto) + + def generate_key(self, extract_page_dto: ExtractDTO) -> str: + domain = urlparse(extract_page_dto.page_information.url).netloc + key_data = f"{domain}:{extract_page_dto.combined_prompt}" + return hashlib.sha256(key_data.encode()).hexdigest() + + async def acquire_lock(self, timeout: int = 60) -> bool: + async with InMemoryLockManager.global_lock: + if self.key in InMemoryLockManager.locks: + # Lock is already acquired + return False + else: + # Acquire the lock + InMemoryLockManager.locks[self.key] = True + return True + + async def release_lock(self): + async with InMemoryLockManager.global_lock: + InMemoryLockManager.locks.pop(self.key, None) + InMemoryLockManager.events.pop(self.key, None) + + async def publish(self, message: str): + async with InMemoryLockManager.global_lock: + event = InMemoryLockManager.events.get(self.key) + if event: + event.set() + + async def subscribe(self): + async with InMemoryLockManager.global_lock: + if self.key not in InMemoryLockManager.events: + InMemoryLockManager.events[self.key] = asyncio.Event() + # No need to assign to self.event; return the event instead + return InMemoryLockManager.events[self.key] + + async def wait_for_notification( + self, event: asyncio.Event, timeout: float = 1600.0 + ) -> bool: + try: + await asyncio.wait_for(event.wait(), timeout) + return True + except asyncio.TimeoutError as e: + logger.error(f"Timeout error: {e}") + return False + finally: + # Clean up event + async with InMemoryLockManager.global_lock: + InMemoryLockManager.events.pop(self.key, None) + + +async def extract(extract_page_dto: ExtractDTO, config: Config) -> ExtractResponse: + + lock_manager = InMemoryLockManager(extract_page_dto) + lock_acquired = await lock_manager.acquire_lock() + + if lock_acquired: + return await generate_script(extract_page_dto, lock_manager, config) + else: + res = await wait_for_script_generation(extract_page_dto, lock_manager, config) + + if res: + return res + # Else create a working script since 
page is different + extract_agent = ExtractAgent(extract_page_dto.page_information, config=config) + res = await extract_agent.write_and_run_script(extract_page_dto) + return res + + +async def generate_script( + extract_page_dto: ExtractDTO, lock_manager: InMemoryLockManager, config: Config +) -> ExtractResponse: + try: + extract_agent = ExtractAgent(extract_page_dto.page_information, config=config) + res = await extract_agent.write_and_run_script(extract_page_dto) + await lock_manager.publish("done") + return res + except Exception as e: + await lock_manager.publish("failed") + raise e + finally: + await lock_manager.release_lock() + + +async def wait_for_script_generation( + extract_page_dto: ExtractDTO, lock_manager: InMemoryLockManager, config: Config +) -> Optional[ExtractResponse]: + event = await lock_manager.subscribe() + logger.info("Waiting for script to be generated") + notification_received = await lock_manager.wait_for_notification(event) + + # If script was created after waiting + if notification_received: + res = await test_cache(extract_page_dto, config) + if res: + return res diff --git a/dendrite/logic/extract/extract_agent.py b/dendrite/logic/extract/extract_agent.py new file mode 100644 index 0000000..6173351 --- /dev/null +++ b/dendrite/logic/extract/extract_agent.py @@ -0,0 +1,289 @@ +import json +import re +import sys +from typing import List, Union + +from bs4 import BeautifulSoup + +from dendrite import logger + +from dendrite.logic.config import Config +from dendrite.logic.dom.strip import mild_strip +from dendrite.logic.extract.cache import save_script +from dendrite.logic.extract.prompts import ( + LARGE_HTML_CHAR_TRUNCATE_LEN, + create_script_prompt_segmented_html, +) +from dendrite.logic.extract.scroll_agent import ScrollAgent +from dendrite.logic.get_element.hanifi_search import get_expanded_dom +from dendrite.logic.llm.agent import Agent, Message +from dendrite.logic.llm.token_count import token_count +from dendrite.models.dto.extract_dto import ExtractDTO +from dendrite.models.page_information import PageInformation +from dendrite.models.response.extract_response import ExtractResponse + +from ..ask.image import segment_image +from ..code.code_session import CodeSession + + +class ExtractAgent(Agent): + def __init__(self, page_information: PageInformation, config: Config) -> None: + super().__init__(config.llm_config.get("extract_agent")) + self.page_information = page_information + self.soup = BeautifulSoup(page_information.raw_html, "lxml") + self.messages = [] + self.current_segment = 0 + self.config = config + + async def write_and_run_script( + self, extract_page_dto: ExtractDTO + ) -> ExtractResponse: + mild_soup = mild_strip(self.soup) + + segments = segment_image( + extract_page_dto.page_information.screenshot_base64, segment_height=4000 + ) + + scroll_agent = ScrollAgent( + self.page_information, llm_config=self.config.llm_config + ) + scroll_result = await scroll_agent.scroll_through_page( + extract_page_dto.combined_prompt, + image_segments=segments, + ) + + if scroll_result.status == "error": + return ExtractResponse( + status="impossible", + message=str(scroll_result.message), + ) + + if scroll_result.status == "loading": + return ExtractResponse( + status="loading", + message="This page is still loading. 
Please wait a bit longer.", + ) + + expanded_html = None + + if scroll_result.element_to_inspect_html: + combined_prompt = ( + "Get these elements (make sure you only return element that you are confident that these are the correct elements, it's OK to not select any elements):\n- " + + "\n- ".join(scroll_result.element_to_inspect_html) + ) + expanded = await get_expanded_dom( + mild_soup, combined_prompt, self.config.llm_config + ) + if expanded: + expanded_html = expanded[0] + + if expanded_html: + return await self.code_script_from_found_expanded_html_tags( + extract_page_dto, expanded_html + ) + + raise Exception("Failed to extract data from the page") # TODO: skriv bättre + + def segment_large_tag(self, tag): + segments = [] + current_segment = "" + current_tokens = 0 + for line in tag.split("\n"): + line_tokens = token_count(line) + if current_tokens + line_tokens > 4000: + segments.append(current_segment) + current_segment = line + current_tokens = line_tokens + else: + current_segment += line + "\n" + current_tokens += line_tokens + if current_segment: + segments.append(current_segment) + return segments + + async def code_script_from_found_expanded_html_tags( + self, extract_page_dto: ExtractDTO, expanded_html + ): + + agent_logger = logger.bind(scope="extract", step="generate_code") + + user_prompt = create_script_prompt_segmented_html( + extract_page_dto.combined_prompt, + expanded_html, + self.page_information.url, + ) + # agent_logger.debug(f"User prompt created: {user_prompt[:100]}...") + + content = { + "type": "text", + "text": user_prompt, + } + + messages: List[Message] = [ + {"role": "user", "content": user_prompt}, + ] + + iterations = 0 + max_retries = 10 + + for iterations in range(max_retries): + agent_logger.debug(f"Code generation | Iteration: {iterations}") + + text = await self.call_llm(messages) + messages.append({"role": "assistant", "content": text}) + + json_pattern = r"```json(.*?)```" + code_pattern = r"```python(.*?)```" + + if text is None: + content = "Error: Failed to generate content." + messages.append({"role": "user", "content": content}) + continue + + json_matches = re.findall(json_pattern, text, re.DOTALL) + code_matches = re.findall(code_pattern, text, re.DOTALL) + + if len(json_matches) + len(code_matches) > 1: + content = "Error: Please output only one action at a time (either JSON or Python code, not both)." + messages.append({"role": "user", "content": content}) + continue + + if code_matches: + self.generated_script = code_matches[0].strip() + result = await self._handle_code_match( + code_matches[0].strip(), + messages, + iterations, + max_retries, + extract_page_dto, + agent_logger, + ) + + messages.extend(result) + continue + + elif json_matches: + result = self._handle_json_match(json_matches[0], expanded_html) + if isinstance(result, ExtractResponse): + save_script( + self.generated_script, + extract_page_dto.combined_prompt, + self.page_information.url, + cache=self.config.extract_cache, + ) + return result + elif isinstance(result, list): + messages.extend(result) + continue + else: + # If neither code nor json matches found, send error message + content = "Error: Could not find valid code or JSON in the assistant's response." 
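+                # Feed the error back to the model and retry; the enclosing
+                # for-loop is bounded by max_retries, so this cannot retry forever.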
+ messages.append({"role": "user", "content": content}) + continue + + # agent_logger.warning("Failed to create script after retrying several times") + return ExtractResponse( + status="failed", + message="Failed to create script after retrying several times.", + return_data=None, + created_script=self.generated_script, + ) + + async def _handle_code_match( + self, + generated_script: str, + messages: List[Message], + iterations, + max_retries, + extract_page_dto: ExtractDTO, + agent_logger, + ) -> List[Message]: + temp_code_session = CodeSession() + + try: + variables = temp_code_session.exec_code( + generated_script, self.soup, self.page_information.raw_html + ) + + if "response_data" not in variables: + return [ + { + "role": "user", + "content": "Error: You need to add the variable 'response_data'", + } + ] + + self.response_data = variables["response_data"] + + if extract_page_dto.return_data_json_schema: + temp_code_session.validate_response( + extract_page_dto.return_data_json_schema, self.response_data + ) + + llm_readable_exec_res = temp_code_session.llm_readable_exec_res( + variables, + extract_page_dto.combined_prompt, + iterations, + max_retries, + ) + + return [{"role": "user", "content": llm_readable_exec_res}] + + except Exception as e: + return [{"role": "user", "content": f"Error: {str(e)}"}] + + def _handle_json_match( + self, json_str: str, expanded_html: str + ) -> Union[ExtractResponse, List[Message]]: + try: + data_dict = json.loads(json_str) + + if "request_more_html" in data_dict: + return self._handle_more_html_request(expanded_html) + + if "error" in data_dict: + raise Exception(data_dict["error"]) + + if "success" in data_dict: + return ExtractResponse( + status="success", + message=data_dict["success"], + return_data=self.response_data, + created_script=self.generated_script, + ) + return [ + { + "role": "user", + "content": "Error: JSON response does not specify a valid action.", + } + ] + + except Exception as e: + return [{"role": "user", "content": f"Error: {str(e)}"}] + + def _handle_more_html_request(self, expanded_html: str) -> List[Message]: + + if LARGE_HTML_CHAR_TRUNCATE_LEN * (self.current_segment + 1) >= len( + expanded_html + ): + return [{"role": "user", "content": "There is no more HTML to show."}] + + self.current_segment += 1 + start = LARGE_HTML_CHAR_TRUNCATE_LEN * self.current_segment + end = min( + LARGE_HTML_CHAR_TRUNCATE_LEN * (self.current_segment + 1), + len(expanded_html), + ) + + content = ( + f"""Here is more of the HTML:\n```html\n{expanded_html[start:end]}\n```""" + ) + + if len(expanded_html) > end: + content += ( + "\nThere is still more HTML to see. You can request more if needed." + ) + else: + content += "\nThis is the end of the HTML content." + + return [{"role": "user", "content": content}] diff --git a/dendrite/logic/extract/prompts.py b/dendrite/logic/extract/prompts.py new file mode 100644 index 0000000..35c1037 --- /dev/null +++ b/dendrite/logic/extract/prompts.py @@ -0,0 +1,230 @@ +def get_script_prompt(final_compressed_html: str, prompt: str, current_url: str): + return f"""Compressed HTML: +{final_compressed_html} + +Please look at the HTML DOM above and use execute_code to accomplish the user's task. + +Don't use the attributes 'is-compressed' and 'd-id' inside your script. + +Prefer using soup.select() over soup.find_all(). + +If you are asked to fetch text from an article or similar it's generally a good idea to find the element(s) containing the article text and extracting the text from those. 
You'll also need to remove unwanted text that isn't article text from those elements.
+
+All elements with the attribute is-compressed="true" are collapsed and may contain hidden elements. If you need to use an element that is compressed you have to call expand_html_further, for example:
+
+expand_html_further({{"prompt": "I need to understand the structure of at least one product to create a script that fetches each product, since all the products are compressed I'll expand the first two. I'll also expand the pagination controls since they are relevant for the task.", "d-ids_to_expand": "3uy9v2, 3uy9d2, -29ahd"}})
+
+When scraping a list of items make sure at least one of the items is fully expanded to understand each item's structure before you code. You don't need to expand all items if you can see that there is a repeating structure.
+
+Your code must be a full implementation that solves the user's task.
+
+Try to make your scripts as general as possible. They should work for different pages with a similar html structure if possible. No hard-coded values that'll only work for the page above.
+
+Finally, the script must contain a variable called 'response_data'. This variable is sent back to the user and it must match the specification inside their prompt listed below.
+
+Current URL: {current_url}
+User's Prompt:
+{prompt}"""
+
+
+def expand_futher_prompt(
+    compressed_html: str,
+    max_iterations: int,
+    iterations: int,
+    reasoning_prompt: str,
+    question: str,
+):
+    return f"""{compressed_html}
+
+Please look at the compressed HTML above and output a comma-separated list of elements that need to be de-compressed so that the task can be solved.
+
+Task: '{question}'
+
+Every element with the attribute is-compressed="true" can be de-compressed. Compressed elements may contain hidden elements such as anchor tags and buttons, so it's really important that the elements relevant to the task are expanded.
+
+You'll get max {max_iterations} iterations to explore the HTML DOM tree.
+
+You are currently on iteration {iterations}. Try to expand the DOM in relevant places at least three times.
+
+{reasoning_prompt}
+
+It's really important that you expand ALL the elements you believe could be useful for the task! However, in situations where you have repeating elements, such as product elements in a product list or sections of paragraphs in an article, you only need to expand a few of the repeating elements to be able to understand the others' structure.
+
+Now you may output:
+- Ids to inspect further, prefixed by some short reasoning (Don't expand irrelevant elements and avoid outputting many IDs since that increases the token size of the HTML preview)
+- "Done" once every relevant element is expanded.
+- An error message if the task is too vague or not possible to complete. A common use-case for the error message is when a page loads incorrectly and none of the task's data is available.
+
+See the examples below to see each output's format:
+
+EXAMPLE OUTPUT
+Reasoning: Most of the important elements are expanded, but I still need to understand the HTML structure of the article's headings. To do this I'll expand the first section heading with the text 'hello kitty' and the d-id adh2ia. I'll also expand the related infobox with the id -s29as. By expanding these I'll be able to understand all the article's titles.
+Ids: adh2ia, -s29as
+END EXAMPLE OUTPUT
+
+EXAMPLE OUTPUT
+Reasoning: To understand the structure of the compressed product cards in the product list I'll expand the first three with the d-ids -7ap2j1, -7ap288 and -7ap2au. I'll also expand the pagination controls at the bottom of the product list since pagination can be useful for the task; this includes the page buttons for '1', '2' and '3' with the d-ids j02ajd, j20had, j9dwh9 and the 'next page' button with the id j9dwss.
+Ids: -7ap2j1, -7ap288, -7ap2au, j02ajd, j20had, j9dwh9, j9dwss
+END EXAMPLE OUTPUT
+
+EXAMPLE OUTPUT
+Done
+END EXAMPLE OUTPUT
+
+EXAMPLE OUTPUT
+Error: I don't understand what is meant by 'extract the page text', this page is completely empty.
+END EXAMPLE OUTPUT"""
+
+
+def generate_prompt_extract_compressed_html(
+    combined_prompt: str,
+    expanded_html: str,
+    current_url: str,
+):
+    return f"""You are a web scraping agent that runs one action at a time by outputting a message with either elements to decompress, code to run or a status message. Never run several actions in the same message.
+
+Code a bs4 or regex script that can solve the task listed below for the webpage I'll specify below. First, inspect relevant areas of the DOM.
+
+
+{combined_prompt}
+
+
+Here is a compressed version of the webpage's HTML:
+
+```html
+{expanded_html}
+```
+
+
+Important: Every element with the attribute `is-compressed="true"` is compressed – compressed elements may contain hidden elements such as anchor tags and buttons, so you need to decompress them to fully understand their structure before you write a script!
+
+Below are your available functions and how to use them:
+
+Start by outputting one or more d-ids of elements you'd like to decompress before you write a script. Focus on decompressing elements that look relevant to the task. If possible, expand one d-id at a time. Output in a format like this:
+
+[Short reasoning first.]
+```json
+{{
+    "d-ids": ["xxx", "yyy"]
+}}
+```
+
+Once you have decompressed the DOM at least once in separate messages and have a good enough understanding of the page's structure, write some python code to extract the required data using bs4 or regex. `from datetime import datetime` is available.
+
+Your code will be run inside exec() so don't use a return statement, just create variables.
+
+To scrape information from the current page use the predefined variable `html_string` (all the page's html as a string) or `soup` (the current page's root bs4 object). Don't use 'd-id' and 'is_compressed' in your script since these are temporary. Use selectors native to the site.
+
+The script must contain a variable called 'response_data' and its structure must match the task listed above.
+
+Don't return a response_data with hardcoded values that only work for the current page. The script must be general and work for similar pages with the same structure.
+
+Unless specified, raise an exception if an expected value cannot be extracted.
+
+The current URL is: {current_url}
+
+Here's how you can do it in a message:
+
+[Do some reasoning first]
+```python
+# Simple bs4 code that fetches all the page's hrefs
+response_data = [a.get('href') for a in soup.find_all('a')] # Uses the predefined soup variable
+```
+
+If the task isn't possible to complete (maybe because the task is too vague, the page contains an error or the page failed to load) don't try to create a script with many assumptions. 
Instead, output an error like this:
+
+```json
+{{
+    "error": "error message"
+}}
+```
+
+Once you've created and run a script and you are happy with response_data, output a short success message (max one paragraph) containing json like this; the response_data will automatically be returned to the user once you send this message, you don't need to output it:
+
+```json
+{{
+    "success": "Write one-two sentences about how your script works and how you ended up with the result you got."
+}}
+```
+
+Don't include both the python code and the json object in the same message.
+
+Be sure that the script has been executed and that you have seen the response_data in a previous message before you output the success message."""
+
+
+LARGE_HTML_CHAR_TRUNCATE_LEN = 40000
+
+
+def create_script_prompt_segmented_html(
+    combined_prompt: str,
+    expanded_html: str,
+    current_url: str,
+):
+    if len(expanded_html) / 4 > LARGE_HTML_CHAR_TRUNCATE_LEN:
+        html_prompt = f"""```html
+    {expanded_html[:LARGE_HTML_CHAR_TRUNCATE_LEN]}
+```
+This HTML is truncated to {LARGE_HTML_CHAR_TRUNCATE_LEN} characters since it was too large. If you need to see more of the HTML, output a message like this:
+```json
+{{
+    "request_more_html": true
+}}
+```
+"""
+    else:
+        html_prompt = f"""```html
+    {expanded_html}
+```
+"""
+
+    return f"""You are a web scraping agent that analyzes HTML and writes Python scripts to extract data. Your task is to solve the following request for the webpage specified below.
+
+
+{combined_prompt}
+
+
+Current URL: {current_url}
+
+Here is a truncated version of the HTML that focuses on relevant parts of the webpage (some elements have been replaced with their text contents):
+{html_prompt}
+
+Instructions:
+1. Analyze the provided HTML segments carefully.
+
+2. Use bs4 or regex. `from datetime import datetime` is available.
+- Your code will be run inside exec() so don't use a return statement, just create variables.
+- To scrape information from the current page use the predefined variable `html_string` (all the page's html as a string) or `soup` (the current page's root bs4 object). Don't use 'd-id' and 'is_compressed' in your script since these are temporary. Use selectors native to the site.
+- The script must contain a variable called 'response_data' and its structure must match the task listed above.
+- Don't return a response_data with hardcoded values that only work for the current page. The script must be general and work for similar pages with the same structure.
+- Unless specified, raise an exception if an expected value cannot be extracted.
+
+3. Output your Python script in this format:
+[Do some reasoning first]
+```python
+# Simple bs4 code that fetches all the page's hrefs
+response_data = [a.get('href') for a in soup.find_all('a')] # Uses the predefined soup variable
+```
+
+Don't output an explanation of the script after the code. Just do some short reasoning before.
+
+4. If the task isn't possible to complete, output an error message like this:
+```json
+{{
+    "error": "Detailed error message explaining why the task can't be completed"
+}}
+```
+
+5. Once you've successfully created and run a script, seen that the output is correct and you're happy with it, output a short success message:
+```json
+{{
+    "success": "Brief explanation of how your script works and how you arrived at the result"
+}}
+```
+Remember:
+- Only output one action at a time (element index to expand, Python code, or status message).
+- Don't include both Python code and JSON objects in the same message. 
+- Ensure the script has been executed and you've seen the `response_data` before sending the success message. +- Do short reasoning before you output an action, max one-two sentences. +- Never include a success message in the same output as your Python code. Always output the success message after you've seen the result of your code. + +You may now begin by analyzing the HTML or requesting to expand specific elements if needed.""" diff --git a/dendrite/logic/extract/scroll_agent.py b/dendrite/logic/extract/scroll_agent.py new file mode 100644 index 0000000..9038557 --- /dev/null +++ b/dendrite/logic/extract/scroll_agent.py @@ -0,0 +1,232 @@ +import json +import re +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import List, Literal, Optional + +from loguru import logger +from openai.types.chat.chat_completion_content_part_param import ( + ChatCompletionContentPartParam, +) + +from dendrite.logic.llm.agent import Agent, Message +from dendrite.logic.llm.config import LLMConfig +from dendrite.models.page_information import PageInformation + +ScrollActionStatus = Literal["done", "scroll_down", "loading", "error"] + + +@dataclass +class ScrollResult: + element_to_inspect_html: List[str] + segment_index: int + status: ScrollActionStatus + message: Optional[str] = None + + +class ScrollRes(ABC): + @abstractmethod + def parse(self, data_dict: dict, segment_i: int) -> Optional[ScrollResult]: + pass + + +class ElementPromptsAction(ScrollRes): + def parse(self, data_dict: dict, segment_i: int) -> Optional[ScrollResult]: + if "element_to_inspect_html" in data_dict: + + status = ( + "scroll_down" + if not data_dict.get("continue_scrolling", False) + else "done" + ) + + return ScrollResult(data_dict["element_to_inspect_html"], segment_i, status) + return None + + +class LoadingAction(ScrollRes): + def parse(self, data_dict: dict, segment_i: int) -> Optional[ScrollResult]: + if data_dict.get("is_loading", False): + return ScrollResult([], segment_i, "loading") + return None + + +class ErrorRes(ScrollRes): + def parse(self, data_dict: dict, segment_i: int) -> Optional[ScrollResult]: + if "error" in data_dict: + return ScrollResult( + [], + segment_i, + "error", + data_dict["error"], + ) + return None + + +class ScrollAgent(Agent): + def __init__(self, page_information: PageInformation, llm_config: LLMConfig): + super().__init__(llm_config.get("scroll_agent")) + self.page_information = page_information + self.choices: List[ScrollRes] = [ + ElementPromptsAction(), + LoadingAction(), + ErrorRes(), + ] + + self.logger = logger.bind(agent="scroll_agent") + + async def scroll_through_page( + self, + combined_prompt: str, + image_segments: List[str], + ) -> ScrollResult: + messages = [self.create_initial_message(combined_prompt, image_segments[0])] + all_elements_to_inspect_html = [] + current_segment = 0 + + while current_segment < len(image_segments): + data_dict = await self.process_segment(messages) + + for choice in self.choices: + result = choice.parse(data_dict, current_segment) + if result: + if result.element_to_inspect_html: + all_elements_to_inspect_html.extend( + result.element_to_inspect_html + ) + return result + + if "element_to_inspect_html" in data_dict: + all_elements_to_inspect_html.extend( + data_dict["element_to_inspect_html"] + ) + + if self.should_continue_scrolling( + data_dict, current_segment, len(image_segments) + ): + current_segment += 1 + scroll_message = self.create_scroll_message( + image_segments[current_segment] + ) + 
messages.append(scroll_message) + else: + break + + return ScrollResult(all_elements_to_inspect_html, current_segment, "done") + + async def process_segment(self, messages: List[Message]) -> dict: + + text = await self.call_llm(messages) + messages.append({"role": "assistant", "content": text}) + + json_pattern = r"```json(.*?)```" + + json_matches = re.findall(json_pattern, text, re.DOTALL) + + if len(json_matches) > 1: + logger.warning("Agent output multiple actions in one message") + error_message = "Error: Please output only one action at a time." + messages.append({"role": "user", "content": error_message}) + raise Exception(error_message) + elif json_matches: + return json.loads(json_matches[0].strip()) + + error_message = "No valid JSON found in the response" + logger.error(error_message) + messages.append({"role": "user", "content": error_message}) + raise Exception(error_message) + + def create_initial_message(self, combined_prompt: str, first_image: str) -> Message: + content: List[ChatCompletionContentPartParam] = [ + { + "type": "text", + "text": f"""You are a web scraping agent that can code scripts to solve the web scraping tasks listed below for the webpage I'll specify. Before we start coding, we need to inspect the html of the page closer. + +This is the web scraping task: + +{combined_prompt} + + +Analyze the viewport and decide on the next action: + +1. Identify elements that we want to inspect closer so we can write the script. Do this by outputting a message with a list of prompts to find the relevant element(s). + +Output as few elements as possible, but it should be enought to gain a proper understanding of the DOM for our script. + +If a list of items need to be extracted, consider getting a few unique examples of items from the list that differ slightly so we can create code that accounts for their differences. Avoid listing several elements that are very similar since we can infer the structure of one or two of them to the rest. + +Don't get several different parts of one relevant element, just get the whole element since it's easier to just inspect the whole element. + +Avoid selecting very large elements that contain a lot of html since it can be very overwhelming to inspect. + +Always be specific about the element you are thinking of, don't write 'get a item', write 'get the item with the text "Item Name"'. + +Here's an example of a good output: +[Short reasoning first, max one paragraph] +```json +{{ + "element_to_inspect_html": ["The small container containing the weekly amount of downloads, labeled 'Weekly Downloads'", "The element containing the main body of article text, the title is 'React Router DOM'."], + "continue_scrolling": true/false (only scroll down if you think more relevant elements are further down the page, only do this if you need to) +}} +``` + +2. If you can't see relevant elements just yet, but you think more data might be available further down the page, output: +[Short reasoning first, max one paragraph] +```json +{{ + "scroll_down": true +}} +``` + +3. This page was first loaded {round(self.page_information.time_since_frame_navigated, 2)} second(s) ago. If the page is blank or the data is not available on the current page it could be because the page is still loading. If you believe this is the case, output: +[Short reasoning first, max one paragraph] +```json +{{ + "is_loading": true +}} +``` + +4. 
+4. In case the data is not available on the current page and the task does not describe how to handle missing data, or there seems to be some kind of mistake, output a JSON object with a short error message, like this:
+[Short reasoning first, max one paragraph]
+```json
+{{
+    "error": "This page doesn't contain any package data, it is the welcome page for 'dendrite.systems'; it won't be possible to code a script to extract the requested data.",
+    "was_blocked_by_recaptcha": true/false
+}}
+```
+
+Continue scrolling and accumulating element prompts until you feel like we have enough elements to inspect to create an excellent script.
+
+Important: Only output one json object per message.
+
+Below is a screenshot of the current page; if it looks blank or empty, the page could still be loading. If this is the case, don't guess what elements to inspect, respond with `is_loading` instead.""",
+            },
+            {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/jpeg;base64,{first_image}"},
+            },
+        ]
+
+        msg: Message = {"role": "user", "content": content}
+        return msg
+
+    def create_scroll_message(self, image: str) -> Message:
+        return {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "Scrolled down, here is the new viewport:"},
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{image}",
+                    },
+                },
+            ],
+        }
+
+    def should_continue_scrolling(
+        self, data_dict: dict, current_index: int, total_segments: int
+    ) -> bool:
+        return (
+            "scroll_down" in data_dict or data_dict.get("continue_scrolling", False)
+        ) and current_index + 1 < total_segments
diff --git a/dendrite/sync_api/_core/__init__.py b/dendrite/logic/get_element/__init__.py
similarity index 100%
rename from dendrite/sync_api/_core/__init__.py
rename to dendrite/logic/get_element/__init__.py
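The `ScrollResult` statuses above ("done", "scroll_down", "loading", "error") are what a caller branches on. A minimal, hypothetical driver sketch; the segment screenshots and the retry policy are assumptions, not part of this diff:

```python
import asyncio

from dendrite.logic.extract.scroll_agent import ScrollAgent
from dendrite.logic.llm.config import LLMConfig
from dendrite.models.page_information import PageInformation


async def collect_element_prompts(
    page_info: PageInformation, task_prompt: str, segments: list
):
    # `segments` is assumed to be a list of base64 JPEG strings, one per
    # viewport-sized slice of the page, produced by the browser layer.
    agent = ScrollAgent(page_info, LLMConfig())
    result = await agent.scroll_through_page(task_prompt, segments)

    if result.status == "error":
        raise RuntimeError(result.message or "scroll agent failed")
    if result.status == "loading":
        # The page was still rendering; a real caller would re-screenshot
        # the page and retry after a short delay.
        await asyncio.sleep(1)
    return result.element_to_inspect_html
```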
diff --git a/dendrite/logic/get_element/agents/prompts/__init__.py b/dendrite/logic/get_element/agents/prompts/__init__.py
new file mode 100644
index 0000000..b6ffe51
--- /dev/null
+++ b/dendrite/logic/get_element/agents/prompts/__init__.py
@@ -0,0 +1,199 @@
+SEGMENT_PROMPT = """You are an agent tasked with finding candidate elements that match the element the user is looking for. You will get multiple segments of the page's HTML and a description of the element the user is looking for.
+The description can be the text that the element contains or the type of element. You might get both short and long descriptions.
+Don't only look for an exact match of the text.
+
+Look at aria-labels if there are any, as they are helpful in identifying the elements.
+
+You will get the information in the following format:
+
+<desc>
+DESCRIPTION
+</desc>
+
+<segment>
+HTML CONTENT
+</segment>
+...
+<segment>
+HTML CONTENT
+</segment>
+
+Each element will have an attribute called d-id, which you should refer to when you have found the elements that the user is looking for. There might be multiple elements that fit the user's request; if so, include multiple d-ids.
+If you've selected an element you should NOT select another element that is a child of the element you've selected.
+Be sure to include a reason for why you selected the elements that you did. Think step by step about what made you choose this element over the others.
+Your response should include 2-3 sentences of reasoning and a code block containing JSON, including the backticks; the reason text below is just a placeholder. Always include a sentence of reasoning in the output:
+
+```json
+{
+    "reason": "<2-3 sentences of reasoning>",
+    "d_id": ["125292", "9541ad"],
+    "status": "success"
+}
+```
+
+If no element seems to match the user's request, or you think the page is still loading, output the following, again with 2-3 sentences of reasoning:
+
+```json
+{
+    "reason": "<2-3 sentences of reasoning>",
+    "status": "failed" or "loading"
+}
+```
+
+Here are some examples to help you understand the task (your response is the content under "ASSISTANT:"):
+
+Example 1:
+
+USER: Can you get the d_id of the element that matches this description?
+
+<desc>
+pull requests count
+</desc>
+
+<segment>
+<li d-id="...">
+  •
+  <a d-id="...">
+    <span d-id="...">Pull requests</span>
+    <span class="Counter" d-id="235512">14</span>
+  </a>
+</li>
+•
+</segment>
+
+ASSISTANT:
+
+```json
+{
+    "reason": "I selected this element because it has the class Counter and is a number next to the pull requests text.",
+    "d_id": ["235512"],
+    "status": "success"
+}
+```
+
+Example 2:
+
+USER: Can you get the d_id of the element that matches this description?
+
+<desc>
+search bar
+</desc>
+
+<segment>
+...
+</segment>
+Don't include any <desc> or <segment> tags or their content in your response."""
+
+SELECT_PROMPT = """You are a web scraping agent who is an expert at selecting the element(s) that the user is asking for.
+
+You will get the information in the following format:
+
+<desc>
+DESCRIPTION
+</desc>
+
+```html
+HTML CONTENT
+```
+
+Try to select a single element that you think is the best match for the user's request. The element should be as small as possible while still containing the information that the user is looking for. If there are wrapper elements, select the element inside. Be sure to include a reason for why you selected the element that you did.
+To select an element you should refer to the d-id attribute, which is a unique identifier for each element.
+
+Your response should be in the following format, including the backticks. Do all your reasoning in the `reason` field; only output the json:
+
+```json
+{
+    "reason": "After looking at the HTML it is clear that '98jorq3' is the correct element since it contains the text 'Hello World' which is exactly what the user asked for.",
+    "d_id": ["98jorq3"],
+    "status": "success"
+}
+```
+
+If the requested element doesn't seem to be available on the page, that's OK. Return the following format, including the backticks:
+
+```json
+{
+    "reason": "This page doesn't seem to contain any link for a 'Github repository' as requested. The page has had a couple of seconds to load too, and there are links for twitter and facebook, but no github. So, it's impossible to find the requested element on this page.",
+    "status": "impossible"
+}
+```
+
+A page could still be loading; if this is the case, return the following format, including the backticks:
+
+```json
+{
+    "reason": "Since the requested element is missing and the page loaded only 2 seconds ago, I believe the page is still loading. Let's wait for the page to load and try again.",
+    "status": "loading"
+}
+```
+
+Here is an example to help you understand how to select the best element:
+
+USER:
+
+<desc>
+pull requests count next to commits count
+</desc>
+
+```html
+<ul d-id="...">
+  ...
+  <li d-id="...">
+    •
+    <span d-id="...">Commits</span>
+    <span class="Counter" d-id="781faa">24</span>
+  </li>
+  •
+  <li d-id="...">
+    •
+    <span d-id="...">Pull requests</span>
+    <span class="Counter" d-id="s8yy81">14</span>
+  </li>
+  •
+  ...
+</ul>
+```
+
+ASSISTANT:
+```json
+{
+    "reason": "This is tricky, there are a few elements that could match the user's request (s8yy81 and 781faa), however I selected the element with the d-id 's8yy81' because the span has the class Counter, contains a number, and is next to a span with the text 'Pull requests'.",
+    "d_id": ["s8yy81"],
+    "status": "success"
+}
+```
+
+IMPORTANT!
+Your reasoning must be limited to 3-4 sentences.
+"""
diff --git a/dendrite/logic/get_element/agents/segment_agent.py b/dendrite/logic/get_element/agents/segment_agent.py
new file mode 100644
index 0000000..e03f3fd
--- /dev/null
+++ b/dendrite/logic/get_element/agents/segment_agent.py
@@ -0,0 +1,119 @@
+import json
+import re
+from typing import Annotated, List, Literal, Union
+
+from annotated_types import Len
+from loguru import logger
+from pydantic import BaseModel, ValidationError
+
+from dendrite.logic.llm.agent import Agent
+from dendrite.logic.llm.config import LLMConfig
+
+from .prompts import SEGMENT_PROMPT
+
+
+class SegmentAgentSuccessResponse(BaseModel):
+    reason: str
+    status: Literal["success"]
+    d_id: Annotated[List[str], Len(min_length=1)]
+    index: int = 99999  # placeholder since the agent doesn't output this
+
+
+class SegmentAgentFailureResponse(BaseModel):
+    reason: str
+    status: Literal["failed", "loading", "impossible"]
+    index: int = 99999  # placeholder since the agent doesn't output this
+
+
+SegmentAgentReponseType = Union[
+    SegmentAgentSuccessResponse, SegmentAgentFailureResponse
+]
+
+
+def parse_segment_output(text: str, index: int) -> SegmentAgentReponseType:
+    json_pattern = r"```json(.*?)```"
+    res = None
+
+    if text is None:
+        return SegmentAgentFailureResponse(
+            reason="No content", status="failed", index=index
+        )
+
+    json_matches = re.findall(json_pattern, text, re.DOTALL)
+
+    if not json_matches:
+        return SegmentAgentFailureResponse(
+            reason="No JSON matches", status="failed", index=index
+        )
+
+    json_match = json_matches[0]
+    try:
+        json_data = json.loads(json_match)
+        if "d_id" in json_data and "reason" in json_data:
+            ids = json_data["d_id"]
+            if len(ids) == 0:
+                logger.warning(
+                    f"Success message was output, but no d_ids provided: {json_data}"
+                )
+                return SegmentAgentFailureResponse(
+                    reason="No d_ids provided", status="failed", index=index
+                )
+
+            res = SegmentAgentSuccessResponse(
+                reason=json_data["reason"],
+                status="success",
+                d_id=json_data["d_id"],
+            )
+    except json.JSONDecodeError as e:
+        raise ValueError(f"Failed to decode JSON: {e}")
+
+    if res is None:
+        try:
+            res = SegmentAgentFailureResponse.model_validate_json(json_matches[0])
+        except ValidationError as e:
+            logger.bind(json=json_matches[0]).error(
+                f"Failed to parse JSON: {e}",
+            )
+            res = SegmentAgentFailureResponse(
+                reason="Failed to parse JSON", status="failed", index=index
+            )
+
+    res.index = index
+    return res
+
+
+async def extract_relevant_d_ids(
+    prompt: str, segments: List[str], index: int, llm_config: LLMConfig
+) -> SegmentAgentReponseType:
+    agent = Agent(llm_config.get("segment_agent"), system_prompt=SEGMENT_PROMPT)
+    message = ""
+    for segment in segments:
+        message += (
+            f"""###### SEGMENT ######\n\n{segment}\n\n###### SEGMENT END ######\n\n"""
+        )
+
+    message += f"Can you get the d_id of the elements that match the following description:\n\n{prompt} element\n\nIf you've selected an element you should NOT select another element that is a child of the element you've selected. It is important that you follow this."
+    message += """\nOutput how you think. Think step by step. If there are multiple candidate elements, return all of them. Don't make up d-ids for elements if they are not present or don't match the description. Limit your reasoning to 2-3 sentences.\nOnly include the json block – don't output an array, only ONE object."""
+
+    max_retries = 3
+    for attempt in range(max_retries):
+        res = await agent.add_message(message)
+        if res is None:
+            message = "I didn't receive a response. Please try again."
+            continue
+
+        try:
+            parsed_res = parse_segment_output(res, index)
+            # If we successfully parsed the result, return it
+            return parsed_res
+        except Exception as e:
+            # If parsing fails, ask the agent to correct its output
+            logger.warning(f"Error in segment agent: {e}")
+            message = f"An exception occurred in your output: {e}\n\nPlease correct your output and try again. Ensure you're providing a valid JSON response."
+
+    # If we've exhausted all retries, return a failure response
+    return SegmentAgentFailureResponse(
+        reason="Max retries reached without successful parsing",
+        status="failed",
+        index=index,
+    )
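Since `parse_segment_output` is a pure function, its contract is easy to exercise directly. An illustrative check; the d-id value is made up:

```python
from dendrite.logic.get_element.agents.segment_agent import parse_segment_output

# A well-formed agent reply: short reasoning followed by one fenced JSON block.
reply = (
    "The span matches the label text.\n"
    "```json\n"
    '{"reason": "Matches the label text.", "d_id": ["a1b2c3"], "status": "success"}\n'
    "```"
)

res = parse_segment_output(reply, index=0)
print(res.status, res.d_id, res.index)  # -> success ['a1b2c3'] 0
```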
diff --git a/dendrite/logic/get_element/agents/select_agent.py b/dendrite/logic/get_element/agents/select_agent.py
new file mode 100644
index 0000000..1c2f6d0
--- /dev/null
+++ b/dendrite/logic/get_element/agents/select_agent.py
@@ -0,0 +1,98 @@
+import re
+from typing import List, Optional, Tuple
+
+from loguru import logger
+from openai.types.chat import ChatCompletion
+from pydantic import BaseModel
+
+from dendrite.browser._common.types import Status
+from dendrite.logic.llm.agent import Agent
+from dendrite.logic.llm.config import LLMConfig
+
+from ..hanifi_segment import SelectedTag
+from .prompts import SELECT_PROMPT
+
+
+class SelectAgentResponse(BaseModel):
+    reason: str
+    d_id: Optional[List[str]] = None
+    status: Status
+
+
+async def select_best_tag(
+    expanded_html_tree: str,
+    tags: List[SelectedTag],
+    prompt: str,
+    time_since_frame_navigated: Optional[float],
+    llm_config: LLMConfig,
+    return_several: bool = False,
+) -> Tuple[int, int, Optional[SelectAgentResponse]]:
+
+    agent = Agent(llm_config.get("select_agent"), system_prompt=SELECT_PROMPT)
+
+    message = f"<desc>\n{prompt}\n</desc>"
+
+    tags_str = "\n".join([f"d-id: {tag.d_id} - reason: '{tag.reason}'" for tag in tags])
+
+    message += f"""\n\nA smaller and less intelligent AI agent has combed through the html document and found these elements that seem to match the element description:\n\n{tags_str}\n\nThis agent is very primitive however, so don't blindly trust it. Make sure you carefully look at this truncated version of the html document and do some proper reasoning in which you consider the different potential elements:\n\n```html\n{expanded_html_tree}\n```\n"""
+
+    if return_several:
+        message += f"""Please look at the HTML Tree and output a list of d-ids that match the ELEMENT_DESCRIPTION."""
+    else:
+        message += f"""Please look at the HTML Tree and output the best d-id that matches the ELEMENT_DESCRIPTION. Only return ONE d-id."""
+
+    if time_since_frame_navigated:
+        message += f"""\n\nThis page was first loaded {round(time_since_frame_navigated, 2)} second(s) ago. If the page is blank or the data is not available on the current page it could be because the page is still loading.\n\nDon't return an element that isn't what the user asked for; in this case it is better to return `status: impossible`, or `status: loading` if you think the page is still loading."""
+
+    res = await agent.add_message(message)
+
+    logger.info(f"Select agent response: {res}")
+
+    parsed = await parse_select_output(res)
+
+    # token_usage = res.usage.input_tokens + res.usage.output_tokens
+    return (0, 0, parsed)
+
+
+async def parse_select_output(text: str) -> Optional[SelectAgentResponse]:
+    json_pattern = r"```json(.*?)```"
+
+    json_matches = re.findall(json_pattern, text, re.DOTALL)
+
+    if not json_matches:
+        return None
+
+    try:
+        model = SelectAgentResponse.model_validate_json(json_matches[0])
+    except Exception:
+        model = None
+
+    return model
+
+
+async def parse_openai_select_response(
+    result: ChatCompletion,
+) -> Optional[SelectAgentResponse]:
+    json_pattern = r"```json(.*?)```"
+
+    # Ensure the result has a message and content field
+    if len(result.choices) == 0 or result.choices[0].message.content is None:
+        return None
+
+    # Extract the text content
+    text = result.choices[0].message.content
+
+    # Find JSON formatted code block in the response text
+    json_matches = re.findall(json_pattern, text, re.DOTALL)
+
+    if not json_matches:
+        return None
+
+    try:
+        # Attempt to validate and parse the JSON match
+        model = SelectAgentResponse.model_validate_json(json_matches[0])
+    except Exception:
+        # In case of any error during parsing
+        model = None
+
+    return model
diff --git a/dendrite/logic/get_element/cache.py b/dendrite/logic/get_element/cache.py
new file mode 100644
index 0000000..8cf9965
--- /dev/null
+++ b/dendrite/logic/get_element/cache.py
@@ -0,0 +1,30 @@
+from datetime import datetime
+from typing import List, Optional
+from urllib.parse import urlparse
+
+from dendrite.logic.cache.file_cache import FileCache
+from dendrite.models.selector import Selector
+
+
+async def get_selector_from_cache(
+    url: str, prompt: str, cache: FileCache[Selector]
+) -> Optional[List[Selector]]:
+    netloc = urlparse(url).netloc
+
+    return cache.get({"netloc": netloc, "prompt": prompt})
+
+
+async def add_selector_to_cache(
+    prompt: str, bs4_selector: str, url: str, cache: FileCache[Selector]
+) -> None:
+    created_at = datetime.now().isoformat()
+    netloc = urlparse(url).netloc
+    selector: Selector = Selector(
+        prompt=prompt,
+        selector=bs4_selector,
+        url=url,
+        netloc=netloc,
+        created_at=created_at,
+    )
+
+    cache.append({"netloc": netloc, "prompt": prompt}, selector)
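A sketch of the cache round-trip above. `FileCache`'s constructor isn't shown in this diff, so `cache` is assumed to come from `Config.element_cache`:

```python
from dendrite.logic.get_element.cache import (
    add_selector_to_cache,
    get_selector_from_cache,
)


async def remember(cache):
    await add_selector_to_cache(
        prompt="The search field",
        bs4_selector="input[name='q']",
        url="https://google.com/search?q=x",
        cache=cache,
    )
    # Lookups key on (netloc, prompt), so any URL on the same host matches:
    hits = await get_selector_from_cache(
        "https://google.com/imghp", "The search field", cache
    )
    return hits
```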
diff --git a/dendrite/logic/get_element/get_element.py b/dendrite/logic/get_element/get_element.py
new file mode 100644
index 0000000..8e28ec7
--- /dev/null
+++ b/dendrite/logic/get_element/get_element.py
@@ -0,0 +1,110 @@
+from typing import List, Optional
+
+from bs4 import BeautifulSoup, Tag
+from loguru import logger
+
+from dendrite.logic.config import Config
+from dendrite.logic.dom.css import check_if_selector_successful, find_css_selector
+from dendrite.logic.dom.strip import remove_hidden_elements
+from dendrite.logic.get_element.cache import (
+    add_selector_to_cache,
+    get_selector_from_cache,
+)
+from dendrite.models.dto.cached_selector_dto import CachedSelectorDTO
+from dendrite.models.dto.get_elements_dto import GetElementsDTO
+from dendrite.models.response.get_element_response import GetElementResponse
+from dendrite.models.selector import Selector
+
+from .hanifi_search import hanifi_search
+
+
+async def get_element(dto: GetElementsDTO, config: Config) -> GetElementResponse:
+    if isinstance(dto.prompt, str):
+        return await process_prompt(dto.prompt, dto, config)
+    raise ValueError(f"Expected a string prompt, got: {type(dto.prompt)}")
+
+
+async def process_prompt(
+    prompt: str, dto: GetElementsDTO, config: Config
+) -> GetElementResponse:
+    soup = BeautifulSoup(dto.page_information.raw_html, "lxml")
+    return await get_new_element(soup, prompt, dto, config)
+
+
+async def get_new_element(
+    soup: BeautifulSoup, prompt: str, dto: GetElementsDTO, config: Config
+) -> GetElementResponse:
+    soup_without_hidden_elements = remove_hidden_elements(soup)
+    element = await hanifi_search(
+        soup_without_hidden_elements,
+        prompt,
+        config,
+        dto.page_information.time_since_frame_navigated,
+    )
+    interactable = element[0]
+
+    if interactable.status == "success":
+        if interactable.dendrite_id is None:
+            interactable.status = "failed"
+            interactable.reason = "No d-id returned from agent"
+        else:
+            tag = soup_without_hidden_elements.find(
+                attrs={"d-id": interactable.dendrite_id}
+            )
+            if isinstance(tag, Tag):
+                selector = find_css_selector(tag, soup)
+                cache = config.element_cache
+                await add_selector_to_cache(
+                    prompt,
+                    bs4_selector=selector,
+                    url=dto.page_information.url,
+                    cache=cache,
+                )
+                return GetElementResponse(
+                    selectors=[selector],
+                    message=interactable.reason,
+                    d_id=interactable.dendrite_id,
+                    status="success",
+                )
+            interactable.status = "failed"
+            interactable.reason = "d-id does not exist in the soup"
+
+    return GetElementResponse(
+        message=interactable.reason,
+        status=interactable.status,
+    )
+
+
+async def get_cached_selector(dto: CachedSelectorDTO, config: Config) -> List[Selector]:
+    if not isinstance(dto.prompt, str):
+        return []
+    db_selectors = await get_selector_from_cache(
+        dto.url, dto.prompt, config.element_cache
+    )
+
+    if db_selectors is None:
+        return []
+
+    return db_selectors
+
+
+# async def check_cache(
+#     soup: BeautifulSoup, url: str, prompt: str, only_one: bool, config: Config
+# ) -> Optional[GetElementResponse]:
+#     cache = config.element_cache
+#     db_selectors = await get_selector_from_cache(url, prompt, cache)
+
+#     if db_selectors is None:
+#         return None
+
+#     if check_if_selector_successful(db_selectors.selector, soup, only_one):
+#         return GetElementResponse(
+#             selectors=[db_selectors.selector],
+#             status="success",
+#         )
+
+
+# async def get_cached_selector(dto: GetCachedSelectorDTO) -> Optional[Selector]:
+#     cache = config.element_cache
+#     db_selectors = await get_selector_from_cache(dto.url, dto.prompt, cache)
+#     return db_selectors
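A hedged sketch of driving `get_element` end to end. `Config`'s constructor isn't shown in this diff, so a `config` built elsewhere is assumed, and the URL and prompt are illustrative:

```python
from dendrite.logic.get_element.get_element import get_element
from dendrite.models.dto.get_elements_dto import GetElementsDTO
from dendrite.models.page_information import PageInformation


async def find_send_button(raw_html: str, screenshot_b64: str, config):
    dto = GetElementsDTO(
        prompt="The send button",
        page_information=PageInformation(
            url="https://example.com/compose",
            raw_html=raw_html,
            screenshot_base64=screenshot_b64,
            time_since_frame_navigated=1.5,
        ),
        only_one=True,
    )
    res = await get_element(dto, config)
    # On success the response carries one or more CSS selectors for the element.
    return res.selectors[0] if res.status == "success" and res.selectors else None
```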
diff --git a/dendrite/logic/get_element/hanifi_search.py b/dendrite/logic/get_element/hanifi_search.py
new file mode 100644
index 0000000..be8d357
--- /dev/null
+++ b/dendrite/logic/get_element/hanifi_search.py
@@ -0,0 +1,170 @@
+import asyncio
+from typing import Any, Coroutine, List, Optional, Tuple, Union
+
+from bs4 import BeautifulSoup, Tag
+
+from dendrite.logic.config import Config
+from dendrite.logic.dom.strip import strip_soup
+from dendrite.logic.llm.config import LLMConfig
+
+from .agents import segment_agent, select_agent
+from .agents.segment_agent import (
+    SegmentAgentFailureResponse,
+    SegmentAgentReponseType,
+    SegmentAgentSuccessResponse,
+    extract_relevant_d_ids,
+)
+from .hanifi_segment import SelectedTag, expand_tags, hanifi_segment
+from .models import Element
+
+
+async def get_expanded_dom(
+    soup: BeautifulSoup, prompt: str, llm_config: LLMConfig
+) -> Optional[Tuple[str, List[SegmentAgentReponseType], List[SelectedTag]]]:
+
+    new_nodes = hanifi_segment(soup, 6000, 3)
+    tags = await get_relevant_tags(prompt, new_nodes, llm_config)
+
+    successful_d_ids = [
+        (tag.d_id, tag.index, tag.reason)
+        for tag in tags
+        if isinstance(tag, SegmentAgentSuccessResponse)
+    ]
+
+    flat_list = [
+        SelectedTag(
+            d_id,
+            reason=segment_d_ids[2],
+            index=segment_d_ids[1],
+        )
+        for segment_d_ids in successful_d_ids
+        for d_id in segment_d_ids[0]
+    ]
+    dom = expand_tags(soup, flat_list)
+    if dom is None:
+        return None
+    return dom, tags, flat_list
+
+
+async def hanifi_search(
+    soup: BeautifulSoup,
+    prompt: str,
+    config: Config,
+    time_since_frame_navigated: Optional[float] = None,
+    return_several: bool = False,
+) -> List[Element]:
+
+    stripped_soup = strip_soup(soup)
+    expand_res = await get_expanded_dom(stripped_soup, prompt, config.llm_config)
+
+    if expand_res is None:
+        return [Element(status="failed", reason="No element found when expanding HTML")]
+
+    expanded, tags, flat_list = expand_res
+
+    failed_messages = []
+    successful_tags: List[SegmentAgentSuccessResponse] = []
+    for tag in tags:
+        if isinstance(tag, SegmentAgentFailureResponse):
+            failed_messages.append(tag)
+        else:
+            successful_tags.append(tag)
+
+    if len(successful_tags) == 0:
+        return [Element(status="failed", reason="No relevant tags found in DOM")]
+
+    (input_token, output_token, res) = await select_agent.select_best_tag(
+        expanded,
+        flat_list,
+        prompt,
+        time_since_frame_navigated,
+        config.llm_config,
+        return_several,
+    )
+
+    if not res:
+        return [Element(status="failed", reason="Failed to get element")]
+
+    if res.d_id:
+        if return_several:
+            return [
+                Element(status=res.status, dendrite_id=d_id, reason=res.reason)
+                for d_id in res.d_id
+            ]
+        else:
+            return [
+                Element(status=res.status, dendrite_id=res.d_id[0], reason=res.reason)
+            ]
+
+    return [Element(status=res.status, dendrite_id=None, reason=res.reason)]
+
+
+async def get_relevant_tags(
+    prompt: str,
+    segments: List[List[str]],
+    llm_config: LLMConfig,
+) -> List[SegmentAgentReponseType]:
+
+    tasks: List[Coroutine[Any, Any, SegmentAgentReponseType]] = []
+
+    for index, segment in enumerate(segments):
+        tasks.append(extract_relevant_d_ids(prompt, segment, index, llm_config))
+
+    results: List[SegmentAgentReponseType] = await asyncio.gather(*tasks)
+    return results
+
+
+def get_if_one_tag(
+    lst: List[SegmentAgentSuccessResponse],
+) -> Optional[SegmentAgentSuccessResponse]:
+    curr_item = None
+    for item in lst:
+        if isinstance(item, SegmentAgentSuccessResponse):
+            d_id_count = len(item.d_id)
+            if d_id_count > 1:  # There are multiple d_ids
+                return None
+
+            if curr_item is None:
+                curr_item = item  # There should always be at least one d_id
+            else:  # We have already found a d_id
+                return None
+
+    return curr_item
+
+
+def process_segments(
+    nodes: List[Union[Tag, BeautifulSoup]], threshold: int = 5000
+) -> List[List[Union[Tag, BeautifulSoup]]]:
+    processed_segments: List[List[Union[Tag, BeautifulSoup]]] = []
+    grouped_segments: List[Union[Tag, BeautifulSoup]] = []
+    current_len = 0
+    for index, node in enumerate(nodes):
+        node_len = len(str(node))
+
+        if current_len + node_len > threshold:
+            processed_segments.append(grouped_segments)
+            grouped_segments = []
+            current_len = 0
+
+        grouped_segments.append(node)
+        current_len += node_len
+
+    if grouped_segments:
+        processed_segments.append(grouped_segments)
+
+    return processed_segments
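The grouping above packs nodes greedily: a group is flushed as soon as adding the next node would cross the character threshold. An illustrative, standalone check; the sample HTML is made up:

```python
from bs4 import BeautifulSoup

from dendrite.logic.get_element.hanifi_search import process_segments

html = "<div>" + "".join(f"<p>item {i}</p>" for i in range(6)) + "</div>"
soup = BeautifulSoup(html, "html.parser")
nodes = list(soup.div.children)

# Each <p> is 13 characters, so a threshold of 30 fits two nodes per group.
groups = process_segments(nodes, threshold=30)
print([len(group) for group in groups])  # -> [2, 2, 2]
```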
+
+
+def dump_processed_segments(processed_segments: List[List[Union[Tag, BeautifulSoup]]]):
+    for index, processed_segment in enumerate(processed_segments):
+        with open(f"processed_segments/segment_{index}.html", "w") as f:
+            f.write("######\n\n".join(map(lambda x: x.prettify(), processed_segment)))
+
+
+def dump_nodes(nodes: List[Union[Tag, BeautifulSoup]]):
+    for index, node in enumerate(nodes):
+        with open(f"nodes/node_{index}.html", "w") as f:
+            f.write(node.prettify())
diff --git a/dendrite/logic/get_element/hanifi_segment.py b/dendrite/logic/get_element/hanifi_segment.py
new file mode 100644
index 0000000..5c6a092
--- /dev/null
+++ b/dendrite/logic/get_element/hanifi_segment.py
@@ -0,0 +1,233 @@
+import copy
+from collections import deque
+from dataclasses import dataclass
+from typing import List, Optional, Union
+
+from bs4 import BeautifulSoup, Comment, Doctype, NavigableString, Tag
+
+from ..dom.truncate import truncate_and_remove_whitespace, truncate_long_string_w_words
+
+
+# Define a threshold (e.g., 30% of the total document size)
+def calculate_size(element):
+    as_str = str(element)
+    return len(as_str)
+
+
+def format_tag(node: Union[BeautifulSoup, Tag]):
+    opening_tag = f"<{node.name}"
+
+    # Add all attributes to the opening tag
+    for attr, value in node.attrs.items():
+        opening_tag += f' {attr}="{value}"'
+
+    # Close the opening tag
+    opening_tag += ">"
+    return opening_tag
+
+
+@dataclass
+class SegmentGroup:
+    node: List[Union[BeautifulSoup, Tag, str]]
+    parents: List[Union[BeautifulSoup, Tag]]
+    idx: int
+    size: int
+    order: int = 0
+
+
+def hanifi_segment(
+    node: Union[BeautifulSoup, Tag],
+    threshold,
+    num_parents: int,
+) -> List[List[str]]:
+    segment_groups = _new_segment_tree(
+        node, threshold, num_parents, 0, deque(maxlen=num_parents)
+    )
+    return group_segments(segment_groups, threshold * 1.1)
+
+
+def group_segments(segments: List[SegmentGroup], threshold: int) -> List[List[str]]:
+    grouped_segments: List[List[str]] = []
+    current_group: List[str] = []
+    current_size = 0
+
+    for segment in segments:
+        # If adding the current segment doesn't exceed the threshold
+        if current_size + segment.size <= threshold:
+            current_group.append(reconstruct_html(segment))
+            current_size += segment.size
+        else:
+            # Add the current group to the grouped_segments
+            grouped_segments.append(current_group)
+            # Start a new group with the current segment
+            current_group = [reconstruct_html(segment)]
+            current_size = segment.size
+
+    # Add the last group if it's not empty
+    if current_group:
+        grouped_segments.append(current_group)
+
+    return grouped_segments
+
+
+def reconstruct_html(segment_group: SegmentGroup) -> str:
+    # Initialize an empty list to build the HTML parts
+    html_parts = []
+
+    # If the index is not 0, add "..." 
before the first sibling node + if segment_group.idx != 0: + html_parts.append("...") + + # Add the string representation of each node in the segment group + for node in segment_group.node: + html_parts.append(str(node)) + + # Combine the node HTML parts + nodes_html = "\n".join(html_parts) + + # Build the HTML by wrapping the nodes_html within the parents + for parent in reversed(segment_group.parents): + # Get the opening tag with attributes + attrs = "".join([f' {k}="{v}"' for k, v in parent.attrs.items()]) + opening_tag = f"<{parent.name}{attrs}>" + closing_tag = f"" + # Wrap the current nodes_html within this parent + nodes_html = f"{opening_tag}\n{nodes_html}\n{closing_tag}" + + # Use BeautifulSoup to parse and prettify the final HTML + soup = BeautifulSoup(nodes_html, "html.parser") + return soup.prettify() + + +def _new_segment_tree( + node: Union[BeautifulSoup, Tag], + threshold: int, + num_parents: int, + index, + queue: deque, +) -> List[SegmentGroup]: + + result_nodes = [] + idx = 0 + current_group: Optional[SegmentGroup] = None + queue.append(node) + for child in node.children: # type: ignore + + if isinstance(child, (NavigableString, Tag)): + size = 0 + if isinstance(child, NavigableString): + child = str(child) + size = len(child) + if size > threshold: + truncated = truncate_long_string_w_words( + child, max_len_start=threshold // 4, max_len_end=threshold // 4 + ) + result_nodes.append( + SegmentGroup( + node=[truncated], + parents=list(queue.copy()), + idx=idx, + size=size, + ) + ) + idx += 1 + continue + + elif isinstance(child, Tag): + size = calculate_size(child) + if size > threshold: + result_nodes.extend( + _new_segment_tree( + child, threshold, num_parents, idx, queue.copy() + ) + ) + idx += 1 + continue + + if current_group is not None: + if current_group.size + size < threshold: + current_group.node.append(child) + current_group.size += size + else: + result_nodes.append(current_group) + # **Create a new current_group with the current child** + current_group = SegmentGroup( + node=[child], parents=list(queue.copy()), idx=idx, size=size + ) + idx += 1 + continue + + # **Initialize current_group if it's None** + current_group = SegmentGroup( + node=[child], parents=list(queue.copy()), idx=idx, size=size + ) + idx += 1 + + if current_group is not None: + result_nodes.append(current_group) + + return result_nodes + + +@dataclass +class SelectedTag: + d_id: str + reason: str + index: int # index of the segment the tag belongs in + + +def expand_tags(soup: BeautifulSoup, tags: List[SelectedTag]) -> Optional[str]: + + target_d_ids = {tag.d_id for tag in tags} + target_elements = soup.find_all( + lambda tag: tag.has_attr("d-id") and tag["d-id"] in target_d_ids + ) + + if len(target_elements) == 0: + return None + + parents_list = [] + for element in target_elements: + parents = list(element.parents) + parents_list.append(parents) + + all_parent_d_ids = frozenset( + d_id + for parents in parents_list + for parent in parents + if isinstance(parent, Tag) and parent.has_attr("d-id") + for d_id in [parent.get("d-id")] + ) + + def traverse_and_simplify(element): + if isinstance(element, Tag): + d_id = element.get("d-id", "") + if element in target_elements: + # Add comments to mark the selected element + element.insert_before(Comment(f"SELECTED ELEMENT START ({d_id})")) + element.insert_after(Comment(f"SELECTED ELEMENT END ({d_id})")) + + # If element is too large, continue traversing since we don't want to display large elements + if len(str(element)) > 40000: + for child in 
list(element.children): + if isinstance(child, Tag): + traverse_and_simplify(child) + return + elif d_id in all_parent_d_ids or element.name == "body": + for child in list(element.children): + if isinstance(child, Tag): + traverse_and_simplify(child) + elif isinstance(element, Tag) and element.name != "body": + try: + truncated_text = truncate_and_remove_whitespace( + element.get_text(), max_len_start=200, max_len_end=200 + ) + element.replace_with(truncated_text) + except ValueError: + element.replace_with("...") + + soup_copy = copy.copy(soup) + traverse_and_simplify(soup_copy.body) + simplified_html = soup_copy.prettify() + + return simplified_html diff --git a/dendrite/logic/get_element/models.py b/dendrite/logic/get_element/models.py new file mode 100644 index 0000000..c13f842 --- /dev/null +++ b/dendrite/logic/get_element/models.py @@ -0,0 +1,16 @@ +from dataclasses import dataclass +from typing import NamedTuple, Optional + +from dendrite.browser._common.types import Status + + +class ExpandedTag(NamedTuple): + d_id: str + html: str + + +@dataclass +class Element: + status: Status + reason: str + dendrite_id: Optional[str] = None diff --git a/dendrite/sync_api/_core/_managers/__init__.py b/dendrite/logic/llm/__init__.py similarity index 100% rename from dendrite/sync_api/_core/_managers/__init__.py rename to dendrite/logic/llm/__init__.py diff --git a/dendrite/logic/llm/agent.py b/dendrite/logic/llm/agent.py new file mode 100644 index 0000000..4f8c2cf --- /dev/null +++ b/dendrite/logic/llm/agent.py @@ -0,0 +1,236 @@ +import json +from typing import Any, Dict, List, Optional, Union, cast + +import litellm +from litellm.files.main import ModelResponse +from loguru import logger +from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam + +Message = ChatCompletionMessageParam + + +class LLMContextLengthExceededException(Exception): + CONTEXT_LIMIT_ERRORS = [ + "expected a string with maximum length", + "maximum context length", + "context length exceeded", + "context_length_exceeded", + "context window full", + "too many tokens", + "input is too long", + "exceeds token limit", + ] + + def __init__(self, error_message: str): + self.original_error_message = error_message + super().__init__(self._get_error_message(error_message)) + + def _is_context_limit_error(self, error_message: str) -> bool: + return any( + phrase.lower() in error_message.lower() + for phrase in self.CONTEXT_LIMIT_ERRORS + ) + + def _get_error_message(self, error_message: str): + return ( + f"LLM context length exceeded. Original error: {error_message}\n" + "Consider using a smaller input or implementing a text splitting strategy." 
+        )
+
+
+LLM_CONTEXT_WINDOW_SIZES = {
+    # openai
+    "gpt-4": 8192,
+    "gpt-4o": 128000,
+    "gpt-4o-mini": 128000,
+    "gpt-4-turbo": 128000,
+    "o1-preview": 128000,
+    "o1-mini": 128000,
+    # deepseek
+    "deepseek-chat": 128000,
+    # groq
+    "gemma2-9b-it": 8192,
+    "gemma-7b-it": 8192,
+    "llama3-groq-70b-8192-tool-use-preview": 8192,
+    "llama3-groq-8b-8192-tool-use-preview": 8192,
+    "llama-3.1-70b-versatile": 131072,
+    "llama-3.1-8b-instant": 131072,
+    "llama-3.2-1b-preview": 8192,
+    "llama-3.2-3b-preview": 8192,
+    "llama-3.2-11b-text-preview": 8192,
+    "llama-3.2-90b-text-preview": 8192,
+    "llama3-70b-8192": 8192,
+    "llama3-8b-8192": 8192,
+    "mixtral-8x7b-32768": 32768,
+}
+
+
+class LLM:
+    def __init__(
+        self,
+        model: str,
+        timeout: Optional[Union[float, int]] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        n: Optional[int] = None,
+        stop: Optional[Union[str, List[str]]] = None,
+        max_completion_tokens: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
+        response_format: Optional[Dict[str, Any]] = None,
+        seed: Optional[int] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
+        base_url: Optional[str] = None,
+        api_version: Optional[str] = None,
+        api_key: Optional[str] = None,
+        callbacks: Optional[List[Any]] = None,
+        **kwargs,
+    ):
+        self.model = model
+        self.timeout = timeout
+        self.temperature = temperature
+        self.top_p = top_p
+        self.n = n
+        self.stop = stop
+        self.max_completion_tokens = max_completion_tokens
+        self.max_tokens = max_tokens
+        self.presence_penalty = presence_penalty
+        self.frequency_penalty = frequency_penalty
+        self.logit_bias = logit_bias
+        self.response_format = response_format
+        self.seed = seed
+        self.logprobs = logprobs
+        self.top_logprobs = top_logprobs
+        self.base_url = base_url
+        self.api_version = api_version
+        self.api_key = api_key
+        self.callbacks = callbacks or []
+        self.kwargs = kwargs
+
+        litellm.drop_params = True
+
+    def call(self, messages: List[Message]) -> str:
+
+        try:
+            params = {
+                "model": self.model,
+                "messages": messages,
+                "timeout": self.timeout,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "n": self.n,
+                "stop": self.stop,
+                "max_tokens": self.max_tokens or self.max_completion_tokens,
+                "presence_penalty": self.presence_penalty,
+                "frequency_penalty": self.frequency_penalty,
+                "logit_bias": self.logit_bias,
+                "response_format": self.response_format,
+                "seed": self.seed,
+                "logprobs": self.logprobs,
+                "top_logprobs": self.top_logprobs,
+                "api_base": self.base_url,
+                "api_version": self.api_version,
+                "api_key": self.api_key,
+                "stream": False,
+                **self.kwargs,
+            }
+
+            params = {k: v for k, v in params.items() if v is not None}
+
+            response = litellm.completion(**params)
+            response = cast(ModelResponse, response)
+            return response["choices"][0]["message"]["content"]
+        except Exception as e:
+            if not LLMContextLengthExceededException(str(e))._is_context_limit_error(
+                str(e)
+            ):
+                logger.error(f"LiteLLM call failed: {str(e)}")
+
+            raise  # Re-raise the exception after logging
+
+    async def acall(self, messages: List[Message]) -> ModelResponse:
+
+        try:
+            params = {
+                "model": self.model,
+                "messages": messages,
+                "timeout": self.timeout,
+                "temperature": self.temperature,
+                "top_p": self.top_p,
+                "n": self.n,
+                "stop": self.stop,
+                "max_tokens": self.max_tokens or self.max_completion_tokens,
+                "presence_penalty": self.presence_penalty,
"frequency_penalty": self.frequency_penalty, + "logit_bias": self.logit_bias, + "response_format": self.response_format, + "seed": self.seed, + "logprobs": self.logprobs, + "top_logprobs": self.top_logprobs, + "api_base": self.base_url, + "api_version": self.api_version, + "api_key": self.api_key, + "stream": False, + **self.kwargs, + } + + params = {k: v for k, v in params.items() if v is not None} + + response = await litellm.acompletion(**params) + response = cast(ModelResponse, response) + return response + except Exception as e: + if not LLMContextLengthExceededException(str(e))._is_context_limit_error( + str(e) + ): + logger.error(f"LiteLLM call failed: {str(e)}") + + raise # Re-raise the exception after logging + + def get_context_window_size(self) -> int: + return int(LLM_CONTEXT_WINDOW_SIZES.get(self.model, 8192) * 0.75) + + +class Agent: + def __init__( + self, + model: Union[LLM, str], + system_prompt: Optional[str] = None, + ): + self.messages: List[Message] = ( + [] if not system_prompt else [{"role": "system", "content": system_prompt}] + ) + + if isinstance(model, str): + self.llm = LLM(model) + else: + self.llm = model + + async def add_message(self, message: str) -> str: + self.messages.append({"role": "user", "content": message}) + + text = await self.call_llm(self.messages) + + self.messages.append({"role": "assistant", "content": text}) + + return text + + async def call_llm(self, messages: List[Message]) -> str: + res = await self.llm.acall(messages) + + if len(res.choices) == 0: + logger.error("No choices outputed: ", res) + raise Exception("No choices from model") + + choices = cast(List[litellm.Choices], res.choices) + text = choices[0].message.content + + if text is None: + logger.error( + f"No text content in the response | response: {res} ", + ) + raise Exception("No text content in the response") + return text diff --git a/dendrite/logic/llm/config.py b/dendrite/logic/llm/config.py new file mode 100644 index 0000000..abc35eb --- /dev/null +++ b/dendrite/logic/llm/config.py @@ -0,0 +1,104 @@ +from typing import Dict, Literal, Optional, overload + +from dendrite.logic.llm.agent import LLM + +AGENTS = Literal[ + "extract_agent", + "scroll_agent", + "ask_page_agent", + "segment_agent", + "select_agent", + "verify_action_agent", +] + +DEFAULT_LLM: Dict[str, LLM] = { + "extract_agent": LLM( + "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500 + ), + "scroll_agent": LLM("claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500), + "ask_page_agent": LLM( + "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500 + ), + "segment_agent": LLM("claude-3-haiku-20240307", temperature=0, max_tokens=1500), + "select_agent": LLM("claude-3-5-sonnet-20241022", temperature=0, max_tokens=1500), + "verify_action_agent": LLM( + "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500 + ), +} + + +class LLMConfig: + def __init__( + self, + default_agents: Optional[Dict[str, LLM]] = None, + default_llm: Optional[LLM] = None, + ): + self.registered_llms: Dict[str, LLM] = DEFAULT_LLM.copy() + if default_agents: + self.registered_llms.update(default_agents) + + self.default_llm = default_llm or LLM( + "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500 + ) + + async def register_agent(self, agent: str, llm: LLM) -> None: + """ + Register an LLM agent by name. 
diff --git a/dendrite/logic/llm/config.py b/dendrite/logic/llm/config.py
new file mode 100644
index 0000000..abc35eb
--- /dev/null
+++ b/dendrite/logic/llm/config.py
@@ -0,0 +1,104 @@
+from typing import Dict, Literal, Optional, overload
+
+from dendrite.logic.llm.agent import LLM
+
+AGENTS = Literal[
+    "extract_agent",
+    "scroll_agent",
+    "ask_page_agent",
+    "segment_agent",
+    "select_agent",
+    "verify_action_agent",
+]
+
+DEFAULT_LLM: Dict[str, LLM] = {
+    "extract_agent": LLM(
+        "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500
+    ),
+    "scroll_agent": LLM("claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500),
+    "ask_page_agent": LLM(
+        "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500
+    ),
+    "segment_agent": LLM("claude-3-haiku-20240307", temperature=0, max_tokens=1500),
+    "select_agent": LLM("claude-3-5-sonnet-20241022", temperature=0, max_tokens=1500),
+    "verify_action_agent": LLM(
+        "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500
+    ),
+}
+
+
+class LLMConfig:
+    def __init__(
+        self,
+        default_agents: Optional[Dict[str, LLM]] = None,
+        default_llm: Optional[LLM] = None,
+    ):
+        self.registered_llms: Dict[str, LLM] = DEFAULT_LLM.copy()
+        if default_agents:
+            self.registered_llms.update(default_agents)
+
+        self.default_llm = default_llm or LLM(
+            "claude-3-5-sonnet-20241022", temperature=0.3, max_tokens=1500
+        )
+
+    async def register_agent(self, agent: str, llm: LLM) -> None:
+        """
+        Register an LLM agent by name.
+
+        Args:
+            agent: The name of the agent to register
+            llm: The LLM agent to register
+        """
+        self.registered_llms[agent] = llm
+
+    async def register(self, agents: Dict[str, LLM]) -> None:
+        """
+        Register multiple LLM agents at once, overriding any agent that has already been registered.
+
+        Args:
+            agents: A dictionary of agent names to LLM agents
+        """
+        self.registered_llms.update(agents)
+
+    @overload
+    def get(self, agent: str) -> LLM: ...
+
+    @overload
+    def get(self, agent: str, default: LLM) -> LLM: ...
+
+    @overload
+    def get(
+        self,
+        agent: str,
+        default: Optional[LLM] = ...,
+        use_default: Literal[False] = False,
+    ) -> Optional[LLM]: ...
+
+    def get(
+        self,
+        agent: str,
+        default: Optional[LLM] = None,
+        use_default: bool = True,
+    ) -> Optional[LLM]:
+        """
+        Get an LLM agent by name, optionally falling back to a default if not found.
+
+        Args:
+            agent: The name of the agent to retrieve
+            default: Optional specific default LLM to use if agent not found
+            use_default: If True, use self.default_llm when agent not found and default is None
+
+        Returns:
+            Optional[LLM]: The requested LLM agent, default LLM, or None
+        """
+        llm = self.registered_llms.get(agent)
+        if llm is not None:
+            return llm
+
+        if default is not None:
+            return default
+
+        if use_default and self.default_llm is not None:
+            return self.default_llm
+
+        return None
diff --git a/dendrite/logic/llm/token_count.py b/dendrite/logic/llm/token_count.py
new file mode 100644
index 0000000..c7f57fb
--- /dev/null
+++ b/dendrite/logic/llm/token_count.py
@@ -0,0 +1,7 @@
+import tiktoken
+
+
+def token_count(string: str, encoding_name: str = "gpt-4o") -> int:
+    encoding = tiktoken.encoding_for_model(encoding_name)
+    num_tokens = len(encoding.encode(string))
+    return num_tokens
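A sketch of overriding the per-agent defaults via `LLMConfig` above; the OpenAI model ids are taken from the context-window table in this diff:

```python
from dendrite.logic.llm.agent import LLM
from dendrite.logic.llm.config import LLMConfig

config = LLMConfig(
    default_agents={"segment_agent": LLM("gpt-4o-mini", temperature=0, max_tokens=1500)},
    default_llm=LLM("gpt-4o", temperature=0.3),
)
assert config.get("segment_agent").model == "gpt-4o-mini"
assert config.get("unknown_agent").model == "gpt-4o"  # falls back to default_llm
```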
diff --git a/dendrite/logic/sync_logic_engine.py b/dendrite/logic/sync_logic_engine.py
new file mode 100644
index 0000000..ccd2dd5
--- /dev/null
+++ b/dendrite/logic/sync_logic_engine.py
@@ -0,0 +1,73 @@
+import asyncio
+import threading
+from concurrent.futures import ThreadPoolExecutor
+from typing import Any, Coroutine, List, TypeVar
+
+from dendrite.logic.ask import ask
+from dendrite.logic.config import Config
+from dendrite.logic.extract import extract
+from dendrite.logic.get_element import get_element
+from dendrite.logic.verify_interaction import verify_interaction
+from dendrite.models.dto.ask_page_dto import AskPageDTO
+from dendrite.models.dto.cached_extract_dto import CachedExtractDTO
+from dendrite.models.dto.cached_selector_dto import CachedSelectorDTO
+from dendrite.models.dto.extract_dto import ExtractDTO
+from dendrite.models.dto.get_elements_dto import GetElementsDTO
+from dendrite.models.dto.make_interaction_dto import VerifyActionDTO
+from dendrite.models.response.ask_page_response import AskPageResponse
+from dendrite.models.response.extract_response import ExtractResponse
+from dendrite.models.response.get_element_response import GetElementResponse
+from dendrite.models.response.interaction_response import InteractionResponse
+from dendrite.models.scripts import Script
+from dendrite.models.selector import Selector
+
+T = TypeVar("T")
+
+
+def run_coroutine_sync(coroutine: Coroutine[Any, Any, T], timeout: float = 30) -> T:
+    def run_in_new_loop():
+        new_loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(new_loop)
+        try:
+            return new_loop.run_until_complete(coroutine)
+        finally:
+            new_loop.close()
+
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        return asyncio.run(coroutine)
+
+    if threading.current_thread() is threading.main_thread():
+        if not loop.is_running():
+            return loop.run_until_complete(coroutine)
+        else:
+            with ThreadPoolExecutor() as pool:
+                future = pool.submit(run_in_new_loop)
+                return future.result(timeout=timeout)
+    else:
+        return asyncio.run_coroutine_threadsafe(coroutine, loop).result()
+
+
+class LogicEngine:
+
+    def __init__(self, config: Config):
+        self._config = config
+
+    def get_element(self, dto: GetElementsDTO) -> GetElementResponse:
+        return run_coroutine_sync(get_element.get_element(dto, self._config))
+
+    def get_cached_selectors(self, dto: CachedSelectorDTO) -> List[Selector]:
+        return run_coroutine_sync(get_element.get_cached_selector(dto, self._config))
+
+    def get_cached_scripts(self, dto: CachedExtractDTO) -> List[Script]:
+        return run_coroutine_sync(extract.get_cached_scripts(dto, self._config))
+
+    def extract(self, dto: ExtractDTO) -> ExtractResponse:
+        return run_coroutine_sync(extract.extract(dto, self._config))
+
+    def verify_action(self, dto: VerifyActionDTO) -> InteractionResponse:
+        return run_coroutine_sync(verify_interaction.verify_action(dto, self._config))
+
+    def ask_page(self, dto: AskPageDTO) -> AskPageResponse:
+        return run_coroutine_sync(ask.ask_page_action(dto, self._config))
diff --git a/dendrite/sync_api/_core/models/__init__.py b/dendrite/logic/verify_interaction/__init__.py
similarity index 100%
rename from dendrite/sync_api/_core/models/__init__.py
rename to dendrite/logic/verify_interaction/__init__.py
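`run_coroutine_sync` is what lets the blocking `LogicEngine` reuse the async implementations above. A tiny, self-contained sketch of its behavior when no event loop is running:

```python
import asyncio

from dendrite.logic.sync_logic_engine import run_coroutine_sync


async def answer() -> int:
    await asyncio.sleep(0)
    return 42

print(run_coroutine_sync(answer()))  # -> 42, even without a running event loop
```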
diff --git a/dendrite/logic/verify_interaction/verify_interaction.py b/dendrite/logic/verify_interaction/verify_interaction.py
new file mode 100644
index 0000000..4efe943
--- /dev/null
+++ b/dendrite/logic/verify_interaction/verify_interaction.py
@@ -0,0 +1,92 @@
+import json
+from typing import List
+
+from bs4 import BeautifulSoup
+
+from dendrite.logic.config import Config
+from dendrite.logic.llm.agent import LLM, Agent, Message
+from dendrite.models.dto.make_interaction_dto import VerifyActionDTO
+from dendrite.models.response.interaction_response import InteractionResponse
+
+
+async def verify_action(
+    make_interaction_dto: VerifyActionDTO, config: Config
+) -> InteractionResponse:
+
+    if (
+        make_interaction_dto.interaction_type == "fill"
+        and make_interaction_dto.value == ""
+    ):
+        raise Exception("Error: You need to specify the keys you want to send.")
+
+    interaction_verb = ""
+    if make_interaction_dto.interaction_type == "click":
+        interaction_verb = "clicked on"
+    elif make_interaction_dto.interaction_type == "fill":
+        interaction_verb = "sent keys to"
+
+    locator_desc = ""
+    if make_interaction_dto.dendrite_id != "":
+        locator_desc = f"the dendrite id '{make_interaction_dto.dendrite_id}'"
+
+    expected_outcome = (
+        ""
+        if make_interaction_dto.expected_outcome is None
+        else f"The expected outcome is: '{make_interaction_dto.expected_outcome}'"
+    )
+    prompt = f"I {interaction_verb} a <{make_interaction_dto.tag_name}> element with {locator_desc}. {expected_outcome}"
+
+    messages: List[Message] = [
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": prompt,
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the viewport before the interaction:",
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{make_interaction_dto.screenshot_before}"
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "Here is the viewport after the interaction:",
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{make_interaction_dto.screenshot_after}"
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": """Based on the expected outcome, please output a json object that either confirms that the interaction was successful or that it failed. Output a json object like this with no description or backticks, just valid json. {"status": "success" | "failed", "message": "Give a short description of what happened and if the interaction completed successfully or failed to reach the expected outcome, write max 100 characters."}""",
+                },
+            ],
+        },
+    ]
+
+    default = LLM(model="gpt-4o", max_tokens=150)
+    agent = Agent(config.llm_config.get("verify_action_agent", default))
+
+    res = await agent.call_llm(messages)
+    try:
+        dict_res = json.loads(res)
+        return InteractionResponse(
+            message=dict_res["message"],
+            status=dict_res["status"],
+        )
+    except (json.JSONDecodeError, KeyError):
+        pass
+
+    raise Exception("Failed to parse the interaction verification response.")
diff --git a/dendrite/sync_api/_dom/__init__.py b/dendrite/models/__init__.py
similarity index 100%
rename from dendrite/sync_api/_dom/__init__.py
rename to dendrite/models/__init__.py
diff --git a/dendrite/sync_api/_core/models/api_config.py b/dendrite/models/api_config.py
similarity index 93%
rename from dendrite/sync_api/_core/models/api_config.py
rename to dendrite/models/api_config.py
index 7d90502..ad58987 100644
--- a/dendrite/sync_api/_core/models/api_config.py
+++ b/dendrite/models/api_config.py
@@ -1,6 +1,8 @@
 from typing import Optional
+
 from pydantic import BaseModel, model_validator
-from dendrite._common._exceptions.dendrite_exception import MissingApiKeyError
+
+from dendrite.browser._common._exceptions.dendrite_exception import MissingApiKeyError
 
 
 class APIConfig(BaseModel):
diff --git a/dendrite/sync_api/_ext_impl/browserless/__init__.py b/dendrite/models/dto/__init__.py
similarity index 100%
rename from dendrite/sync_api/_ext_impl/browserless/__init__.py
rename to dendrite/models/dto/__init__.py
diff --git a/dendrite/sync_api/_api/dto/ask_page_dto.py b/dendrite/models/dto/ask_page_dto.py
similarity index 52%
rename from dendrite/sync_api/_api/dto/ask_page_dto.py
rename to dendrite/models/dto/ask_page_dto.py
index f3eb650..ad039de 100644
--- a/dendrite/sync_api/_api/dto/ask_page_dto.py
+++ b/dendrite/models/dto/ask_page_dto.py
@@ -1,11 +1,11 @@
 from typing import Any, Optional
+
 from pydantic import BaseModel
-from dendrite.sync_api._core.models.api_config import APIConfig
-from dendrite.sync_api._core.models.page_information import PageInformation
+
+from dendrite.models.page_information import PageInformation
 
 
 class AskPageDTO(BaseModel):
     prompt: str
     return_schema: Optional[Any]
     page_information: PageInformation
-    api_config: APIConfig
diff --git a/dendrite/models/dto/cached_extract_dto.py b/dendrite/models/dto/cached_extract_dto.py
new file mode 100644
index 0000000..83c4f47
--- /dev/null
+++ b/dendrite/models/dto/cached_extract_dto.py
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+
+class CachedExtractDTO(BaseModel):
+    url: str
+    prompt: str
diff --git a/dendrite/models/dto/cached_selector_dto.py b/dendrite/models/dto/cached_selector_dto.py
new file mode 100644
index 0000000..f4b243b
--- /dev/null
+++ b/dendrite/models/dto/cached_selector_dto.py
@@ -0,0 +1,6 @@
+from pydantic import BaseModel
+
+
+class CachedSelectorDTO(BaseModel):
+    url: str
+    prompt: str
diff --git a/dendrite/async_api/_api/dto/extract_dto.py b/dendrite/models/dto/extract_dto.py
similarity index 59%
rename from dendrite/async_api/_api/dto/extract_dto.py
rename to dendrite/models/dto/extract_dto.py
index 0216cce..e87544c 100644
--- a/dendrite/async_api/_api/dto/extract_dto.py
+++ b/dendrite/models/dto/extract_dto.py
@@ -1,25 +1,24 @@
 import json
-from typing import Any
+from typing import Any, Optional
+
 from pydantic import BaseModel
-from dendrite.async_api._core.models.api_config import APIConfig
-from dendrite.async_api._core.models.page_information import PageInformation
+
+from dendrite.models.page_information import PageInformation
 
 
 class ExtractDTO(BaseModel):
     page_information: PageInformation
-    api_config: APIConfig
     prompt: str
+
     return_data_json_schema: Any
     use_screenshot: bool = False
-    use_cache: bool = True
-    force_use_cache: bool = False
 
     @property
     def combined_prompt(self) -> str:
         json_schema_prompt = (
             ""
             if self.return_data_json_schema is None
             else f"\nJson schema: {json.dumps(self.return_data_json_schema)}"
         )
         return f"Task: {self.prompt}{json_schema_prompt}"
diff --git a/dendrite/async_api/_api/dto/get_elements_dto.py b/dendrite/models/dto/get_elements_dto.py
similarity index 55%
rename from dendrite/async_api/_api/dto/get_elements_dto.py
rename to dendrite/models/dto/get_elements_dto.py
index 86118a2..aeb488d 100644
--- a/dendrite/async_api/_api/dto/get_elements_dto.py
+++ b/dendrite/models/dto/get_elements_dto.py
@@ -1,8 +1,8 @@
 from typing import Dict, Union
+
 from pydantic import BaseModel
-from dendrite.async_api._core.models.api_config import APIConfig
-from dendrite.async_api._core.models.page_information import PageInformation
+from dendrite.models.page_information import PageInformation
 
 
 class CheckSelectorCacheDTO(BaseModel):
@@ -11,9 +11,6 @@ class CheckSelectorCacheDTO(BaseModel):
 
 
 class GetElementsDTO(BaseModel):
-    page_information: PageInformation
     prompt: Union[str, Dict[str, str]]
-    api_config: APIConfig
-    use_cache: bool = True
+    page_information: PageInformation
     only_one: bool
-    force_use_cache: bool = False
diff --git a/dendrite/models/dto/make_interaction_dto.py b/dendrite/models/dto/make_interaction_dto.py
new file mode 100644
index 0000000..4fcab0f
--- /dev/null
+++ b/dendrite/models/dto/make_interaction_dto.py
@@ -0,0 +1,18 @@
+from typing import Literal, Optional
+
+from pydantic import BaseModel
+
+from dendrite.models.page_information import PageDiffInformation
+
+InteractionType = Literal["click", "fill", "hover"]
+
+
+class VerifyActionDTO(BaseModel):
+    url: str
+    dendrite_id: str
+    interaction_type: InteractionType
+    tag_name: str
+    value: Optional[str] = None
+    expected_outcome: str
+    screenshot_before: str
+    screenshot_after: str
diff --git a/dendrite/sync_api/_core/models/page_information.py b/dendrite/models/page_information.py
similarity index 50%
rename from dendrite/sync_api/_core/models/page_information.py
rename to dendrite/models/page_information.py
index 67e1909..aded4d5 100644
--- a/dendrite/sync_api/_core/models/page_information.py
+++ b/dendrite/models/page_information.py
@@ -1,15 +1,15 @@
-from typing import Dict, Optional
-from typing_extensions import TypedDict
from pydantic import BaseModel -class InteractableElementInfo(TypedDict): - attrs: Optional[str] - text: Optional[str] - - class PageInformation(BaseModel): url: str raw_html: str screenshot_base64: str time_since_frame_navigated: float + + +class PageDiffInformation(BaseModel): + screenshot_before: str + screenshot_after: str + page_before: PageInformation + page_after: PageInformation diff --git a/dendrite/models/response/__init__.py b/dendrite/models/response/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/dendrite/async_api/_api/response/ask_page_response.py b/dendrite/models/response/ask_page_response.py similarity index 100% rename from dendrite/async_api/_api/response/ask_page_response.py rename to dendrite/models/response/ask_page_response.py index 4ec747a..86c6865 100644 --- a/dendrite/async_api/_api/response/ask_page_response.py +++ b/dendrite/models/response/ask_page_response.py @@ -1,6 +1,6 @@ from typing import Generic, Literal, TypeVar -from pydantic import BaseModel +from pydantic import BaseModel T = TypeVar("T") diff --git a/dendrite/sync_api/_api/response/extract_response.py b/dendrite/models/response/extract_response.py similarity index 70% rename from dendrite/sync_api/_api/response/extract_response.py rename to dendrite/models/response/extract_response.py index 0ef6e59..87dba47 100644 --- a/dendrite/sync_api/_api/response/extract_response.py +++ b/dendrite/models/response/extract_response.py @@ -1,13 +1,14 @@ from typing import Generic, Optional, TypeVar + from pydantic import BaseModel -from dendrite.sync_api._common.status import Status + +from dendrite.browser._common.types import Status T = TypeVar("T") class ExtractResponse(BaseModel, Generic[T]): - return_data: T + status: Status message: str + return_data: Optional[T] = None created_script: Optional[str] = None - status: Status - used_cache: bool diff --git a/dendrite/async_api/_api/response/get_element_response.py b/dendrite/models/response/get_element_response.py similarity index 51% rename from dendrite/async_api/_api/response/get_element_response.py rename to dendrite/models/response/get_element_response.py index c49caaf..6f2534c 100644 --- a/dendrite/async_api/_api/response/get_element_response.py +++ b/dendrite/models/response/get_element_response.py @@ -2,11 +2,11 @@ from pydantic import BaseModel -from dendrite.async_api._common.status import Status +from dendrite.models.status import Status class GetElementResponse(BaseModel): status: Status - selectors: Optional[Union[List[str], Dict[str, List[str]]]] = None + d_id: Optional[str] = None + selectors: Optional[List[str]] = None message: str = "" - used_cache: bool = False diff --git a/dendrite/sync_api/_api/response/interaction_response.py b/dendrite/models/response/interaction_response.py similarity index 67% rename from dendrite/sync_api/_api/response/interaction_response.py rename to dendrite/models/response/interaction_response.py index f273056..6bd1879 100644 --- a/dendrite/sync_api/_api/response/interaction_response.py +++ b/dendrite/models/response/interaction_response.py @@ -1,5 +1,6 @@ from pydantic import BaseModel -from dendrite.sync_api._common.status import Status + +from dendrite.models.status import Status class InteractionResponse(BaseModel): diff --git a/dendrite/models/scripts.py b/dendrite/models/scripts.py new file mode 100644 index 0000000..7507d9d --- /dev/null +++ b/dendrite/models/scripts.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel + + +class Script(BaseModel): + url: str + domain: str + script: str + 
created_at: str diff --git a/dendrite/models/selector.py b/dendrite/models/selector.py new file mode 100644 index 0000000..5e8e964 --- /dev/null +++ b/dendrite/models/selector.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + + +class Selector(BaseModel): + selector: str + prompt: str + url: str + netloc: str + created_at: str diff --git a/dendrite/async_api/_common/status.py b/dendrite/models/status.py similarity index 98% rename from dendrite/async_api/_common/status.py rename to dendrite/models/status.py index 0068d7d..427449d 100644 --- a/dendrite/async_api/_common/status.py +++ b/dendrite/models/status.py @@ -1,4 +1,3 @@ from typing import Literal - Status = Literal["success", "failed", "loading", "impossible"] diff --git a/dendrite/remote/__init__.py b/dendrite/remote/__init__.py index 434c45d..e6b3079 100644 --- a/dendrite/remote/__init__.py +++ b/dendrite/remote/__init__.py @@ -1,8 +1,6 @@ -from typing import Union -from dendrite.remote.browserless_config import BrowserlessConfig -from dendrite.remote.browserbase_config import BrowserbaseConfig +from dendrite.browser.remote import BrowserbaseConfig, BrowserlessConfig, Providers - -Providers = Union[BrowserbaseConfig, BrowserlessConfig] - -__all__ = ["Providers", "BrowserbaseConfig"] +__all__ = [ + "BrowserbaseConfig", + "BrowserlessConfig", +] diff --git a/dendrite/sync_api/__init__.py b/dendrite/sync_api/__init__.py deleted file mode 100644 index 3085e23..0000000 --- a/dendrite/sync_api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from loguru import logger -from ._core.dendrite_browser import Dendrite -from ._core.dendrite_element import Element -from ._core.dendrite_page import Page -from ._core.models.response import ElementsResponse - -__all__ = ["Dendrite", "Element", "Page", "ElementsResponse"] diff --git a/dendrite/sync_api/_api/_http_client.py b/dendrite/sync_api/_api/_http_client.py deleted file mode 100644 index e80ab64..0000000 --- a/dendrite/sync_api/_api/_http_client.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -from typing import Optional -import httpx -from loguru import logger -from dendrite.sync_api._core.models.api_config import APIConfig - - -class HTTPClient: - - def __init__(self, api_config: APIConfig, session_id: Optional[str] = None): - self.api_key = api_config.dendrite_api_key - self.session_id = session_id - self.base_url = self.resolve_base_url() - - def resolve_base_url(self): - base_url = ( - "http://localhost:8000/api/v1" - if os.environ.get("DENDRITE_DEV") - else "https://dendrite-server.azurewebsites.net/api/v1" - ) - return base_url - - def send_request( - self, - endpoint: str, - params: Optional[dict] = None, - data: Optional[dict] = None, - headers: Optional[dict] = None, - method: str = "GET", - ) -> httpx.Response: - url = f"{self.base_url}/{endpoint}" - headers = headers or {} - headers["Content-Type"] = "application/json" - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - if self.session_id: - headers["X-Session-ID"] = self.session_id - with httpx.Client(timeout=300) as client: - try: - response = client.request( - method, url, params=params, json=data, headers=headers - ) - response.raise_for_status() - return response - except httpx.HTTPStatusError as http_err: - logger.debug( - f"HTTP error occurred: {http_err.response.status_code}: {http_err.response.text}" - ) - raise - except httpx.ConnectError as connect_err: - logger.error( - f"Connection error occurred: {connect_err}. 
{url} Server might be down" - ) - raise - except httpx.RequestError as req_err: - raise - except Exception as err: - raise diff --git a/dendrite/sync_api/_api/browser_api_client.py b/dendrite/sync_api/_api/browser_api_client.py deleted file mode 100644 index 54da703..0000000 --- a/dendrite/sync_api/_api/browser_api_client.py +++ /dev/null @@ -1,100 +0,0 @@ -from typing import Optional -from loguru import logger -from dendrite.sync_api._api.response.cache_extract_response import CacheExtractResponse -from dendrite.sync_api._api.response.selector_cache_response import ( - SelectorCacheResponse, -) -from dendrite.sync_api._core.models.authentication import AuthSession -from dendrite.sync_api._api.response.get_element_response import GetElementResponse -from dendrite.sync_api._api.dto.ask_page_dto import AskPageDTO -from dendrite.sync_api._api.dto.authenticate_dto import AuthenticateDTO -from dendrite.sync_api._api.dto.get_elements_dto import GetElementsDTO -from dendrite.sync_api._api.dto.make_interaction_dto import MakeInteractionDTO -from dendrite.sync_api._api.dto.extract_dto import ExtractDTO -from dendrite.sync_api._api.dto.try_run_script_dto import TryRunScriptDTO -from dendrite.sync_api._api.dto.upload_auth_session_dto import UploadAuthSessionDTO -from dendrite.sync_api._api.response.ask_page_response import AskPageResponse -from dendrite.sync_api._api.response.interaction_response import InteractionResponse -from dendrite.sync_api._api.response.extract_response import ExtractResponse -from dendrite.sync_api._api._http_client import HTTPClient -from dendrite._common._exceptions.dendrite_exception import InvalidAuthSessionError -from dendrite.sync_api._api.dto.get_elements_dto import CheckSelectorCacheDTO - - -class BrowserAPIClient(HTTPClient): - - def authenticate(self, dto: AuthenticateDTO): - res = self.send_request( - "actions/authenticate", data=dto.model_dump(), method="POST" - ) - if res.status_code == 204: - raise InvalidAuthSessionError(domain=dto.domains) - return AuthSession(**res.json()) - - def upload_auth_session(self, dto: UploadAuthSessionDTO): - self.send_request("actions/upload-auth-session", data=dto.dict(), method="POST") - - def check_selector_cache(self, dto: CheckSelectorCacheDTO) -> SelectorCacheResponse: - res = self.send_request( - "actions/check-selector-cache", data=dto.dict(), method="POST" - ) - return SelectorCacheResponse(**res.json()) - - def get_interactions_selector(self, dto: GetElementsDTO) -> GetElementResponse: - res = self.send_request( - "actions/get-interaction-selector", data=dto.dict(), method="POST" - ) - return GetElementResponse(**res.json()) - - def make_interaction(self, dto: MakeInteractionDTO) -> InteractionResponse: - res = self.send_request( - "actions/make-interaction", data=dto.dict(), method="POST" - ) - res_dict = res.json() - return InteractionResponse( - status=res_dict["status"], message=res_dict["message"] - ) - - def check_extract_cache(self, dto: ExtractDTO) -> CacheExtractResponse: - res = self.send_request( - "actions/check-extract-cache", data=dto.dict(), method="POST" - ) - return CacheExtractResponse(**res.json()) - - def extract(self, dto: ExtractDTO) -> ExtractResponse: - res = self.send_request("actions/extract-page", data=dto.dict(), method="POST") - res_dict = res.json() - return ExtractResponse( - status=res_dict["status"], - message=res_dict["message"], - return_data=res_dict["return_data"], - created_script=res_dict.get("created_script", None), - used_cache=res_dict.get("used_cache", False), - ) - - def 
ask_page(self, dto: AskPageDTO) -> AskPageResponse: - res = self.send_request("actions/ask-page", data=dto.dict(), method="POST") - res_dict = res.json() - return AskPageResponse( - status=res_dict["status"], - description=res_dict["description"], - return_data=res_dict["return_data"], - ) - - def try_run_cached(self, dto: TryRunScriptDTO) -> Optional[ExtractResponse]: - res = self.send_request( - "actions/try-run-cached", data=dto.dict(), method="POST" - ) - if res is None: - return None - res_dict = res.json() - loaded_value = res_dict["return_data"] - if loaded_value is None: - return None - return ExtractResponse( - status=res_dict["status"], - message=res_dict["message"], - return_data=loaded_value, - created_script=res_dict.get("created_script", None), - used_cache=res_dict.get("used_cache", False), - ) diff --git a/dendrite/sync_api/_api/dto/authenticate_dto.py b/dendrite/sync_api/_api/dto/authenticate_dto.py deleted file mode 100644 index f5a1de7..0000000 --- a/dendrite/sync_api/_api/dto/authenticate_dto.py +++ /dev/null @@ -1,6 +0,0 @@ -from typing import Union -from pydantic import BaseModel - - -class AuthenticateDTO(BaseModel): - domains: Union[str, list[str]] diff --git a/dendrite/sync_api/_api/dto/extract_dto.py b/dendrite/sync_api/_api/dto/extract_dto.py deleted file mode 100644 index f2f7694..0000000 --- a/dendrite/sync_api/_api/dto/extract_dto.py +++ /dev/null @@ -1,24 +0,0 @@ -import json -from typing import Any -from pydantic import BaseModel -from dendrite.sync_api._core.models.api_config import APIConfig -from dendrite.sync_api._core.models.page_information import PageInformation - - -class ExtractDTO(BaseModel): - page_information: PageInformation - api_config: APIConfig - prompt: str - return_data_json_schema: Any - use_screenshot: bool = False - use_cache: bool = True - force_use_cache: bool = False - - @property - def combined_prompt(self) -> str: - json_schema_prompt = ( - "" - if self.return_data_json_schema is None - else f"\nJson schema: {json.dumps(self.return_data_json_schema)}" - ) - return f"Task: {self.prompt}{json_schema_prompt}" diff --git a/dendrite/sync_api/_api/dto/get_elements_dto.py b/dendrite/sync_api/_api/dto/get_elements_dto.py deleted file mode 100644 index d9d2b06..0000000 --- a/dendrite/sync_api/_api/dto/get_elements_dto.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import Dict, Union -from pydantic import BaseModel -from dendrite.sync_api._core.models.api_config import APIConfig -from dendrite.sync_api._core.models.page_information import PageInformation - - -class CheckSelectorCacheDTO(BaseModel): - url: str - prompt: Union[str, Dict[str, str]] - - -class GetElementsDTO(BaseModel): - page_information: PageInformation - prompt: Union[str, Dict[str, str]] - api_config: APIConfig - use_cache: bool = True - only_one: bool - force_use_cache: bool = False diff --git a/dendrite/sync_api/_api/dto/get_interaction_dto.py b/dendrite/sync_api/_api/dto/get_interaction_dto.py deleted file mode 100644 index bdc7654..0000000 --- a/dendrite/sync_api/_api/dto/get_interaction_dto.py +++ /dev/null @@ -1,9 +0,0 @@ -from pydantic import BaseModel -from dendrite.sync_api._core.models.api_config import APIConfig -from dendrite.sync_api._core.models.page_information import PageInformation - - -class GetInteractionDTO(BaseModel): - page_information: PageInformation - api_config: APIConfig - prompt: str diff --git a/dendrite/sync_api/_api/dto/get_session_dto.py b/dendrite/sync_api/_api/dto/get_session_dto.py deleted file mode 100644 index 6414cc3..0000000 --- 
a/dendrite/sync_api/_api/dto/get_session_dto.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import List -from pydantic import BaseModel - - -class GetSessionDTO(BaseModel): - user_id: str - domain: str diff --git a/dendrite/sync_api/_api/dto/google_search_dto.py b/dendrite/sync_api/_api/dto/google_search_dto.py deleted file mode 100644 index 3e81d7d..0000000 --- a/dendrite/sync_api/_api/dto/google_search_dto.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Optional -from pydantic import BaseModel -from dendrite.sync_api._core.models.api_config import APIConfig -from dendrite.sync_api._core.models.page_information import PageInformation - - -class GoogleSearchDTO(BaseModel): - query: str - country: Optional[str] = None - filter_results_prompt: Optional[str] = None - page_information: PageInformation - api_config: APIConfig diff --git a/dendrite/sync_api/_api/dto/make_interaction_dto.py b/dendrite/sync_api/_api/dto/make_interaction_dto.py deleted file mode 100644 index 2d806a3..0000000 --- a/dendrite/sync_api/_api/dto/make_interaction_dto.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Literal, Optional -from pydantic import BaseModel -from dendrite.sync_api._core.models.api_config import APIConfig -from dendrite.sync_api._core.models.page_diff_information import PageDiffInformation - -InteractionType = Literal["click", "fill", "hover"] - - -class MakeInteractionDTO(BaseModel): - url: str - dendrite_id: str - interaction_type: InteractionType - value: Optional[str] = None - expected_outcome: Optional[str] - page_delta_information: PageDiffInformation - api_config: APIConfig diff --git a/dendrite/sync_api/_api/dto/try_run_script_dto.py b/dendrite/sync_api/_api/dto/try_run_script_dto.py deleted file mode 100644 index 778c251..0000000 --- a/dendrite/sync_api/_api/dto/try_run_script_dto.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import Any, Optional -from pydantic import BaseModel -from dendrite.sync_api._core.models.api_config import APIConfig - - -class TryRunScriptDTO(BaseModel): - url: str - raw_html: str - api_config: APIConfig - prompt: str - db_prompt: Optional[str] = None - return_data_json_schema: Any diff --git a/dendrite/sync_api/_api/dto/upload_auth_session_dto.py b/dendrite/sync_api/_api/dto/upload_auth_session_dto.py deleted file mode 100644 index 0741b65..0000000 --- a/dendrite/sync_api/_api/dto/upload_auth_session_dto.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import BaseModel -from dendrite.sync_api._core.models.authentication import AuthSession, StorageState - - -class UploadAuthSessionDTO(BaseModel): - auth_data: AuthSession - storage_state: StorageState diff --git a/dendrite/sync_api/_api/response/ask_page_response.py b/dendrite/sync_api/_api/response/ask_page_response.py deleted file mode 100644 index 8d99ddc..0000000 --- a/dendrite/sync_api/_api/response/ask_page_response.py +++ /dev/null @@ -1,10 +0,0 @@ -from typing import Generic, Literal, TypeVar -from pydantic import BaseModel - -T = TypeVar("T") - - -class AskPageResponse(BaseModel, Generic[T]): - status: Literal["success", "error"] - return_data: T - description: str diff --git a/dendrite/sync_api/_api/response/cache_extract_response.py b/dendrite/sync_api/_api/response/cache_extract_response.py deleted file mode 100644 index 463d03b..0000000 --- a/dendrite/sync_api/_api/response/cache_extract_response.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class CacheExtractResponse(BaseModel): - exists: bool diff --git a/dendrite/sync_api/_api/response/get_element_response.py 
b/dendrite/sync_api/_api/response/get_element_response.py deleted file mode 100644 index d268cca..0000000 --- a/dendrite/sync_api/_api/response/get_element_response.py +++ /dev/null @@ -1,10 +0,0 @@ -from typing import Dict, List, Optional, Union -from pydantic import BaseModel -from dendrite.sync_api._common.status import Status - - -class GetElementResponse(BaseModel): - status: Status - selectors: Optional[Union[List[str], Dict[str, List[str]]]] = None - message: str = "" - used_cache: bool = False diff --git a/dendrite/sync_api/_api/response/google_search_response.py b/dendrite/sync_api/_api/response/google_search_response.py deleted file mode 100644 index d435b71..0000000 --- a/dendrite/sync_api/_api/response/google_search_response.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import List -from pydantic import BaseModel - - -class SearchResult(BaseModel): - url: str - title: str - description: str - - -class GoogleSearchResponse(BaseModel): - results: List[SearchResult] diff --git a/dendrite/sync_api/_api/response/selector_cache_response.py b/dendrite/sync_api/_api/response/selector_cache_response.py deleted file mode 100644 index 4c0e388..0000000 --- a/dendrite/sync_api/_api/response/selector_cache_response.py +++ /dev/null @@ -1,5 +0,0 @@ -from pydantic import BaseModel - - -class SelectorCacheResponse(BaseModel): - exists: bool diff --git a/dendrite/sync_api/_api/response/session_response.py b/dendrite/sync_api/_api/response/session_response.py deleted file mode 100644 index 2d03b97..0000000 --- a/dendrite/sync_api/_api/response/session_response.py +++ /dev/null @@ -1,7 +0,0 @@ -from typing import List -from pydantic import BaseModel - - -class SessionResponse(BaseModel): - cookies: List[dict] - origins_storage: List[dict] diff --git a/dendrite/sync_api/_common/constants.py b/dendrite/sync_api/_common/constants.py deleted file mode 100644 index ee49898..0000000 --- a/dendrite/sync_api/_common/constants.py +++ /dev/null @@ -1,66 +0,0 @@ -STEALTH_ARGS = [ - "--no-pings", - "--mute-audio", - "--no-first-run", - "--no-default-browser-check", - "--disable-cloud-import", - "--disable-gesture-typing", - "--disable-offer-store-unmasked-wallet-cards", - "--disable-offer-upload-credit-cards", - "--disable-print-preview", - "--disable-voice-input", - "--disable-wake-on-wifi", - "--disable-cookie-encryption", - "--ignore-gpu-blocklist", - "--enable-async-dns", - "--enable-simple-cache-backend", - "--enable-tcp-fast-open", - "--prerender-from-omnibox=disabled", - "--enable-web-bluetooth", - "--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process,TranslateUI,BlinkGenPropertyTrees", - "--aggressive-cache-discard", - "--disable-extensions", - "--disable-ipc-flooding-protection", - "--disable-blink-features=AutomationControlled", - "--test-type", - "--enable-features=NetworkService,NetworkServiceInProcess,TrustTokens,TrustTokensAlwaysAllowIssuance", - "--disable-component-extensions-with-background-pages", - "--disable-default-apps", - "--disable-breakpad", - "--disable-component-update", - "--disable-domain-reliability", - "--disable-sync", - "--disable-client-side-phishing-detection", - "--disable-hang-monitor", - "--disable-popup-blocking", - "--disable-prompt-on-repost", - "--metrics-recording-only", - "--safebrowsing-disable-auto-update", - "--password-store=basic", - "--autoplay-policy=no-user-gesture-required", - "--use-mock-keychain", - "--force-webrtc-ip-handling-policy=disable_non_proxied_udp", - "--webrtc-ip-handling-policy=disable_non_proxied_udp", - 
"--disable-session-crashed-bubble", - "--disable-crash-reporter", - "--disable-dev-shm-usage", - "--force-color-profile=srgb", - "--disable-translate", - "--disable-background-networking", - "--disable-background-timer-throttling", - "--disable-backgrounding-occluded-windows", - "--disable-infobars", - "--hide-scrollbars", - "--disable-renderer-backgrounding", - "--font-render-hinting=none", - "--disable-logging", - "--enable-surface-synchronization", - "--disable-threaded-animation", - "--disable-threaded-scrolling", - "--disable-checker-imaging", - "--disable-new-content-rendering-timeout", - "--disable-image-animation-resync", - "--disable-partial-raster", - "--blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4", - "--disable-layer-tree-host-memory-pressure", -] diff --git a/dendrite/sync_api/_core/_impl_browser.py b/dendrite/sync_api/_core/_impl_browser.py deleted file mode 100644 index bbff259..0000000 --- a/dendrite/sync_api/_core/_impl_browser.py +++ /dev/null @@ -1,85 +0,0 @@ -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from dendrite.sync_api._core.dendrite_browser import Dendrite -from dendrite.sync_api._core._type_spec import PlaywrightPage -from playwright.sync_api import Download, Browser, Playwright - - -class ImplBrowser(ABC): - - @abstractmethod - def __init__(self, settings): - pass - - @abstractmethod - def get_download( - self, dendrite_browser: "Dendrite", pw_page: PlaywrightPage, timeout: float - ) -> Download: - """ - Retrieves the download event from the browser. - - Returns: - Download: The download event. - - Raises: - Exception: If there is an issue retrieving the download event. - """ - pass - - @abstractmethod - def start_browser(self, playwright: Playwright, pw_options: dict) -> Browser: - """ - Starts the browser session. - - Returns: - Browser: The browser session. - - Raises: - Exception: If there is an issue starting the browser session. - """ - pass - - @abstractmethod - def configure_context(self, browser: "Dendrite") -> None: - """ - Configures the browser context. - - Args: - browser (Dendrite): The browser to configure. - - Raises: - Exception: If there is an issue configuring the browser context. - """ - pass - - @abstractmethod - def stop_session(self) -> None: - """ - Stops the browser session. - - Raises: - Exception: If there is an issue stopping the browser session. 
- """ - pass - - -class LocalImpl(ImplBrowser): - - def __init__(self) -> None: - pass - - def start_browser(self, playwright: Playwright, pw_options) -> Browser: - return playwright.chromium.launch(**pw_options) - - def get_download( - self, dendrite_browser: "Dendrite", pw_page: PlaywrightPage, timeout: float - ) -> Download: - return dendrite_browser._download_handler.get_data(pw_page, timeout) - - def configure_context(self, browser: "Dendrite"): - pass - - def stop_session(self): - pass diff --git a/dendrite/sync_api/_core/_impl_mapping.py b/dendrite/sync_api/_core/_impl_mapping.py deleted file mode 100644 index fc0688f..0000000 --- a/dendrite/sync_api/_core/_impl_mapping.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Any, Dict, Optional, Type -from dendrite.sync_api._core._impl_browser import ImplBrowser, LocalImpl -from dendrite.sync_api._ext_impl.browserbase._impl import BrowserBaseImpl -from dendrite.sync_api._ext_impl.browserless._impl import BrowserlessImpl -from dendrite.remote.browserless_config import BrowserlessConfig -from dendrite.remote.browserbase_config import BrowserbaseConfig -from dendrite.remote import Providers - -IMPL_MAPPING: Dict[Type[Providers], Type[ImplBrowser]] = { - BrowserbaseConfig: BrowserBaseImpl, - BrowserlessConfig: BrowserlessImpl, -} -SETTINGS_CLASSES: Dict[str, Type[Providers]] = { - "browserbase": BrowserbaseConfig, - "browserless": BrowserlessConfig, -} - - -def get_impl(remote_provider: Optional[Providers]) -> ImplBrowser: - if remote_provider is None: - return LocalImpl() - try: - provider_class = IMPL_MAPPING[type(remote_provider)] - except KeyError: - raise ValueError( - f"No implementation for {type(remote_provider)}. Available providers: {', '.join(map(lambda x: x.__name__, IMPL_MAPPING.keys()))}" - ) - return provider_class(remote_provider) diff --git a/dendrite/sync_api/_core/_type_spec.py b/dendrite/sync_api/_core/_type_spec.py deleted file mode 100644 index 5dfbb9b..0000000 --- a/dendrite/sync_api/_core/_type_spec.py +++ /dev/null @@ -1,35 +0,0 @@ -import inspect -from typing import Any, Dict, Literal, Type, TypeVar, Union -from pydantic import BaseModel -from playwright.sync_api import Page - -Interaction = Literal["click", "fill", "hover"] -T = TypeVar("T") -PydanticModel = TypeVar("PydanticModel", bound=BaseModel) -PrimitiveTypes = PrimitiveTypes = Union[Type[bool], Type[int], Type[float], Type[str]] -JsonSchema = Dict[str, Any] -TypeSpec = Union[PrimitiveTypes, PydanticModel, JsonSchema] -PlaywrightPage = Page - - -def to_json_schema(type_spec: TypeSpec) -> Dict[str, Any]: - if isinstance(type_spec, dict): - return type_spec - if inspect.isclass(type_spec) and issubclass(type_spec, BaseModel): - return type_spec.model_json_schema() - if type_spec in (bool, int, float, str): - type_map = {bool: "boolean", int: "integer", float: "number", str: "string"} - return {"type": type_map[type_spec]} - raise ValueError(f"Unsupported type specification: {type_spec}") - - -def convert_to_type_spec(type_spec: TypeSpec, return_data: Any) -> TypeSpec: - if isinstance(type_spec, type): - if issubclass(type_spec, BaseModel): - return type_spec.model_validate(return_data) - if type_spec in (str, float, bool, int): - return type_spec(return_data) - raise ValueError(f"Unsupported type: {type_spec}") - if isinstance(type_spec, dict): - return return_data - raise ValueError(f"Unsupported type specification: {type_spec}") diff --git a/dendrite/sync_api/_core/_utils.py b/dendrite/sync_api/_core/_utils.py deleted file mode 100644 index 
056b358..0000000
--- a/dendrite/sync_api/_core/_utils.py
+++ /dev/null
@@ -1,101 +0,0 @@
-from typing import Optional, Union, List, TYPE_CHECKING
-from playwright.sync_api import FrameLocator, ElementHandle, Error, Frame
-from bs4 import BeautifulSoup
-from loguru import logger
-from dendrite.sync_api._api.response.get_element_response import GetElementResponse
-from dendrite.sync_api._core._type_spec import PlaywrightPage
-from dendrite.sync_api._core.dendrite_element import Element
-from dendrite.sync_api._core.models.response import ElementsResponse
-
-if TYPE_CHECKING:
-    from dendrite.sync_api._core.dendrite_page import Page
-from dendrite.sync_api._core._js import GENERATE_DENDRITE_IDS_IFRAME_SCRIPT
-from dendrite.sync_api._dom.util.mild_strip import mild_strip_in_place
-
-
-def expand_iframes(page: PlaywrightPage, page_soup: BeautifulSoup):
-
-    def get_iframe_path(frame: Frame):
-        path_parts = []
-        current_frame = frame
-        while current_frame.parent_frame is not None:
-            iframe_element = current_frame.frame_element()
-            iframe_id = iframe_element.get_attribute("d-id")
-            if iframe_id is None:
-                return None
-            path_parts.insert(0, iframe_id)
-            current_frame = current_frame.parent_frame
-        return "|".join(path_parts)
-
-    for frame in page.frames:
-        if frame.parent_frame is None:
-            continue
-        iframe_element = frame.frame_element()
-        iframe_id = iframe_element.get_attribute("d-id")
-        if iframe_id is None:
-            continue
-        iframe_path = get_iframe_path(frame)
-        if iframe_path is None:
-            continue
-        try:
-            frame.evaluate(
-                GENERATE_DENDRITE_IDS_IFRAME_SCRIPT, {"frame_path": iframe_path}
-            )
-            frame_content = frame.content()
-            frame_tree = BeautifulSoup(frame_content, "lxml")
-            mild_strip_in_place(frame_tree)
-            merge_iframe_to_page(iframe_id, page_soup, frame_tree)
-        except Error as e:
-            logger.debug(f"Error processing frame {iframe_id}: {e}")
-            continue
-
-
-def merge_iframe_to_page(iframe_id: str, page: BeautifulSoup, iframe: BeautifulSoup):
-    iframe_element = page.find("iframe", {"d-id": iframe_id})
-    if iframe_element is None:
-        logger.debug(f"Could not find iframe with ID {iframe_id} in page soup")
-        return
-    iframe_element.replace_with(iframe)
-
-
-def _get_all_elements_from_selector_soup(
-    selector: str, soup: BeautifulSoup, page: "Page"
-) -> List[Element]:
-    dendrite_elements: List[Element] = []
-    elements = soup.select(selector)
-    for element in elements:
-        frame = page._get_context(element)
-        d_id = element.get("d-id", "")
-        locator = frame.locator(f"xpath=//*[@d-id='{d_id}']")
-        if not d_id:
-            continue
-        if isinstance(d_id, list):
-            d_id = d_id[0]
-        dendrite_elements.append(
-            Element(d_id, locator, page.dendrite_browser, page._browser_api_client)
-        )
-    return dendrite_elements
-
-
-def get_elements_from_selectors_soup(
-    page: "Page", soup: BeautifulSoup, res: GetElementResponse, only_one: bool
-) -> Union[Optional[Element], List[Element], ElementsResponse]:
-    if isinstance(res.selectors, dict):
-        result = {}
-        for key, selectors in res.selectors.items():
-            for selector in selectors:
-                dendrite_elements = _get_all_elements_from_selector_soup(
-                    selector, soup, page
-                )
-                if len(dendrite_elements) > 0:
-                    result[key] = dendrite_elements[0]
-                    break
-        return ElementsResponse(result)
-    elif isinstance(res.selectors, list):
-        for selector in reversed(res.selectors):
-            dendrite_elements = _get_all_elements_from_selector_soup(
-                selector, soup, page
-            )
-            if len(dendrite_elements) > 0:
-                return dendrite_elements[0] if only_one else dendrite_elements
-    return None
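The `get_elements_from_selectors_soup` helper deleted above resolves cached selectors in reverse list order (`reversed(res.selectors)`, presumably newest appended last) and returns the first one that still matches the page soup. A standalone toy version of that fallback using plain BeautifulSoup; `pick_first_matching` and the sample HTML are illustrative only:

```python
from typing import List, Optional

from bs4 import BeautifulSoup

html = "<button d-id='a1' class='buy'>Buy</button>"
soup = BeautifulSoup(html, "html.parser")


def pick_first_matching(selectors: List[str], soup: BeautifulSoup) -> Optional[str]:
    """Try cached selectors newest-first and return the d-id of the
    first one that still matches the page soup."""
    for selector in reversed(selectors):
        matches = soup.select(selector)
        if matches:
            return matches[0].get("d-id")
    return None


# The newest cached selector ("#gone") no longer matches, so the helper
# falls back to the older "button.buy" and resolves d-id "a1".
print(pick_first_matching(["button.buy", "#gone"], soup))
```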
diff --git a/dendrite/sync_api/_core/mixin/extract.py b/dendrite/sync_api/_core/mixin/extract.py
deleted file mode 100644
index 15ca694..0000000
--- a/dendrite/sync_api/_core/mixin/extract.py
+++ /dev/null
@@ -1,232 +0,0 @@
-import time
-import time
-from typing import Any, Optional, Type, overload, List
-from dendrite.sync_api._api.dto.extract_dto import ExtractDTO
-from dendrite.sync_api._api.response.cache_extract_response import CacheExtractResponse
-from dendrite.sync_api._api.response.extract_response import ExtractResponse
-from dendrite.sync_api._core._type_spec import (
-    JsonSchema,
-    PydanticModel,
-    TypeSpec,
-    convert_to_type_spec,
-    to_json_schema,
-)
-from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol
-from dendrite.sync_api._core._managers.navigation_tracker import NavigationTracker
-from loguru import logger
-
-CACHE_TIMEOUT = 5
-
-
-class ExtractionMixin(DendritePageProtocol):
-    """
-    Mixin that provides extraction functionality for web pages.
-
-    This mixin provides various `extract` methods that allow extracting
-    different types of data (e.g., bool, int, float, string, Pydantic models, etc.)
-    from a web page based on a given prompt.
-    """
-
-    @overload
-    def extract(
-        self,
-        prompt: str,
-        type_spec: Type[bool],
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> bool: ...
-
-    @overload
-    def extract(
-        self,
-        prompt: str,
-        type_spec: Type[int],
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> int: ...
-
-    @overload
-    def extract(
-        self,
-        prompt: str,
-        type_spec: Type[float],
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> float: ...
-
-    @overload
-    def extract(
-        self,
-        prompt: str,
-        type_spec: Type[str],
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> str: ...
-
-    @overload
-    def extract(
-        self,
-        prompt: Optional[str],
-        type_spec: Type[PydanticModel],
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> PydanticModel: ...
-
-    @overload
-    def extract(
-        self,
-        prompt: Optional[str],
-        type_spec: JsonSchema,
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> JsonSchema: ...
-
-    @overload
-    def extract(
-        self,
-        prompt: str,
-        type_spec: None = None,
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> Any: ...
-
-    def extract(
-        self,
-        prompt: Optional[str],
-        type_spec: Optional[TypeSpec] = None,
-        use_cache: bool = True,
-        timeout: int = 180,
-    ) -> TypeSpec:
-        """
-        Extract data from a web page based on a prompt and optional type specification.
-        Args:
-            prompt (Optional[str]): The prompt to describe the information to extract.
-            type_spec (Optional[TypeSpec], optional): The type specification for the extracted data.
-            use_cache (bool, optional): Whether to use cached results. Defaults to True.
-            timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True,
-                up to 5000ms will be spent attempting to use cached scripts before falling back to the
-                extraction agent for the remaining time that will attempt to generate a new script. Defaults to 15000 (15 seconds).
-
-        Returns:
-            ExtractResponse: The extracted data wrapped in a ExtractResponse object.
-        Raises:
-            TimeoutError: If the extraction process exceeds the specified timeout.
- """ - logger.info(f"Starting extraction with prompt: {prompt}") - json_schema = None - if type_spec: - json_schema = to_json_schema(type_spec) - logger.debug(f"Type specification converted to JSON schema: {json_schema}") - if prompt is None: - prompt = "" - start_time = time.time() - page = self._get_page() - navigation_tracker = NavigationTracker(page) - navigation_tracker.start_nav_tracking() - if use_cache: - cache_available = check_if_extract_cache_available( - self, prompt, json_schema - ) - if cache_available: - logger.info("Cache available, attempting to use cached extraction") - result = attempt_extraction_with_backoff( - self, - prompt, - json_schema, - remaining_timeout=CACHE_TIMEOUT, - only_use_cache=True, - ) - if result: - return convert_and_return_result(result, type_spec) - logger.info( - "Using extraction agent to perform extraction, since no cache was found or failed." - ) - result = attempt_extraction_with_backoff( - self, - prompt, - json_schema, - remaining_timeout=timeout - (time.time() - start_time), - only_use_cache=False, - ) - if result: - return convert_and_return_result(result, type_spec) - logger.error(f"Extraction failed after {time.time() - start_time:.2f} seconds") - return None - - -def check_if_extract_cache_available( - obj: DendritePageProtocol, prompt: str, json_schema: Optional[JsonSchema] -) -> bool: - page = obj._get_page() - page_information = page.get_page_information(include_screenshot=False) - dto = ExtractDTO( - page_information=page_information, - api_config=obj._get_dendrite_browser().api_config, - prompt=prompt, - return_data_json_schema=json_schema, - ) - cache_response: CacheExtractResponse = ( - obj._get_browser_api_client().check_extract_cache(dto) - ) - return cache_response.exists - - -def attempt_extraction_with_backoff( - obj: DendritePageProtocol, - prompt: str, - json_schema: Optional[JsonSchema], - remaining_timeout: float = 180.0, - only_use_cache: bool = False, -) -> Optional[ExtractResponse]: - TIMEOUT_INTERVAL: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0] - total_elapsed_time = 0 - start_time = time.time() - for current_timeout in TIMEOUT_INTERVAL: - if total_elapsed_time >= remaining_timeout: - logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds") - return None - request_start_time = time.time() - page = obj._get_page() - page_information = page.get_page_information( - include_screenshot=not only_use_cache - ) - extract_dto = ExtractDTO( - page_information=page_information, - api_config=obj._get_dendrite_browser().api_config, - prompt=prompt, - return_data_json_schema=json_schema, - use_screenshot=True, - use_cache=only_use_cache, - force_use_cache=only_use_cache, - ) - res = obj._get_browser_api_client().extract(extract_dto) - request_duration = time.time() - request_start_time - if res.status == "impossible": - logger.error(f"Impossible to extract data. Reason: {res.message}") - return None - if res.status == "success": - logger.success( - f"Extraction successful: '{res.message}'\nUsed cache: {res.used_cache}\nUsed script:\n\n{res.created_script}" - ) - return res - sleep_duration = max(0, current_timeout - request_duration) - logger.info( - f"Extraction attempt failed. 
Status: {res.status}\nMessage: {res.message}\nSleeping for {sleep_duration:.2f} seconds" - ) - time.sleep(sleep_duration) - total_elapsed_time = time.time() - start_time - logger.error( - f"All extraction attempts failed after {total_elapsed_time:.2f} seconds" - ) - return None - - -def convert_and_return_result( - res: ExtractResponse, type_spec: Optional[TypeSpec] -) -> TypeSpec: - converted_res = res.return_data - if type_spec is not None: - logger.debug("Converting extraction result to specified type") - converted_res = convert_to_type_spec(type_spec, res.return_data) - logger.info("Extraction process completed successfully") - return converted_res diff --git a/dendrite/sync_api/_core/mixin/get_element.py b/dendrite/sync_api/_core/mixin/get_element.py deleted file mode 100644 index ded124e..0000000 --- a/dendrite/sync_api/_core/mixin/get_element.py +++ /dev/null @@ -1,301 +0,0 @@ -import time -import time -from typing import Dict, List, Literal, Optional, Union, overload -from loguru import logger -from dendrite.sync_api._api.dto.get_elements_dto import GetElementsDTO -from dendrite.sync_api._api.response.get_element_response import GetElementResponse -from dendrite.sync_api._api.dto.get_elements_dto import CheckSelectorCacheDTO -from dendrite.sync_api._core._utils import get_elements_from_selectors_soup -from dendrite.sync_api._core.dendrite_element import Element -from dendrite.sync_api._core.models.response import ElementsResponse -from dendrite.sync_api._core.protocol.page_protocol import DendritePageProtocol -from dendrite.sync_api._core.models.api_config import APIConfig - -CACHE_TIMEOUT = 5 - - -class GetElementMixin(DendritePageProtocol): - - @overload - def get_elements( - self, - prompt_or_elements: str, - use_cache: bool = True, - timeout: int = 15000, - context: str = "", - ) -> List[Element]: - """ - Retrieves a list of Dendrite elements based on a string prompt. - - Args: - prompt_or_elements (str): The prompt describing the elements to be retrieved. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - context (str, optional): Additional context for the retrieval. Defaults to an empty string. - - Returns: - List[Element]: A list of Dendrite elements found on the page. - """ - - @overload - def get_elements( - self, - prompt_or_elements: Dict[str, str], - use_cache: bool = True, - timeout: int = 15000, - context: str = "", - ) -> ElementsResponse: - """ - Retrieves Dendrite elements based on a dictionary. - - Args: - prompt_or_elements (Dict[str, str]): A dictionary where keys are field names and values are prompts describing the elements to be retrieved. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - context (str, optional): Additional context for the retrieval. Defaults to an empty string. - - Returns: - ElementsResponse: A response object containing the retrieved elements with attributes matching the keys in the dict. 
- """ - - def get_elements( - self, - prompt_or_elements: Union[str, Dict[str, str]], - use_cache: bool = True, - timeout: int = 15000, - context: str = "", - ) -> Union[List[Element], ElementsResponse]: - """ - Retrieves Dendrite elements based on either a string prompt or a dictionary of prompts. - - This method determines the type of the input (string or dictionary) and retrieves the appropriate elements. - If the input is a string, it fetches a list of elements. If the input is a dictionary, it fetches elements for each key-value pair. - - Args: - prompt_or_elements (Union[str, Dict[str, str]]): The prompt or dictionary of prompts for element retrieval. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - context (str, optional): Additional context for the retrieval. Defaults to an empty string. - - Returns: - Union[List[Element], ElementsResponse]: A list of elements or a response object containing the retrieved elements. - - Raises: - ValueError: If the input is neither a string nor a dictionary. - """ - return self._get_element( - prompt_or_elements, - only_one=False, - use_cache=use_cache, - timeout=timeout / 1000, - ) - - def get_element( - self, prompt: str, use_cache=True, timeout=15000 - ) -> Optional[Element]: - """ - Retrieves a single Dendrite element based on the provided prompt. - - Args: - prompt (str): The prompt describing the element to be retrieved. - use_cache (bool, optional): Whether to use cached results. Defaults to True. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - Element: The retrieved element. - """ - return self._get_element( - prompt, only_one=True, use_cache=use_cache, timeout=timeout / 1000 - ) - - @overload - def _get_element( - self, prompt_or_elements: str, only_one: Literal[True], use_cache: bool, timeout - ) -> Optional[Element]: - """ - Retrieves a single Dendrite element based on the provided prompt. - - Args: - prompt (Union[str, Dict[str, str]]): The prompt describing the element to be retrieved. - only_one (Literal[True]): Indicates that only one element should be retrieved. - use_cache (bool): Whether to use cached results. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - Element: The retrieved element. - """ - - @overload - def _get_element( - self, - prompt_or_elements: Union[str, Dict[str, str]], - only_one: Literal[False], - use_cache: bool, - timeout, - ) -> Union[List[Element], ElementsResponse]: - """ - Retrieves a list of Dendrite elements based on the provided prompt. - - Args: - prompt (str): The prompt describing the elements to be retrieved. - only_one (Literal[False]): Indicates that multiple elements should be retrieved. - use_cache (bool): Whether to use cached results. - timeout (int, optional): Maximum time in milliseconds for the entire operation. 
If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - List[Element]: A list of retrieved elements. - """ - - def _get_element( - self, - prompt_or_elements: Union[str, Dict[str, str]], - only_one: bool, - use_cache: bool, - timeout: float, - ) -> Union[Optional[Element], List[Element], ElementsResponse]: - """ - Retrieves Dendrite elements based on the provided prompt, either a single element or a list of elements. - - This method sends a request with the prompt and retrieves the elements based on the `only_one` flag. - - Args: - prompt_or_elements (Union[str, Dict[str, str]]): The prompt or dictionary of prompts for element retrieval. - only_one (bool): Whether to retrieve only one element or a list of elements. - use_cache (bool): Whether to use cached results. - timeout (int, optional): Maximum time in milliseconds for the entire operation. If use_cache=True, - up to 5000ms will be spent attempting to use cached selectors before falling back to the - find element agent for the remaining time. Defaults to 15000 (15 seconds). - - Returns: - Union[Element, List[Element], ElementsResponse]: The retrieved element, list of elements, or response object. - """ - api_config = self._get_dendrite_browser().api_config - start_time = time.time() - page = self._get_page() - cache_available = test_if_cache_available(self, prompt_or_elements, page.url) - if cache_available and use_cache == True: - logger.info(f"Cache available, attempting to use cached selectors") - res = attempt_with_backoff( - self, - prompt_or_elements, - only_one, - api_config, - remaining_timeout=CACHE_TIMEOUT, - only_use_cache=True, - ) - if res: - return res - else: - logger.debug( - f"After attempting to use cached selectors several times without success, let's find the elements using the find element agent." - ) - logger.info( - "Proceeding to use the find element agent to find the requested elements." 
- ) - res = attempt_with_backoff( - self, - prompt_or_elements, - only_one, - api_config, - remaining_timeout=timeout - (time.time() - start_time), - only_use_cache=False, - ) - if res: - return res - logger.error( - f"Failed to retrieve elements within the specified timeout of {timeout} seconds" - ) - return None - - -def test_if_cache_available( - obj: DendritePageProtocol, prompt_or_elements: Union[str, Dict[str, str]], url: str -) -> bool: - dto = CheckSelectorCacheDTO(url=url, prompt=prompt_or_elements) - cache_available = obj._get_browser_api_client().check_selector_cache(dto) - return cache_available.exists - - -def attempt_with_backoff( - obj: DendritePageProtocol, - prompt_or_elements: Union[str, Dict[str, str]], - only_one: bool, - api_config: APIConfig, - remaining_timeout: float, - only_use_cache: bool = False, -) -> Union[Optional[Element], List[Element], ElementsResponse]: - TIMEOUT_INTERVAL: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0] - total_elapsed_time = 0 - start_time = time.time() - for current_timeout in TIMEOUT_INTERVAL: - if total_elapsed_time >= remaining_timeout: - logger.error(f"Timeout reached after {total_elapsed_time:.2f} seconds") - return None - request_start_time = time.time() - page = obj._get_page() - page_information = page.get_page_information( - include_screenshot=not only_use_cache - ) - dto = GetElementsDTO( - page_information=page_information, - prompt=prompt_or_elements, - api_config=api_config, - use_cache=only_use_cache, - only_one=only_one, - force_use_cache=only_use_cache, - ) - res = obj._get_browser_api_client().get_interactions_selector(dto) - request_duration = time.time() - request_start_time - if res.status == "impossible": - logger.error( - f"Impossible to get elements for '{prompt_or_elements}'. 
Reason: {res.message}" - ) - return None - if res.status == "success": - response = get_elements_from_selectors_soup( - page, page._get_previous_soup(), res, only_one - ) - if response: - return response - sleep_duration = max(0, current_timeout - request_duration) - logger.info( - f"Failed to get elements for prompt:\n\n'{prompt_or_elements}'\n\nStatus: {res.status}\n\nMessage: {res.message}\n\nSleeping for {sleep_duration:.2f} seconds" - ) - time.sleep(sleep_duration) - total_elapsed_time = time.time() - start_time - logger.error(f"All attempts failed after {total_elapsed_time:.2f} seconds") - return None - - -def get_elements_from_selectors( - obj: DendritePageProtocol, res: GetElementResponse, only_one: bool -) -> Union[Optional[Element], List[Element], ElementsResponse]: - if isinstance(res.selectors, dict): - result = {} - for key, selectors in res.selectors.items(): - for selector in selectors: - page = obj._get_page() - dendrite_elements = page._get_all_elements_from_selector(selector) - if len(dendrite_elements) > 0: - result[key] = dendrite_elements[0] - break - return ElementsResponse(result) - elif isinstance(res.selectors, list): - for selector in reversed(res.selectors): - page = obj._get_page() - dendrite_elements = page._get_all_elements_from_selector(selector) - if len(dendrite_elements) > 0: - return dendrite_elements[0] if only_one else dendrite_elements - return None diff --git a/dendrite/sync_api/_core/models/authentication.py b/dendrite/sync_api/_core/models/authentication.py deleted file mode 100644 index 3c2656e..0000000 --- a/dendrite/sync_api/_core/models/authentication.py +++ /dev/null @@ -1,47 +0,0 @@ -from pydantic import BaseModel -from typing import List, Literal, Optional -from typing_extensions import TypedDict - - -class Cookie(TypedDict, total=False): - name: str - value: str - domain: str - path: str - expires: float - httpOnly: bool - secure: bool - sameSite: Literal["Lax", "None", "Strict"] - - -class LocalStorageEntry(TypedDict): - name: str - value: str - - -class OriginState(TypedDict): - origin: str - localStorage: List[LocalStorageEntry] - - -class StorageState(TypedDict, total=False): - cookies: List[Cookie] - origins: List[OriginState] - - -class DomainState(BaseModel): - domain: str - storage_state: StorageState - - -class AuthSession(BaseModel): - user_agent: Optional[str] - domain_states: List[DomainState] - - def to_storage_state(self) -> StorageState: - cookies = [] - origins = [] - for domain_state in self.domain_states: - cookies.extend(domain_state.storage_state.get("cookies", [])) - origins.extend(domain_state.storage_state.get("origins", [])) - return StorageState(cookies=cookies, origins=origins) diff --git a/dendrite/sync_api/_core/models/page_diff_information.py b/dendrite/sync_api/_core/models/page_diff_information.py deleted file mode 100644 index d41d1fe..0000000 --- a/dendrite/sync_api/_core/models/page_diff_information.py +++ /dev/null @@ -1,7 +0,0 @@ -from pydantic import BaseModel -from dendrite.sync_api._core.models.page_information import PageInformation - - -class PageDiffInformation(BaseModel): - page_before: PageInformation - page_after: PageInformation diff --git a/dendrite/sync_api/_core/models/response.py b/dendrite/sync_api/_core/models/response.py deleted file mode 100644 index 76225c3..0000000 --- a/dendrite/sync_api/_core/models/response.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import Dict, Iterator -from dendrite.sync_api._core.dendrite_element import Element - - -class ElementsResponse: - """ - 
ElementsResponse is a class that encapsulates a dictionary of Dendrite elements,
-    allowing for attribute-style access and other convenient interactions.
-
-    This class is used to store and access the elements retrieved by the `get_elements` function.
-    The attributes of this class dynamically match the keys of the dictionary passed to the `get_elements` function,
-    allowing for direct attribute-style access to the corresponding `Element` objects.
-
-    Attributes:
-        _data (Dict[str, Element]): A dictionary where keys are the names of elements and values are the corresponding `Element` objects.
-
-    Args:
-        data (Dict[str, Element]): The dictionary of elements to be encapsulated by the class.
-
-    Methods:
-        __getattr__(name: str) -> Element:
-            Allows attribute-style access to the elements in the dictionary.
-
-        __getitem__(key: str) -> Element:
-            Enables dictionary-style access to the elements.
-
-        __iter__() -> Iterator[str]:
-            Provides an iterator over the keys in the dictionary.
-
-        __repr__() -> str:
-            Returns a string representation of the class instance.
-    """
-
-    _data: Dict[str, Element]
-
-    def __init__(self, data: Dict[str, Element]):
-        self._data = data
-
-    def __getattr__(self, name: str) -> Element:
-        try:
-            return self._data[name]
-        except KeyError:
-            raise AttributeError(
-                f"'{self.__class__.__name__}' object has no attribute '{name}'"
-            )
-
-    def __getitem__(self, key: str) -> Element:
-        return self._data[key]
-
-    def __iter__(self) -> Iterator[str]:
-        return iter(self._data)
-
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}({self._data})"
diff --git a/dendrite/sync_api/_core/protocol/page_protocol.py b/dendrite/sync_api/_core/protocol/page_protocol.py
deleted file mode 100644
index 17b5e9b..0000000
--- a/dendrite/sync_api/_core/protocol/page_protocol.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from typing import TYPE_CHECKING, Protocol
-from dendrite.sync_api._api.browser_api_client import BrowserAPIClient
-
-if TYPE_CHECKING:
-    from dendrite.sync_api._core.dendrite_page import Page
-    from dendrite.sync_api._core.dendrite_browser import Dendrite
-
-
-class DendritePageProtocol(Protocol):
-    """
-    Protocol that specifies the required methods and attributes
-    for the `ExtractionMixin` to work.
-    """
-
-    def _get_dendrite_browser(self) -> "Dendrite": ...
-
-    def _get_browser_api_client(self) -> BrowserAPIClient: ...
-
-    def _get_page(self) -> "Page": ...
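Taken together, the deleted `ExtractionMixin` and `GetElementMixin` share one retry shape: spend up to `CACHE_TIMEOUT` seconds polling the cached path, then hand the remaining budget to the agent, sleeping on a fixed backoff schedule between attempts. A condensed, runnable sketch of that pattern; `fetch`, `cached_fetch`, and `agent_fetch` stand in for the DTO round-trips and are not SDK names:

```python
import time
from typing import Callable, List, Optional, TypeVar

T = TypeVar("T")

# Backoff schedule used by both deleted mixins.
TIMEOUT_INTERVAL: List[float] = [0.15, 0.45, 1.0, 2.0, 4.0, 8.0]
CACHE_TIMEOUT = 5.0


def attempt_with_backoff(
    fetch: Callable[[], Optional[T]], remaining_timeout: float
) -> Optional[T]:
    """Poll `fetch` on the fixed schedule until it yields a result or the
    timeout budget is spent; return None on failure."""
    start = time.time()
    for interval in TIMEOUT_INTERVAL:
        if time.time() - start >= remaining_timeout:
            return None
        request_start = time.time()
        result = fetch()  # one cached-selector or agent round-trip
        if result is not None:
            return result
        # Sleep only for the part of the interval that the request
        # itself did not already consume.
        time.sleep(max(0.0, interval - (time.time() - request_start)))
    return None


def with_cache_fallback(
    cached_fetch: Callable[[], Optional[T]],
    agent_fetch: Callable[[], Optional[T]],
    timeout: float,
) -> Optional[T]:
    """Cache path first on a short budget, then the agent with the rest."""
    start = time.time()
    result = attempt_with_backoff(cached_fetch, remaining_timeout=CACHE_TIMEOUT)
    if result is not None:
        return result
    return attempt_with_backoff(agent_fetch, timeout - (time.time() - start))
```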
diff --git a/dendrite/sync_api/_dom/util/mild_strip.py b/dendrite/sync_api/_dom/util/mild_strip.py deleted file mode 100644 index 7cd6923..0000000 --- a/dendrite/sync_api/_dom/util/mild_strip.py +++ /dev/null @@ -1,32 +0,0 @@ -from bs4 import BeautifulSoup, Doctype, Tag, Comment - - -def mild_strip(soup: Tag, keep_d_id: bool = True) -> BeautifulSoup: - new_soup = BeautifulSoup(str(soup), "html.parser") - _mild_strip(new_soup, keep_d_id) - return new_soup - - -def mild_strip_in_place(soup: BeautifulSoup, keep_d_id: bool = True) -> None: - _mild_strip(soup, keep_d_id) - - -def _mild_strip(soup: BeautifulSoup, keep_d_id: bool = True) -> None: - for element in soup(text=lambda text: isinstance(text, Comment)): - element.extract() - for tag in soup( - ["head", "script", "style", "path", "polygon", "defs", "svg", "br", "Doctype"] - ): - tag.extract() - for element in soup.contents: - if isinstance(element, Doctype): - element.extract() - for tag in soup.find_all(True): - if tag.attrs.get("is-interactable-d_id") == "true": - continue - tag.attrs = { - attr: value[:100] if isinstance(value, str) else value - for (attr, value) in tag.attrs.items() - } - if keep_d_id == False: - del tag["d-id"] diff --git a/poetry.lock b/poetry.lock index c6da94b..82b61ed 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,127 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. + +[[package]] +name = "aiohappyeyeballs" +version = "2.4.4" +description = "Happy Eyeballs for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"}, + {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"}, +] + +[[package]] +name = "aiohttp" +version = "3.11.11" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "aiohttp-3.11.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a60804bff28662cbcf340a4d61598891f12eea3a66af48ecfdc975ceec21e3c8"}, + {file = "aiohttp-3.11.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4b4fa1cb5f270fb3eab079536b764ad740bb749ce69a94d4ec30ceee1b5940d5"}, + {file = "aiohttp-3.11.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:731468f555656767cda219ab42e033355fe48c85fbe3ba83a349631541715ba2"}, + {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb23d8bb86282b342481cad4370ea0853a39e4a32a0042bb52ca6bdde132df43"}, + {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f047569d655f81cb70ea5be942ee5d4421b6219c3f05d131f64088c73bb0917f"}, + {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd7659baae9ccf94ae5fe8bfaa2c7bc2e94d24611528395ce88d009107e00c6d"}, + {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af01e42ad87ae24932138f154105e88da13ce7d202a6de93fafdafb2883a00ef"}, + {file = "aiohttp-3.11.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5854be2f3e5a729800bac57a8d76af464e160f19676ab6aea74bde18ad19d438"}, + {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:6526e5fb4e14f4bbf30411216780c9967c20c5a55f2f51d3abd6de68320cc2f3"}, + {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:85992ee30a31835fc482468637b3e5bd085fa8fe9392ba0bdcbdc1ef5e9e3c55"}, + {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:88a12ad8ccf325a8a5ed80e6d7c3bdc247d66175afedbe104ee2aaca72960d8e"}, + {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0a6d3fbf2232e3a08c41eca81ae4f1dff3d8f1a30bae415ebe0af2d2458b8a33"}, + {file = "aiohttp-3.11.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:84a585799c58b795573c7fa9b84c455adf3e1d72f19a2bf498b54a95ae0d194c"}, + {file = "aiohttp-3.11.11-cp310-cp310-win32.whl", hash = "sha256:bfde76a8f430cf5c5584553adf9926534352251d379dcb266ad2b93c54a29745"}, + {file = "aiohttp-3.11.11-cp310-cp310-win_amd64.whl", hash = "sha256:0fd82b8e9c383af11d2b26f27a478640b6b83d669440c0a71481f7c865a51da9"}, + {file = "aiohttp-3.11.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ba74ec819177af1ef7f59063c6d35a214a8fde6f987f7661f4f0eecc468a8f76"}, + {file = "aiohttp-3.11.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4af57160800b7a815f3fe0eba9b46bf28aafc195555f1824555fa2cfab6c1538"}, + {file = "aiohttp-3.11.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffa336210cf9cd8ed117011085817d00abe4c08f99968deef0013ea283547204"}, + {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81b8fe282183e4a3c7a1b72f5ade1094ed1c6345a8f153506d114af5bf8accd9"}, + {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3af41686ccec6a0f2bdc66686dc0f403c41ac2089f80e2214a0f82d001052c03"}, + {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70d1f9dde0e5dd9e292a6d4d00058737052b01f3532f69c0c65818dac26dc287"}, + {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:249cc6912405917344192b9f9ea5cd5b139d49e0d2f5c7f70bdfaf6b4dbf3a2e"}, + {file = "aiohttp-3.11.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0eb98d90b6690827dcc84c246811feeb4e1eea683c0eac6caed7549be9c84665"}, + {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ec82bf1fda6cecce7f7b915f9196601a1bd1a3079796b76d16ae4cce6d0ef89b"}, + {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9fd46ce0845cfe28f108888b3ab17abff84ff695e01e73657eec3f96d72eef34"}, + {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bd176afcf8f5d2aed50c3647d4925d0db0579d96f75a31e77cbaf67d8a87742d"}, + {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:ec2aa89305006fba9ffb98970db6c8221541be7bee4c1d027421d6f6df7d1ce2"}, + {file = "aiohttp-3.11.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:92cde43018a2e17d48bb09c79e4d4cb0e236de5063ce897a5e40ac7cb4878773"}, + {file = "aiohttp-3.11.11-cp311-cp311-win32.whl", hash = "sha256:aba807f9569455cba566882c8938f1a549f205ee43c27b126e5450dc9f83cc62"}, + {file = "aiohttp-3.11.11-cp311-cp311-win_amd64.whl", hash = "sha256:ae545f31489548c87b0cced5755cfe5a5308d00407000e72c4fa30b19c3220ac"}, + {file = "aiohttp-3.11.11-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e595c591a48bbc295ebf47cb91aebf9bd32f3ff76749ecf282ea7f9f6bb73886"}, + {file = "aiohttp-3.11.11-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:3ea1b59dc06396b0b424740a10a0a63974c725b1c64736ff788a3689d36c02d2"}, + {file = "aiohttp-3.11.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8811f3f098a78ffa16e0ea36dffd577eb031aea797cbdba81be039a4169e242c"}, + {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd7227b87a355ce1f4bf83bfae4399b1f5bb42e0259cb9405824bd03d2f4336a"}, + {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d40f9da8cabbf295d3a9dae1295c69975b86d941bc20f0a087f0477fa0a66231"}, + {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffb3dc385f6bb1568aa974fe65da84723210e5d9707e360e9ecb51f59406cd2e"}, + {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8f5f7515f3552d899c61202d99dcb17d6e3b0de777900405611cd747cecd1b8"}, + {file = "aiohttp-3.11.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3499c7ffbfd9c6a3d8d6a2b01c26639da7e43d47c7b4f788016226b1e711caa8"}, + {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8e2bf8029dbf0810c7bfbc3e594b51c4cc9101fbffb583a3923aea184724203c"}, + {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b6212a60e5c482ef90f2d788835387070a88d52cf6241d3916733c9176d39eab"}, + {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d119fafe7b634dbfa25a8c597718e69a930e4847f0b88e172744be24515140da"}, + {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:6fba278063559acc730abf49845d0e9a9e1ba74f85f0ee6efd5803f08b285853"}, + {file = "aiohttp-3.11.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:92fc484e34b733704ad77210c7957679c5c3877bd1e6b6d74b185e9320cc716e"}, + {file = "aiohttp-3.11.11-cp312-cp312-win32.whl", hash = "sha256:9f5b3c1ed63c8fa937a920b6c1bec78b74ee09593b3f5b979ab2ae5ef60d7600"}, + {file = "aiohttp-3.11.11-cp312-cp312-win_amd64.whl", hash = "sha256:1e69966ea6ef0c14ee53ef7a3d68b564cc408121ea56c0caa2dc918c1b2f553d"}, + {file = "aiohttp-3.11.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:541d823548ab69d13d23730a06f97460f4238ad2e5ed966aaf850d7c369782d9"}, + {file = "aiohttp-3.11.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:929f3ed33743a49ab127c58c3e0a827de0664bfcda566108989a14068f820194"}, + {file = "aiohttp-3.11.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0882c2820fd0132240edbb4a51eb8ceb6eef8181db9ad5291ab3332e0d71df5f"}, + {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b63de12e44935d5aca7ed7ed98a255a11e5cb47f83a9fded7a5e41c40277d104"}, + {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa54f8ef31d23c506910c21163f22b124facb573bff73930735cf9fe38bf7dff"}, + {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a344d5dc18074e3872777b62f5f7d584ae4344cd6006c17ba12103759d407af3"}, + {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b7fb429ab1aafa1f48578eb315ca45bd46e9c37de11fe45c7f5f4138091e2f1"}, + {file = "aiohttp-3.11.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c341c7d868750e31961d6d8e60ff040fb9d3d3a46d77fd85e1ab8e76c3e9a5c4"}, + {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:ed9ee95614a71e87f1a70bc81603f6c6760128b140bc4030abe6abaa988f1c3d"}, + {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:de8d38f1c2810fa2a4f1d995a2e9c70bb8737b18da04ac2afbf3971f65781d87"}, + {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a9b7371665d4f00deb8f32208c7c5e652059b0fda41cf6dbcac6114a041f1cc2"}, + {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:620598717fce1b3bd14dd09947ea53e1ad510317c85dda2c9c65b622edc96b12"}, + {file = "aiohttp-3.11.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bf8d9bfee991d8acc72d060d53860f356e07a50f0e0d09a8dfedea1c554dd0d5"}, + {file = "aiohttp-3.11.11-cp313-cp313-win32.whl", hash = "sha256:9d73ee3725b7a737ad86c2eac5c57a4a97793d9f442599bea5ec67ac9f4bdc3d"}, + {file = "aiohttp-3.11.11-cp313-cp313-win_amd64.whl", hash = "sha256:c7a06301c2fb096bdb0bd25fe2011531c1453b9f2c163c8031600ec73af1cc99"}, + {file = "aiohttp-3.11.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3e23419d832d969f659c208557de4a123e30a10d26e1e14b73431d3c13444c2e"}, + {file = "aiohttp-3.11.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:21fef42317cf02e05d3b09c028712e1d73a9606f02467fd803f7c1f39cc59add"}, + {file = "aiohttp-3.11.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1f21bb8d0235fc10c09ce1d11ffbd40fc50d3f08a89e4cf3a0c503dc2562247a"}, + {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1642eceeaa5ab6c9b6dfeaaa626ae314d808188ab23ae196a34c9d97efb68350"}, + {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2170816e34e10f2fd120f603e951630f8a112e1be3b60963a1f159f5699059a6"}, + {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8be8508d110d93061197fd2d6a74f7401f73b6d12f8822bbcd6d74f2b55d71b1"}, + {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4eed954b161e6b9b65f6be446ed448ed3921763cc432053ceb606f89d793927e"}, + {file = "aiohttp-3.11.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6c9af134da4bc9b3bd3e6a70072509f295d10ee60c697826225b60b9959acdd"}, + {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:44167fc6a763d534a6908bdb2592269b4bf30a03239bcb1654781adf5e49caf1"}, + {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:479b8c6ebd12aedfe64563b85920525d05d394b85f166b7873c8bde6da612f9c"}, + {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:10b4ff0ad793d98605958089fabfa350e8e62bd5d40aa65cdc69d6785859f94e"}, + {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:b540bd67cfb54e6f0865ceccd9979687210d7ed1a1cc8c01f8e67e2f1e883d28"}, + {file = "aiohttp-3.11.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1dac54e8ce2ed83b1f6b1a54005c87dfed139cf3f777fdc8afc76e7841101226"}, + {file = "aiohttp-3.11.11-cp39-cp39-win32.whl", hash = "sha256:568c1236b2fde93b7720f95a890741854c1200fba4a3471ff48b2934d2d93fd3"}, + {file = "aiohttp-3.11.11-cp39-cp39-win_amd64.whl", hash = "sha256:943a8b052e54dfd6439fd7989f67fc6a7f2138d0a2cf0a7de5f18aa4fe7eb3b1"}, + {file = "aiohttp-3.11.11.tar.gz", hash = "sha256:bb49c7f1e6ebf3821a42d81d494f538107610c3a705987f53068546b0e90303e"}, +] + +[package.dependencies] +aiohappyeyeballs = ">=2.3.0" +aiosignal = ">=1.1.2" +async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} +attrs = 
">=17.3.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +propcache = ">=0.2.0" +yarl = ">=1.17.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] + +[[package]] +name = "aiosignal" +version = "1.3.2" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.9" +files = [ + {file = "aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5"}, + {file = "aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" [[package]] name = "annotated-types" @@ -11,42 +134,111 @@ files = [ {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, ] +[[package]] +name = "anthropic" +version = "0.42.0" +description = "The official Python library for the anthropic API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anthropic-0.42.0-py3-none-any.whl", hash = "sha256:46775f65b723c078a2ac9e9de44a46db5c6a4fabeacfd165e5ea78e6817f4eff"}, + {file = "anthropic-0.42.0.tar.gz", hash = "sha256:bf8b0ed8c8cb2c2118038f29c58099d2f99f7847296cafdaa853910bfff4edf4"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +typing-extensions = ">=4.10,<5" + +[package.extras] +bedrock = ["boto3 (>=1.28.57)", "botocore (>=1.31.57)"] +vertex = ["google-auth (>=2,<3)"] + [[package]] name = "anyio" -version = "4.6.2.post1" +version = "4.8.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" files = [ - {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, - {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, + {file = "anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a"}, + {file = "anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a"}, ] [package.dependencies] exceptiongroup = {version = ">=1.0.2", markers = "python_version < \"3.11\""} idna = ">=2.8" sniffio = ">=1.1" -typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} +typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] -doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] +doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx_rtd_theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21)"] trio = ["trio (>=0.26.1)"] [[package]] name = "astroid" -version = "3.3.5" +version = "3.3.8" description = "An abstract syntax tree for Python with inference support." 
optional = false python-versions = ">=3.9.0" files = [ - {file = "astroid-3.3.5-py3-none-any.whl", hash = "sha256:a9d1c946ada25098d790e079ba2a1b112157278f3fb7e718ae6a9252f5835dc8"}, - {file = "astroid-3.3.5.tar.gz", hash = "sha256:5cfc40ae9f68311075d27ef68a4841bdc5cc7f6cf86671b49f00607d30188e2d"}, + {file = "astroid-3.3.8-py3-none-any.whl", hash = "sha256:187ccc0c248bfbba564826c26f070494f7bc964fd286b6d9fff4420e55de828c"}, + {file = "astroid-3.3.8.tar.gz", hash = "sha256:a88c7994f914a4ea8572fac479459f4955eeccc877be3f2d959a33273b0cf40b"}, ] [package.dependencies] typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""} +[[package]] +name = "async-timeout" +version = "5.0.1" +description = "Timeout context manager for asyncio programs" +optional = false +python-versions = ">=3.8" +files = [ + {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, + {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, +] + +[[package]] +name = "attrs" +version = "24.3.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.8" +files = [ + {file = "attrs-24.3.0-py3-none-any.whl", hash = "sha256:ac96cd038792094f438ad1f6ff80837353805ac950cd2aa0e0625ef19850c308"}, + {file = "attrs-24.3.0.tar.gz", hash = "sha256:8f5c07333d543103541ba7be0e2ce16eeee8130cb0b3f9238ab904ce1e85baff"}, +] + +[package.extras] +benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +dev = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pre-commit-uv", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] +tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] + +[[package]] +name = "autoflake" +version = "2.3.1" +description = "Removes unused imports and unused variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "autoflake-2.3.1-py3-none-any.whl", hash = "sha256:3ae7495db9084b7b32818b4140e6dc4fc280b712fb414f5b8fe57b0a8e85a840"}, + {file = "autoflake-2.3.1.tar.gz", hash = "sha256:c98b75dc5b0a86459c4f01a1d32ac7eb4338ec4317a4469515ff1e687ecd909e"}, +] + +[package.dependencies] +pyflakes = ">=3.0.0" +tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} + [[package]] name = "autopep8" version = "2.3.1" @@ -145,24 +337,125 @@ beautifulsoup4 = "*" [[package]] name = "certifi" -version = "2024.8.30" +version = "2024.12.14" description = "Python package for providing Mozilla's CA Bundle." 
optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, - {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, + {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, + {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.1" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7" +files = [ + {file = "charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f"}, + {file = "charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3"}, + {file = 
"charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-win32.whl", hash = "sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b"}, + {file = "charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1"}, + {file = 
"charset_normalizer-3.4.1-cp312-cp312-win32.whl", hash = "sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35"}, + {file = "charset_normalizer-3.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407"}, + {file = "charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd"}, + {file = 
"charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_ppc64le.whl", hash = "sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_s390x.whl", hash = "sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-win32.whl", hash = "sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487"}, + {file = "charset_normalizer-3.4.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-win32.whl", hash = "sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e"}, + {file = "charset_normalizer-3.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-win32.whl", hash = "sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5"}, + {file = "charset_normalizer-3.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765"}, + {file = "charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85"}, + {file = "charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3"}, ] [[package]] name = "click" -version = "8.1.7" +version = "8.1.8" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, ] [package.dependencies] @@ -194,6 +487,17 @@ files = [ graph = ["objgraph (>=1.7.2)"] profile = ["gprof2dot (>=2022.7.29)"] +[[package]] +name = "distro" +version = "1.9.0" +description = "Distro - an OS platform information API" +optional = false +python-versions = ">=3.6" +files = [ + {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, + {file = "distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed"}, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -208,6 +512,22 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "filelock" +version = "3.16.1" +description = "A platform independent file lock." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, + {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, +] + +[package.extras] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] +typing = ["typing-extensions (>=4.12.2)"] + [[package]] name = "flake8" version = "7.1.1" @@ -224,6 +544,146 @@ mccabe = ">=0.7.0,<0.8.0" pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" +[[package]] +name = "frozenlist" +version = "1.5.0" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, + {file = "frozenlist-1.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:15538c0cbf0e4fa11d1e3a71f823524b0c46299aed6e10ebb4c2089abd8c3bec"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e79225373c317ff1e35f210dd5f1344ff31066ba8067c307ab60254cd3a78ad5"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9272fa73ca71266702c4c3e2d4a28553ea03418e591e377a03b8e3659d94fa76"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:498524025a5b8ba81695761d78c8dd7382ac0b052f34e66939c42df860b8ff17"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92b5278ed9d50fe610185ecd23c55d8b307d75ca18e94c0e7de328089ac5dcba"}, + {file = "frozenlist-1.5.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f3c8c1dacd037df16e85227bac13cca58c30da836c6f936ba1df0c05d046d8d"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f2ac49a9bedb996086057b75bf93538240538c6d9b38e57c82d51f75a73409d2"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e66cc454f97053b79c2ab09c17fbe3c825ea6b4de20baf1be28919460dd7877f"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:5a3ba5f9a0dfed20337d3e966dc359784c9f96503674c2faf015f7fe8e96798c"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6321899477db90bdeb9299ac3627a6a53c7399c8cd58d25da094007402b039ab"}, + {file = "frozenlist-1.5.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:76e4753701248476e6286f2ef492af900ea67d9706a0155335a40ea21bf3b2f5"}, + {file = "frozenlist-1.5.0-cp310-cp310-win32.whl", hash = "sha256:977701c081c0241d0955c9586ffdd9ce44f7a7795df39b9151cd9a6fd0ce4cfb"}, + {file = "frozenlist-1.5.0-cp310-cp310-win_amd64.whl", hash = "sha256:189f03b53e64144f90990d29a27ec4f7997d91ed3d01b51fa39d2dbe77540fd4"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = 
"sha256:fd74520371c3c4175142d02a976aee0b4cb4a7cc912a60586ffd8d5929979b30"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2f3f7a0fbc219fb4455264cae4d9f01ad41ae6ee8524500f381de64ffaa077d5"}, + {file = "frozenlist-1.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f47c9c9028f55a04ac254346e92977bf0f166c483c74b4232bee19a6697e4778"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0996c66760924da6e88922756d99b47512a71cfd45215f3570bf1e0b694c206a"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a2fe128eb4edeabe11896cb6af88fca5346059f6c8d807e3b910069f39157869"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8ea951bbb6cacd492e3948b8da8c502a3f814f5d20935aae74b5df2b19cf3d"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:de537c11e4aa01d37db0d403b57bd6f0546e71a82347a97c6a9f0dcc532b3a45"}, + {file = "frozenlist-1.5.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c2623347b933fcb9095841f1cc5d4ff0b278addd743e0e966cb3d460278840d"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cee6798eaf8b1416ef6909b06f7dc04b60755206bddc599f52232606e18179d3"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f5f9da7f5dbc00a604fe74aa02ae7c98bcede8a3b8b9666f9f86fc13993bc71a"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:90646abbc7a5d5c7c19461d2e3eeb76eb0b204919e6ece342feb6032c9325ae9"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:bdac3c7d9b705d253b2ce370fde941836a5f8b3c5c2b8fd70940a3ea3af7f4f2"}, + {file = "frozenlist-1.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:03d33c2ddbc1816237a67f66336616416e2bbb6beb306e5f890f2eb22b959cdf"}, + {file = "frozenlist-1.5.0-cp311-cp311-win32.whl", hash = "sha256:237f6b23ee0f44066219dae14c70ae38a63f0440ce6750f868ee08775073f942"}, + {file = "frozenlist-1.5.0-cp311-cp311-win_amd64.whl", hash = "sha256:0cc974cc93d32c42e7b0f6cf242a6bd941c57c61b618e78b6c0a96cb72788c1d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:31115ba75889723431aa9a4e77d5f398f5cf976eea3bdf61749731f62d4a4a21"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7437601c4d89d070eac8323f121fcf25f88674627505334654fd027b091db09d"}, + {file = "frozenlist-1.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7948140d9f8ece1745be806f2bfdf390127cf1a763b925c4a805c603df5e697e"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:feeb64bc9bcc6b45c6311c9e9b99406660a9c05ca8a5b30d14a78555088b0b3a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:683173d371daad49cffb8309779e886e59c2f369430ad28fe715f66d08d4ab1a"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d57d8f702221405a9d9b40f9da8ac2e4a1a8b5285aac6100f3393675f0a85ee"}, + {file = "frozenlist-1.5.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30c72000fbcc35b129cb09956836c7d7abf78ab5416595e4857d1cae8d6251a6"}, + {file = 
"frozenlist-1.5.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:000a77d6034fbad9b6bb880f7ec073027908f1b40254b5d6f26210d2dab1240e"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5d7f5a50342475962eb18b740f3beecc685a15b52c91f7d975257e13e029eca9"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:87f724d055eb4785d9be84e9ebf0f24e392ddfad00b3fe036e43f489fafc9039"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:6e9080bb2fb195a046e5177f10d9d82b8a204c0736a97a153c2466127de87784"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9b93d7aaa36c966fa42efcaf716e6b3900438632a626fb09c049f6a2f09fc631"}, + {file = "frozenlist-1.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:52ef692a4bc60a6dd57f507429636c2af8b6046db8b31b18dac02cbc8f507f7f"}, + {file = "frozenlist-1.5.0-cp312-cp312-win32.whl", hash = "sha256:29d94c256679247b33a3dc96cce0f93cbc69c23bf75ff715919332fdbb6a32b8"}, + {file = "frozenlist-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:8969190d709e7c48ea386db202d708eb94bdb29207a1f269bab1196ce0dcca1f"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a1a048f9215c90973402e26c01d1cff8a209e1f1b53f72b95c13db61b00f953"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dd47a5181ce5fcb463b5d9e17ecfdb02b678cca31280639255ce9d0e5aa67af0"}, + {file = "frozenlist-1.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1431d60b36d15cda188ea222033eec8e0eab488f39a272461f2e6d9e1a8e63c2"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6482a5851f5d72767fbd0e507e80737f9c8646ae7fd303def99bfe813f76cf7f"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44c49271a937625619e862baacbd037a7ef86dd1ee215afc298a417ff3270608"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12f78f98c2f1c2429d42e6a485f433722b0061d5c0b0139efa64f396efb5886b"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce3aa154c452d2467487765e3adc730a8c153af77ad84096bc19ce19a2400840"}, + {file = "frozenlist-1.5.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b7dc0c4338e6b8b091e8faf0db3168a37101943e687f373dce00959583f7439"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:45e0896250900b5aa25180f9aec243e84e92ac84bd4a74d9ad4138ef3f5c97de"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:561eb1c9579d495fddb6da8959fd2a1fca2c6d060d4113f5844b433fc02f2641"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:df6e2f325bfee1f49f81aaac97d2aa757c7646534a06f8f577ce184afe2f0a9e"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:140228863501b44b809fb39ec56b5d4071f4d0aa6d216c19cbb08b8c5a7eadb9"}, + {file = "frozenlist-1.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7707a25d6a77f5d27ea7dc7d1fc608aa0a478193823f88511ef5e6b8a48f9d03"}, + {file = "frozenlist-1.5.0-cp313-cp313-win32.whl", hash = "sha256:31a9ac2b38ab9b5a8933b693db4939764ad3f299fcaa931a3e605bc3460e693c"}, + {file = "frozenlist-1.5.0-cp313-cp313-win_amd64.whl", hash = 
"sha256:11aabdd62b8b9c4b84081a3c246506d1cddd2dd93ff0ad53ede5defec7886b28"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:dd94994fc91a6177bfaafd7d9fd951bc8689b0a98168aa26b5f543868548d3ca"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2d0da8bbec082bf6bf18345b180958775363588678f64998c2b7609e34719b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73f2e31ea8dd7df61a359b731716018c2be196e5bb3b74ddba107f694fbd7604"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:828afae9f17e6de596825cf4228ff28fbdf6065974e5ac1410cecc22f699d2b3"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1577515d35ed5649d52ab4319db757bb881ce3b2b796d7283e6634d99ace307"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2150cc6305a2c2ab33299453e2968611dacb970d2283a14955923062c8d00b10"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a72b7a6e3cd2725eff67cd64c8f13335ee18fc3c7befc05aed043d24c7b9ccb9"}, + {file = "frozenlist-1.5.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c16d2fa63e0800723139137d667e1056bee1a1cf7965153d2d104b62855e9b99"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:17dcc32fc7bda7ce5875435003220a457bcfa34ab7924a49a1c19f55b6ee185c"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:97160e245ea33d8609cd2b8fd997c850b56db147a304a262abc2b3be021a9171"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f1e6540b7fa044eee0bb5111ada694cf3dc15f2b0347ca125ee9ca984d5e9e6e"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:91d6c171862df0a6c61479d9724f22efb6109111017c87567cfeb7b5d1449fdf"}, + {file = "frozenlist-1.5.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c1fac3e2ace2eb1052e9f7c7db480818371134410e1f5c55d65e8f3ac6d1407e"}, + {file = "frozenlist-1.5.0-cp38-cp38-win32.whl", hash = "sha256:b97f7b575ab4a8af9b7bc1d2ef7f29d3afee2226bd03ca3875c16451ad5a7723"}, + {file = "frozenlist-1.5.0-cp38-cp38-win_amd64.whl", hash = "sha256:374ca2dabdccad8e2a76d40b1d037f5bd16824933bf7bcea3e59c891fd4a0923"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:9bbcdfaf4af7ce002694a4e10a0159d5a8d20056a12b05b45cea944a4953f972"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1893f948bf6681733aaccf36c5232c231e3b5166d607c5fa77773611df6dc336"}, + {file = "frozenlist-1.5.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2b5e23253bb709ef57a8e95e6ae48daa9ac5f265637529e4ce6b003a37b2621f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f253985bb515ecd89629db13cb58d702035ecd8cfbca7d7a7e29a0e6d39af5f"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04a5c6babd5e8fb7d3c871dc8b321166b80e41b637c31a995ed844a6139942b6"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fe0f1c29ba24ba6ff6abf688cb0b7cf1efab6b6aa6adc55441773c252f7411"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:226d72559fa19babe2ccd920273e767c96a49b9d3d38badd7c91a0fdeda8ea08"}, + {file = "frozenlist-1.5.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15b731db116ab3aedec558573c1a5eec78822b32292fe4f2f0345b7f697745c2"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:366d8f93e3edfe5a918c874702f78faac300209a4d5bf38352b2c1bdc07a766d"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:1b96af8c582b94d381a1c1f51ffaedeb77c821c690ea5f01da3d70a487dd0a9b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c03eff4a41bd4e38415cbed054bbaff4a075b093e2394b6915dca34a40d1e38b"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:50cf5e7ee9b98f22bdecbabf3800ae78ddcc26e4a435515fc72d97903e8488e0"}, + {file = "frozenlist-1.5.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1e76bfbc72353269c44e0bc2cfe171900fbf7f722ad74c9a7b638052afe6a00c"}, + {file = "frozenlist-1.5.0-cp39-cp39-win32.whl", hash = "sha256:666534d15ba8f0fda3f53969117383d5dc021266b3c1a42c9ec4855e4b58b9d3"}, + {file = "frozenlist-1.5.0-cp39-cp39-win_amd64.whl", hash = "sha256:5c28f4b5dbef8a0d8aad0d4de24d1e9e981728628afaf4ea0792f5d0939372f0"}, + {file = "frozenlist-1.5.0-py3-none-any.whl", hash = "sha256:d994863bba198a4a518b467bb971c56e1db3f180a25c6cf7bb1949c267f748c3"}, + {file = "frozenlist-1.5.0.tar.gz", hash = "sha256:81d5af29e61b9c8348e876d442253723928dce6433e0e76cd925cd83f1b4b817"}, +] + +[[package]] +name = "fsspec" +version = "2024.12.0" +description = "File-system specification" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2"}, + {file = "fsspec-2024.12.0.tar.gz", hash = "sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f"}, +] + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +dev = ["pre-commit", "ruff"] +doc = ["numpydoc", "sphinx", "sphinx-design", "sphinx-rtd-theme", "yarl"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"] +test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"] +test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", 
"python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"] +tqdm = ["tqdm"] + [[package]] name = "greenlet" version = "3.1.1" @@ -323,13 +783,13 @@ files = [ [[package]] name = "httpcore" -version = "1.0.6" +version = "1.0.7" description = "A minimal low-level HTTP client." optional = false python-versions = ">=3.8" files = [ - {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"}, - {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"}, + {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, + {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, ] [package.dependencies] @@ -367,6 +827,40 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "huggingface-hub" +version = "0.27.1" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "huggingface_hub-0.27.1-py3-none-any.whl", hash = "sha256:1c5155ca7d60b60c2e2fc38cbb3ffb7f7c3adf48f824015b219af9061771daec"}, + {file = "huggingface_hub-0.27.1.tar.gz", hash = "sha256:c004463ca870283909d715d20f066ebd6968c2207dae9393fdffb3c1d4d8f98b"}, +] + +[package.dependencies] +filelock = "*" +fsspec = ">=2023.5.0" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "libcst (==1.4.0)", "mypy (==1.5.1)", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.5.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +hf-transfer = ["hf-transfer (>=0.1.4)"] +inference = ["aiohttp"] +quality = ["libcst (==1.4.0)", "mypy (==1.5.1)", "ruff (>=0.5.0)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] +tensorflow-testing = ["keras (<3.0)", "tensorflow"] +testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "fastapi", "gradio (>=4.0.0)", "jedi", "numpy", "pytest (>=8.1.1,<8.2.2)", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-mock", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["safetensors[torch]", "torch"] +typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"] + [[package]] name = "idna" version = "3.10" @@ -381,6 +875,29 @@ files = [ [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest 
(>=8.3.2)", "ruff (>=0.6.2)"] +[[package]] +name = "importlib-metadata" +version = "8.5.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, + {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"}, +] + +[package.dependencies] +zipp = ">=3.20" + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +perf = ["ipython"] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +type = ["pytest-mypy"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -406,15 +923,191 @@ files = [ [package.extras] colors = ["colorama (>=0.4.6)"] +[[package]] +name = "jinja2" +version = "3.1.5" +description = "A very fast and expressive template engine." +optional = false +python-versions = ">=3.7" +files = [ + {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, + {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "jiter" +version = "0.8.2" +description = "Fast iterable JSON parser." +optional = false +python-versions = ">=3.8" +files = [ + {file = "jiter-0.8.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ca8577f6a413abe29b079bc30f907894d7eb07a865c4df69475e868d73e71c7b"}, + {file = "jiter-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b25bd626bde7fb51534190c7e3cb97cee89ee76b76d7585580e22f34f5e3f393"}, + {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5c826a221851a8dc028eb6d7d6429ba03184fa3c7e83ae01cd6d3bd1d4bd17d"}, + {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d35c864c2dff13dfd79fb070fc4fc6235d7b9b359efe340e1261deb21b9fcb66"}, + {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f557c55bc2b7676e74d39d19bcb8775ca295c7a028246175d6a8b431e70835e5"}, + {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:580ccf358539153db147e40751a0b41688a5ceb275e6f3e93d91c9467f42b2e3"}, + {file = "jiter-0.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af102d3372e917cffce49b521e4c32c497515119dc7bd8a75665e90a718bbf08"}, + {file = "jiter-0.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cadcc978f82397d515bb2683fc0d50103acff2a180552654bb92d6045dec2c49"}, + {file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ba5bdf56969cad2019d4e8ffd3f879b5fdc792624129741d3d83fc832fef8c7d"}, + {file = "jiter-0.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3b94a33a241bee9e34b8481cdcaa3d5c2116f575e0226e421bed3f7a6ea71cff"}, + {file = "jiter-0.8.2-cp310-cp310-win32.whl", hash = "sha256:6e5337bf454abddd91bd048ce0dca5134056fc99ca0205258766db35d0a2ea43"}, + {file = "jiter-0.8.2-cp310-cp310-win_amd64.whl", hash = 
"sha256:4a9220497ca0cb1fe94e3f334f65b9b5102a0b8147646118f020d8ce1de70105"}, + {file = "jiter-0.8.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:2dd61c5afc88a4fda7d8b2cf03ae5947c6ac7516d32b7a15bf4b49569a5c076b"}, + {file = "jiter-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a6c710d657c8d1d2adbbb5c0b0c6bfcec28fd35bd6b5f016395f9ac43e878a15"}, + {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9584de0cd306072635fe4b89742bf26feae858a0683b399ad0c2509011b9dc0"}, + {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5a90a923338531b7970abb063cfc087eebae6ef8ec8139762007188f6bc69a9f"}, + {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21974d246ed0181558087cd9f76e84e8321091ebfb3a93d4c341479a736f099"}, + {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:32475a42b2ea7b344069dc1e81445cfc00b9d0e3ca837f0523072432332e9f74"}, + {file = "jiter-0.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b9931fd36ee513c26b5bf08c940b0ac875de175341cbdd4fa3be109f0492586"}, + {file = "jiter-0.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce0820f4a3a59ddced7fce696d86a096d5cc48d32a4183483a17671a61edfddc"}, + {file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8ffc86ae5e3e6a93765d49d1ab47b6075a9c978a2b3b80f0f32628f39caa0c88"}, + {file = "jiter-0.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5127dc1abd809431172bc3fbe8168d6b90556a30bb10acd5ded41c3cfd6f43b6"}, + {file = "jiter-0.8.2-cp311-cp311-win32.whl", hash = "sha256:66227a2c7b575720c1871c8800d3a0122bb8ee94edb43a5685aa9aceb2782d44"}, + {file = "jiter-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:cde031d8413842a1e7501e9129b8e676e62a657f8ec8166e18a70d94d4682855"}, + {file = "jiter-0.8.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e6ec2be506e7d6f9527dae9ff4b7f54e68ea44a0ef6b098256ddf895218a2f8f"}, + {file = "jiter-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:76e324da7b5da060287c54f2fabd3db5f76468006c811831f051942bf68c9d44"}, + {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:180a8aea058f7535d1c84183c0362c710f4750bef66630c05f40c93c2b152a0f"}, + {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:025337859077b41548bdcbabe38698bcd93cfe10b06ff66617a48ff92c9aec60"}, + {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecff0dc14f409599bbcafa7e470c00b80f17abc14d1405d38ab02e4b42e55b57"}, + {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ffd9fee7d0775ebaba131f7ca2e2d83839a62ad65e8e02fe2bd8fc975cedeb9e"}, + {file = "jiter-0.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14601dcac4889e0a1c75ccf6a0e4baf70dbc75041e51bcf8d0e9274519df6887"}, + {file = "jiter-0.8.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92249669925bc1c54fcd2ec73f70f2c1d6a817928480ee1c65af5f6b81cdf12d"}, + {file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e725edd0929fa79f8349ab4ec7f81c714df51dc4e991539a578e5018fa4a7152"}, + {file = "jiter-0.8.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bf55846c7b7a680eebaf9c3c48d630e1bf51bdf76c68a5f654b8524335b0ad29"}, + {file = "jiter-0.8.2-cp312-cp312-win32.whl", hash = 
"sha256:7efe4853ecd3d6110301665a5178b9856be7e2a9485f49d91aa4d737ad2ae49e"}, + {file = "jiter-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:83c0efd80b29695058d0fd2fa8a556490dbce9804eac3e281f373bbc99045f6c"}, + {file = "jiter-0.8.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ca1f08b8e43dc3bd0594c992fb1fd2f7ce87f7bf0d44358198d6da8034afdf84"}, + {file = "jiter-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5672a86d55416ccd214c778efccf3266b84f87b89063b582167d803246354be4"}, + {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58dc9bc9767a1101f4e5e22db1b652161a225874d66f0e5cb8e2c7d1c438b587"}, + {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37b2998606d6dadbb5ccda959a33d6a5e853252d921fec1792fc902351bb4e2c"}, + {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ab9a87f3784eb0e098f84a32670cfe4a79cb6512fd8f42ae3d0709f06405d18"}, + {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:79aec8172b9e3c6d05fd4b219d5de1ac616bd8da934107325a6c0d0e866a21b6"}, + {file = "jiter-0.8.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:711e408732d4e9a0208008e5892c2966b485c783cd2d9a681f3eb147cf36c7ef"}, + {file = "jiter-0.8.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:653cf462db4e8c41995e33d865965e79641ef45369d8a11f54cd30888b7e6ff1"}, + {file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:9c63eaef32b7bebac8ebebf4dabebdbc6769a09c127294db6babee38e9f405b9"}, + {file = "jiter-0.8.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:eb21aaa9a200d0a80dacc7a81038d2e476ffe473ffdd9c91eb745d623561de05"}, + {file = "jiter-0.8.2-cp313-cp313-win32.whl", hash = "sha256:789361ed945d8d42850f919342a8665d2dc79e7e44ca1c97cc786966a21f627a"}, + {file = "jiter-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:ab7f43235d71e03b941c1630f4b6e3055d46b6cb8728a17663eaac9d8e83a865"}, + {file = "jiter-0.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b426f72cd77da3fec300ed3bc990895e2dd6b49e3bfe6c438592a3ba660e41ca"}, + {file = "jiter-0.8.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2dd880785088ff2ad21ffee205e58a8c1ddabc63612444ae41e5e4b321b39c0"}, + {file = "jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566"}, + {file = "jiter-0.8.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9e1fa156ee9454642adb7e7234a383884452532bc9d53d5af2d18d98ada1d79c"}, + {file = "jiter-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cf5dfa9956d96ff2efb0f8e9c7d055904012c952539a774305aaaf3abdf3d6c"}, + {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e52bf98c7e727dd44f7c4acb980cb988448faeafed8433c867888268899b298b"}, + {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a2ecaa3c23e7a7cf86d00eda3390c232f4d533cd9ddea4b04f5d0644faf642c5"}, + {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:08d4c92bf480e19fc3f2717c9ce2aa31dceaa9163839a311424b6862252c943e"}, + {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99d9a1eded738299ba8e106c6779ce5c3893cffa0e32e4485d680588adae6db8"}, + {file = "jiter-0.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d20be8b7f606df096e08b0b1b4a3c6f0515e8dac296881fe7461dfa0fb5ec817"}, + {file = "jiter-0.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d33f94615fcaf872f7fd8cd98ac3b429e435c77619777e8a449d9d27e01134d1"}, + {file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:317b25e98a35ffec5c67efe56a4e9970852632c810d35b34ecdd70cc0e47b3b6"}, + {file = "jiter-0.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fc9043259ee430ecd71d178fccabd8c332a3bf1e81e50cae43cc2b28d19e4cb7"}, + {file = "jiter-0.8.2-cp38-cp38-win32.whl", hash = "sha256:fc5adda618205bd4678b146612ce44c3cbfdee9697951f2c0ffdef1f26d72b63"}, + {file = "jiter-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:cd646c827b4f85ef4a78e4e58f4f5854fae0caf3db91b59f0d73731448a970c6"}, + {file = "jiter-0.8.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e41e75344acef3fc59ba4765df29f107f309ca9e8eace5baacabd9217e52a5ee"}, + {file = "jiter-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f22b16b35d5c1df9dfd58843ab2cd25e6bf15191f5a236bed177afade507bfc"}, + {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7200b8f7619d36aa51c803fd52020a2dfbea36ffec1b5e22cab11fd34d95a6d"}, + {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:70bf4c43652cc294040dbb62256c83c8718370c8b93dd93d934b9a7bf6c4f53c"}, + {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9d471356dc16f84ed48768b8ee79f29514295c7295cb41e1133ec0b2b8d637d"}, + {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:859e8eb3507894093d01929e12e267f83b1d5f6221099d3ec976f0c995cb6bd9"}, + {file = "jiter-0.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaa58399c01db555346647a907b4ef6d4f584b123943be6ed5588c3f2359c9f4"}, + {file = "jiter-0.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8f2d5ed877f089862f4c7aacf3a542627c1496f972a34d0474ce85ee7d939c27"}, + {file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:03c9df035d4f8d647f8c210ddc2ae0728387275340668fb30d2421e17d9a0841"}, + {file = "jiter-0.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8bd2a824d08d8977bb2794ea2682f898ad3d8837932e3a74937e93d62ecbb637"}, + {file = "jiter-0.8.2-cp39-cp39-win32.whl", hash = "sha256:ca29b6371ebc40e496995c94b988a101b9fbbed48a51190a4461fcb0a68b4a36"}, + {file = "jiter-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:1c0dfbd1be3cbefc7510102370d86e35d1d53e5a93d48519688b1bf0f761160a"}, + {file = "jiter-0.8.2.tar.gz", hash = "sha256:cd73d3e740666d0e639f678adb176fad25c1bcbdae88d8d7b857e1783bb4212d"}, +] + +[[package]] +name = "json-repair" +version = "0.30.3" +description = "A package to repair broken json strings" +optional = false +python-versions = ">=3.9" +files = [ + {file = "json_repair-0.30.3-py3-none-any.whl", hash = "sha256:63bb588162b0958ae93d85356ecbe54c06b8c33f8a4834f93fa2719ea669804e"}, + {file = "json_repair-0.30.3.tar.gz", hash = "sha256:0ac56e7ae9253ee9c507a7e1a3a26799c9b0bbe5e2bec1b2cc5053e90d5b05e3"}, +] + +[[package]] +name = "jsonschema" +version = "4.23.0" +description = "An implementation of JSON Schema validation for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, + {file = "jsonschema-4.23.0.tar.gz", hash = 
"sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +jsonschema-specifications = ">=2023.03.6" +referencing = ">=0.28.4" +rpds-py = ">=0.7.1" + +[package.extras] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=24.6.0)"] + +[[package]] +name = "jsonschema-specifications" +version = "2024.10.1" +description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +optional = false +python-versions = ">=3.9" +files = [ + {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, + {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, +] + +[package.dependencies] +referencing = ">=0.31.0" + +[[package]] +name = "litellm" +version = "1.57.0" +description = "Library to easily interface with LLM API providers" +optional = false +python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" +files = [ + {file = "litellm-1.57.0-py3-none-any.whl", hash = "sha256:339aec6f3ecac2035bf6311aa8913ce587c9aca2dc7d72a63a210c659e9721ca"}, + {file = "litellm-1.57.0.tar.gz", hash = "sha256:53a6f2bd9575823e102f7d18dde5cbd2d48eed027cecbb585f18a208605b34c5"}, +] + +[package.dependencies] +aiohttp = "*" +click = "*" +httpx = ">=0.23.0,<0.28.0" +importlib-metadata = ">=6.8.0" +jinja2 = ">=3.1.2,<4.0.0" +jsonschema = ">=4.22.0,<5.0.0" +openai = ">=1.55.3" +pydantic = ">=2.0.0,<3.0.0" +python-dotenv = ">=0.2.0" +tiktoken = ">=0.7.0" +tokenizers = "*" + +[package.extras] +extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"] +proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=43.0.1,<44.0.0)", "fastapi (>=0.115.5,<0.116.0)", "fastapi-sso (>=0.16.0,<0.17.0)", "gunicorn (>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.18,<0.0.19)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"] + [[package]] name = "loguru" -version = "0.7.2" +version = "0.7.3" description = "Python logging made (stupidly) simple" optional = false -python-versions = ">=3.5" +python-versions = "<4.0,>=3.5" files = [ - {file = "loguru-0.7.2-py3-none-any.whl", hash = "sha256:003d71e3d3ed35f0f8984898359d65b79e5b21943f78af86aa5491210429b8eb"}, - {file = "loguru-0.7.2.tar.gz", hash = "sha256:e671a53522515f34fd406340ee968cb9ecafbc4b36c679da03c18fd8d0bd51ac"}, + {file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"}, + {file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"}, ] [package.dependencies] @@ -422,7 +1115,7 @@ colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""} win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""} [package.extras] -dev = ["Sphinx (==7.2.5)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.2.2)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy 
(==v1.4.1)", "mypy (==v1.5.1)", "pre-commit (==3.4.0)", "pytest (==6.1.2)", "pytest (==7.4.0)", "pytest-cov (==2.12.1)", "pytest-cov (==4.1.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.0.0)", "sphinx-autobuild (==2021.3.14)", "sphinx-rtd-theme (==1.3.0)", "tox (==3.27.1)", "tox (==4.11.0)"] +dev = ["Sphinx (==8.1.3)", "build (==1.2.2)", "colorama (==0.4.5)", "colorama (==0.4.6)", "exceptiongroup (==1.1.3)", "freezegun (==1.1.0)", "freezegun (==1.5.0)", "mypy (==v0.910)", "mypy (==v0.971)", "mypy (==v1.13.0)", "mypy (==v1.4.1)", "myst-parser (==4.0.0)", "pre-commit (==4.0.1)", "pytest (==6.1.2)", "pytest (==8.3.2)", "pytest-cov (==2.12.1)", "pytest-cov (==5.0.0)", "pytest-cov (==6.0.0)", "pytest-mypy-plugins (==1.9.3)", "pytest-mypy-plugins (==3.1.0)", "sphinx-rtd-theme (==3.0.2)", "tox (==3.27.1)", "tox (==4.23.2)", "twine (==6.0.1)"] [[package]] name = "lxml" @@ -593,6 +1286,76 @@ files = [ beautifulsoup4 = ">=4.9,<5" six = ">=1.15,<2" +[[package]] +name = "markupsafe" +version = "3.0.2" +description = "Safely add untrusted strings to HTML/XML markup." +optional = false +python-versions = ">=3.9" +files = [ + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50"}, + {file = "MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d"}, + {file = "MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30"}, + {file = "MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca"}, + {file = 
"MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1"}, + {file = "MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6"}, + {file = "MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eaa0a10b7f72326f1372a713e73c3f739b524b3af41feb43e4921cb529f5929a"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:48032821bbdf20f5799ff537c7ac3d1fba0ba032cfc06194faffa8cda8b560ff"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a9d3f5f0901fdec14d8d2f66ef7d035f2157240a433441719ac9a3fba440b13"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:88b49a3b9ff31e19998750c38e030fc7bb937398b1f78cfa599aaef92d693144"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cfad01eed2c2e0c01fd0ecd2ef42c492f7f93902e39a42fc9ee1692961443a29"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:1225beacc926f536dc82e45f8a4d68502949dc67eea90eab715dea3a21c1b5f0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:3169b1eefae027567d1ce6ee7cae382c57fe26e82775f460f0b2778beaad66c0"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:eb7972a85c54febfb25b5c4b4f3af4dcc731994c7da0d8a0b4a6eb0640e1d178"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win32.whl", hash = "sha256:8c4e8c3ce11e1f92f6536ff07154f9d49677ebaaafc32db9db4620bc11ed480f"}, + {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, + {file 
= "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, +] + [[package]] name = "mccabe" version = "0.7.0" @@ -604,6 +1367,110 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "multidict" +version = "6.1.0" +description = "multidict implementation" +optional = false +python-versions = ">=3.8" +files = [ + {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, + {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, + {file = "multidict-6.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a114d03b938376557927ab23f1e950827c3b893ccb94b62fd95d430fd0e5cf53"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1c416351ee6271b2f49b56ad7f308072f6f44b37118d69c2cad94f3fa8a40d5"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6b5d83030255983181005e6cfbac1617ce9746b219bc2aad52201ad121226581"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3e97b5e938051226dc025ec80980c285b053ffb1e25a3db2a3aa3bc046bf7f56"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d618649d4e70ac6efcbba75be98b26ef5078faad23592f9b51ca492953012429"}, + {file = "multidict-6.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10524ebd769727ac77ef2278390fb0068d83f3acb7773792a5080f2b0abf7748"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ff3827aef427c89a25cc96ded1759271a93603aba9fb977a6d264648ebf989db"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:06809f4f0f7ab7ea2cabf9caca7d79c22c0758b58a71f9d32943ae13c7ace056"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f179dee3b863ab1c59580ff60f9d99f632f34ccb38bf67a33ec6b3ecadd0fd76"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:aaed8b0562be4a0876ee3b6946f6869b7bcdb571a5d1496683505944e268b160"}, + {file = "multidict-6.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c8b88a2ccf5493b6c8da9076fb151ba106960a2df90c2633f342f120751a9e7"}, + {file = "multidict-6.1.0-cp310-cp310-win32.whl", hash = "sha256:4a9cb68166a34117d6646c0023c7b759bf197bee5ad4272f420a0141d7eb03a0"}, + {file = "multidict-6.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:20b9b5fbe0b88d0bdef2012ef7dee867f874b72528cf1d08f1d59b0e3850129d"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3efe2c2cb5763f2f1b275ad2bf7a287d3f7ebbef35648a9726e3b69284a4f3d6"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7053d3b0353a8b9de430a4f4b4268ac9a4fb3481af37dfe49825bf45ca24156"}, + {file = "multidict-6.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27e5fc84ccef8dfaabb09d82b7d179c7cf1a3fbc8a966f8274fcb4ab2eb4cadb"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e2b90b43e696f25c62656389d32236e049568b39320e2735d51f08fd362761b"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:d83a047959d38a7ff552ff94be767b7fd79b831ad1cd9920662db05fec24fe72"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1a9dd711d0877a1ece3d2e4fea11a8e75741ca21954c919406b44e7cf971304"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec2abea24d98246b94913b76a125e855eb5c434f7c46546046372fe60f666351"}, + {file = "multidict-6.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4867cafcbc6585e4b678876c489b9273b13e9fff9f6d6d66add5e15d11d926cb"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b48204e8d955c47c55b72779802b219a39acc3ee3d0116d5080c388970b76e3"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8fff389528cad1618fb4b26b95550327495462cd745d879a8c7c2115248e399"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:a7a9541cd308eed5e30318430a9c74d2132e9a8cb46b901326272d780bf2d423"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:da1758c76f50c39a2efd5e9859ce7d776317eb1dd34317c8152ac9251fc574a3"}, + {file = "multidict-6.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c943a53e9186688b45b323602298ab727d8865d8c9ee0b17f8d62d14b56f0753"}, + {file = "multidict-6.1.0-cp311-cp311-win32.whl", hash = "sha256:90f8717cb649eea3504091e640a1b8568faad18bd4b9fcd692853a04475a4b80"}, + {file = "multidict-6.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:82176036e65644a6cc5bd619f65f6f19781e8ec2e5330f51aa9ada7504cc1926"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b04772ed465fa3cc947db808fa306d79b43e896beb677a56fb2347ca1a49c1fa"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6180c0ae073bddeb5a97a38c03f30c233e0a4d39cd86166251617d1bbd0af436"}, + {file = "multidict-6.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:071120490b47aa997cca00666923a83f02c7fbb44f71cf7f136df753f7fa8761"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50b3a2710631848991d0bf7de077502e8994c804bb805aeb2925a981de58ec2e"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b58c621844d55e71c1b7f7c498ce5aa6985d743a1a59034c57a905b3f153c1ef"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55b6d90641869892caa9ca42ff913f7ff1c5ece06474fbd32fb2cf6834726c95"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b820514bfc0b98a30e3d85462084779900347e4d49267f747ff54060cc33925"}, + {file = "multidict-6.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:10a9b09aba0c5b48c53761b7c720aaaf7cf236d5fe394cd399c7ba662d5f9966"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1e16bf3e5fc9f44632affb159d30a437bfe286ce9e02754759be5536b169b305"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76f364861c3bfc98cbbcbd402d83454ed9e01a5224bb3a28bf70002a230f73e2"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:820c661588bd01a0aa62a1283f20d2be4281b086f80dad9e955e690c75fb54a2"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:0e5f362e895bc5b9e67fe6e4ded2492d8124bdf817827f33c5b46c2fe3ffaca6"}, + {file = "multidict-6.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3ec660d19bbc671e3a6443325f07263be452c453ac9e512f5eb935e7d4ac28b3"}, + {file = "multidict-6.1.0-cp312-cp312-win32.whl", hash = "sha256:58130ecf8f7b8112cdb841486404f1282b9c86ccb30d3519faf301b2e5659133"}, + {file = "multidict-6.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:188215fc0aafb8e03341995e7c4797860181562380f81ed0a87ff455b70bf1f1"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d569388c381b24671589335a3be6e1d45546c2988c2ebe30fdcada8457a31008"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:052e10d2d37810b99cc170b785945421141bf7bb7d2f8799d431e7db229c385f"}, + {file = "multidict-6.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f90c822a402cb865e396a504f9fc8173ef34212a342d92e362ca498cad308e28"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b225d95519a5bf73860323e633a664b0d85ad3d5bede6d30d95b35d4dfe8805b"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:23bfd518810af7de1116313ebd9092cb9aa629beb12f6ed631ad53356ed6b86c"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c09fcfdccdd0b57867577b719c69e347a436b86cd83747f179dbf0cc0d4c1f3"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf6bea52ec97e95560af5ae576bdac3aa3aae0b6758c6efa115236d9e07dae44"}, + {file = "multidict-6.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57feec87371dbb3520da6192213c7d6fc892d5589a93db548331954de8248fd2"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0c3f390dc53279cbc8ba976e5f8035eab997829066756d811616b652b00a23a3"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:59bfeae4b25ec05b34f1956eaa1cb38032282cd4dfabc5056d0a1ec4d696d3aa"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b2f59caeaf7632cc633b5cf6fc449372b83bbdf0da4ae04d5be36118e46cc0aa"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:37bb93b2178e02b7b618893990941900fd25b6b9ac0fa49931a40aecdf083fe4"}, + {file = "multidict-6.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4e9f48f58c2c523d5a06faea47866cd35b32655c46b443f163d08c6d0ddb17d6"}, + {file = "multidict-6.1.0-cp313-cp313-win32.whl", hash = "sha256:3a37ffb35399029b45c6cc33640a92bef403c9fd388acce75cdc88f58bd19a81"}, + {file = "multidict-6.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:e9aa71e15d9d9beaad2c6b9319edcdc0a49a43ef5c0a4c8265ca9ee7d6c67774"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:db7457bac39421addd0c8449933ac32d8042aae84a14911a757ae6ca3eef1392"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d094ddec350a2fb899fec68d8353c78233debde9b7d8b4beeafa70825f1c281a"}, + {file = "multidict-6.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5845c1fd4866bb5dd3125d89b90e57ed3138241540897de748cdf19de8a2fca2"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9079dfc6a70abe341f521f78405b8949f96db48da98aeb43f9907f342f627cdc"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:3914f5aaa0f36d5d60e8ece6a308ee1c9784cd75ec8151062614657a114c4478"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c08be4f460903e5a9d0f76818db3250f12e9c344e79314d1d570fc69d7f4eae4"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d093be959277cb7dee84b801eb1af388b6ad3ca6a6b6bf1ed7585895789d027d"}, + {file = "multidict-6.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3702ea6872c5a2a4eeefa6ffd36b042e9773f05b1f37ae3ef7264b1163c2dcf6"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2090f6a85cafc5b2db085124d752757c9d251548cedabe9bd31afe6363e0aff2"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:f67f217af4b1ff66c68a87318012de788dd95fcfeb24cc889011f4e1c7454dfd"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:189f652a87e876098bbc67b4da1049afb5f5dfbaa310dd67c594b01c10388db6"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:6bb5992037f7a9eff7991ebe4273ea7f51f1c1c511e6a2ce511d0e7bdb754492"}, + {file = "multidict-6.1.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f4c2b9e770c4e393876e35a7046879d195cd123b4f116d299d442b335bcd"}, + {file = "multidict-6.1.0-cp38-cp38-win32.whl", hash = "sha256:e27bbb6d14416713a8bd7aaa1313c0fc8d44ee48d74497a0ff4c3a1b6ccb5167"}, + {file = "multidict-6.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:22f3105d4fb15c8f57ff3959a58fcab6ce36814486500cd7485651230ad4d4ef"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:4e18b656c5e844539d506a0a06432274d7bd52a7487e6828c63a63d69185626c"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a185f876e69897a6f3325c3f19f26a297fa058c5e456bfcff8015e9a27e83ae1"}, + {file = "multidict-6.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ab7c4ceb38d91570a650dba194e1ca87c2b543488fe9309b4212694174fd539c"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e617fb6b0b6953fffd762669610c1c4ffd05632c138d61ac7e14ad187870669c"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:16e5f4bf4e603eb1fdd5d8180f1a25f30056f22e55ce51fb3d6ad4ab29f7d96f"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4c035da3f544b1882bac24115f3e2e8760f10a0107614fc9839fd232200b875"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:957cf8e4b6e123a9eea554fa7ebc85674674b713551de587eb318a2df3e00255"}, + {file = "multidict-6.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:483a6aea59cb89904e1ceabd2b47368b5600fb7de78a6e4a2c2987b2d256cf30"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:87701f25a2352e5bf7454caa64757642734da9f6b11384c1f9d1a8e699758057"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:682b987361e5fd7a139ed565e30d81fd81e9629acc7d925a205366877d8c8657"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ce2186a7df133a9c895dea3331ddc5ddad42cdd0d1ea2f0a51e5d161e4762f28"}, + {file = "multidict-6.1.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9f636b730f7e8cb19feb87094949ba54ee5357440b9658b2a32a5ce4bce53972"}, + {file = 
"multidict-6.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:73eae06aa53af2ea5270cc066dcaf02cc60d2994bbb2c4ef5764949257d10f43"}, + {file = "multidict-6.1.0-cp39-cp39-win32.whl", hash = "sha256:1ca0083e80e791cffc6efce7660ad24af66c8d4079d2a750b29001b53ff59ada"}, + {file = "multidict-6.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:aa466da5b15ccea564bdab9c89175c762bc12825f4659c11227f515cee76fa4a"}, + {file = "multidict-6.1.0-py3-none-any.whl", hash = "sha256:48e171e52d1c4d33888e529b999e5900356b9ae588c2f09a52dcefb158b27506"}, + {file = "multidict-6.1.0.tar.gz", hash = "sha256:22ae2ebf9b0c69d206c003e2f6a914ea33f0a932d4aa16f236afc049d9958f4a"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""} + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -615,15 +1482,40 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "openai" +version = "1.59.3" +description = "The official Python library for the openai API" +optional = false +python-versions = ">=3.8" +files = [ + {file = "openai-1.59.3-py3-none-any.whl", hash = "sha256:b041887a0d8f3e70d1fc6ffbb2bf7661c3b9a2f3e806c04bf42f572b9ac7bc37"}, + {file = "openai-1.59.3.tar.gz", hash = "sha256:7f7fff9d8729968588edf1524e73266e8593bb6cab09298340efb755755bb66f"}, +] + +[package.dependencies] +anyio = ">=3.5.0,<5" +distro = ">=1.7.0,<2" +httpx = ">=0.23.0,<1" +jiter = ">=0.4.0,<1" +pydantic = ">=1.9.0,<3" +sniffio = "*" +tqdm = ">4" +typing-extensions = ">=4.11,<5" + +[package.extras] +datalib = ["numpy (>=1)", "pandas (>=1.2.3)", "pandas-stubs (>=1.1.0.11)"] +realtime = ["websockets (>=13,<15)"] + [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -648,6 +1540,94 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] +[[package]] +name = "pillow" +version = "11.1.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8"}, + {file = "pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26"}, + {file = "pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07"}, + {file = 
"pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482"}, + {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e"}, + {file = "pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269"}, + {file = "pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49"}, + {file = "pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a"}, + {file = "pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65"}, + {file = "pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457"}, + {file = "pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6"}, + {file = "pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1"}, + {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2"}, + {file = "pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96"}, + {file = "pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f"}, + {file = "pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761"}, + {file = "pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71"}, + {file = "pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a"}, + {file = "pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1"}, + {file = "pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f"}, + {file = "pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91"}, + {file = 
"pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c"}, + {file = "pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6"}, + {file = "pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf"}, + {file = "pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5"}, + {file = "pillow-11.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae98e14432d458fc3de11a77ccb3ae65ddce70f730e7c76140653048c71bfcbc"}, + {file = "pillow-11.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cc1331b6d5a6e144aeb5e626f4375f5b7ae9934ba620c0ac6b3e43d5e683a0f0"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:758e9d4ef15d3560214cddbc97b8ef3ef86ce04d62ddac17ad39ba87e89bd3b1"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b523466b1a31d0dcef7c5be1f20b942919b62fd6e9a9be199d035509cbefc0ec"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9044b5e4f7083f209c4e35aa5dd54b1dd5b112b108648f5c902ad586d4f945c5"}, + {file = "pillow-11.1.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:3764d53e09cdedd91bee65c2527815d315c6b90d7b8b79759cc48d7bf5d4f114"}, + {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:31eba6bbdd27dde97b0174ddf0297d7a9c3a507a8a1480e1e60ef914fe23d352"}, + {file = "pillow-11.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b5d658fbd9f0d6eea113aea286b21d3cd4d3fd978157cbf2447a6035916506d3"}, + {file = "pillow-11.1.0-cp313-cp313-win32.whl", hash = "sha256:f86d3a7a9af5d826744fabf4afd15b9dfef44fe69a98541f666f66fbb8d3fef9"}, + {file = "pillow-11.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:593c5fd6be85da83656b93ffcccc2312d2d149d251e98588b14fbc288fd8909c"}, + {file = "pillow-11.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:11633d58b6ee5733bde153a8dafd25e505ea3d32e261accd388827ee987baf65"}, + {file = "pillow-11.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:70ca5ef3b3b1c4a0812b5c63c57c23b63e53bc38e758b37a951e5bc466449861"}, + {file = "pillow-11.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8000376f139d4d38d6851eb149b321a52bb8893a88dae8ee7d95840431977081"}, + {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ee85f0696a17dd28fbcfceb59f9510aa71934b483d1f5601d1030c3c8304f3c"}, + {file = "pillow-11.1.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:dd0e081319328928531df7a0e63621caf67652c8464303fd102141b785ef9547"}, + {file = "pillow-11.1.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e63e4e5081de46517099dc30abe418122f54531a6ae2ebc8680bcd7096860eab"}, + {file = "pillow-11.1.0-cp313-cp313t-win32.whl", hash = "sha256:dda60aa465b861324e65a78c9f5cf0f4bc713e4309f83bc387be158b077963d9"}, + {file = "pillow-11.1.0-cp313-cp313t-win_amd64.whl", hash = "sha256:ad5db5781c774ab9a9b2c4302bbf0c1014960a0a7be63278d13ae6fdf88126fe"}, + {file = "pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756"}, + {file = "pillow-11.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:bf902d7413c82a1bfa08b06a070876132a5ae6b2388e2712aab3a7cbc02205c6"}, + {file = "pillow-11.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:c1eec9d950b6fe688edee07138993e54ee4ae634c51443cfb7c1e7613322718e"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e275ee4cb11c262bd108ab2081f750db2a1c0b8c12c1897f27b160c8bd57bbc"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4db853948ce4e718f2fc775b75c37ba2efb6aaea41a1a5fc57f0af59eee774b2"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:ab8a209b8485d3db694fa97a896d96dd6533d63c22829043fd9de627060beade"}, + {file = "pillow-11.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:54251ef02a2309b5eec99d151ebf5c9904b77976c8abdcbce7891ed22df53884"}, + {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5bb94705aea800051a743aa4874bb1397d4695fb0583ba5e425ee0328757f196"}, + {file = "pillow-11.1.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:89dbdb3e6e9594d512780a5a1c42801879628b38e3efc7038094430844e271d8"}, + {file = "pillow-11.1.0-cp39-cp39-win32.whl", hash = "sha256:e5449ca63da169a2e6068dd0e2fcc8d91f9558aba89ff6d02121ca8ab11e79e5"}, + {file = "pillow-11.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:3362c6ca227e65c54bf71a5f88b3d4565ff1bcbc63ae72c34b07bbb1cc59a43f"}, + {file = "pillow-11.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:b20be51b37a75cc54c2c55def3fa2c65bb94ba859dde241cd0a4fd302de5ae0a"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73"}, + {file = "pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0"}, + {file = "pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage (>=7.4.2)", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout", "trove-classifiers (>=2024.10.12)"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "platformdirs" version = "4.3.6" @@ -666,18 +1646,18 @@ type = ["mypy (>=1.11.2)"] [[package]] name = "playwright" -version = "1.48.0" +version = "1.49.1" description = "A high-level API to automate web browsers" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "playwright-1.48.0-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:082bce2739f1078acc7d0734da8cc0e23eb91b7fae553f3316d733276f09a6b1"}, - {file = 
"playwright-1.48.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:7da2eb51a19c7f3b523e9faa9d98e7af92e52eb983a099979ea79c9668e3cbf7"}, - {file = "playwright-1.48.0-py3-none-macosx_11_0_universal2.whl", hash = "sha256:115b988d1da322358b77bc3bf2d3cc90f8c881e691461538e7df91614c4833c9"}, - {file = "playwright-1.48.0-py3-none-manylinux1_x86_64.whl", hash = "sha256:8dabb80e62f667fe2640a8b694e26a7b884c0b4803f7514a3954fc849126227b"}, - {file = "playwright-1.48.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ff8303409ebed76bed4c3d655340320b768817d900ba208b394fdd7d7939a5c"}, - {file = "playwright-1.48.0-py3-none-win32.whl", hash = "sha256:85598c360c590076d4f435525be991246d74a905b654ac19d26eab7ed9b98b2d"}, - {file = "playwright-1.48.0-py3-none-win_amd64.whl", hash = "sha256:e0e87b0c4dc8fce83c725dd851aec37bc4e882bb225ec8a96bd83cf32d4f1623"}, + {file = "playwright-1.49.1-py3-none-macosx_10_13_x86_64.whl", hash = "sha256:1041ffb45a0d0bc44d698d3a5aa3ac4b67c9bd03540da43a0b70616ad52592b8"}, + {file = "playwright-1.49.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9f38ed3d0c1f4e0a6d1c92e73dd9a61f8855133249d6f0cec28648d38a7137be"}, + {file = "playwright-1.49.1-py3-none-macosx_11_0_universal2.whl", hash = "sha256:3be48c6d26dc819ca0a26567c1ae36a980a0303dcd4249feb6f59e115aaddfb8"}, + {file = "playwright-1.49.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:753ca90ee31b4b03d165cfd36e477309ebf2b4381953f2a982ff612d85b147d2"}, + {file = "playwright-1.49.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd9bc8dab37aa25198a01f555f0a2e2c3813fe200fef018ac34dfe86b34994b9"}, + {file = "playwright-1.49.1-py3-none-win32.whl", hash = "sha256:43b304be67f096058e587dac453ece550eff87b8fbed28de30f4f022cc1745bb"}, + {file = "playwright-1.49.1-py3-none-win_amd64.whl", hash = "sha256:47b23cb346283278f5b4d1e1990bcb6d6302f80c0aa0ca93dd0601a1400191df"}, ] [package.dependencies] @@ -718,6 +1698,97 @@ tomli = {version = ">=1.2.2", markers = "python_version < \"3.11\""} [package.extras] poetry-plugin = ["poetry (>=1.0,<2.0)"] +[[package]] +name = "propcache" +version = "0.2.1" +description = "Accelerated property cache" +optional = false +python-versions = ">=3.9" +files = [ + {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6b3f39a85d671436ee3d12c017f8fdea38509e4f25b28eb25877293c98c243f6"}, + {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d51fbe4285d5db5d92a929e3e21536ea3dd43732c5b177c7ef03f918dff9f2"}, + {file = "propcache-0.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6445804cf4ec763dc70de65a3b0d9954e868609e83850a47ca4f0cb64bd79fea"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9479aa06a793c5aeba49ce5c5692ffb51fcd9a7016e017d555d5e2b0045d212"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9631c5e8b5b3a0fda99cb0d29c18133bca1e18aea9effe55adb3da1adef80d3"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3156628250f46a0895f1f36e1d4fbe062a1af8718ec3ebeb746f1d23f0c5dc4d"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6fb63ae352e13748289f04f37868099e69dba4c2b3e271c46061e82c745634"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:887d9b0a65404929641a9fabb6452b07fe4572b269d901d622d8a34a4e9043b2"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a96dc1fa45bd8c407a0af03b2d5218392729e1822b0c32e62c5bf7eeb5fb3958"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a7e65eb5c003a303b94aa2c3852ef130230ec79e349632d030e9571b87c4698c"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:999779addc413181912e984b942fbcc951be1f5b3663cd80b2687758f434c583"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:19a0f89a7bb9d8048d9c4370c9c543c396e894c76be5525f5e1ad287f1750ddf"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:1ac2f5fe02fa75f56e1ad473f1175e11f475606ec9bd0be2e78e4734ad575034"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:574faa3b79e8ebac7cb1d7930f51184ba1ccf69adfdec53a12f319a06030a68b"}, + {file = "propcache-0.2.1-cp310-cp310-win32.whl", hash = "sha256:03ff9d3f665769b2a85e6157ac8b439644f2d7fd17615a82fa55739bc97863f4"}, + {file = "propcache-0.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:2d3af2e79991102678f53e0dbf4c35de99b6b8b58f29a27ca0325816364caaba"}, + {file = "propcache-0.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ffc3cca89bb438fb9c95c13fc874012f7b9466b89328c3c8b1aa93cdcfadd16"}, + {file = "propcache-0.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f174bbd484294ed9fdf09437f889f95807e5f229d5d93588d34e92106fbf6717"}, + {file = "propcache-0.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:70693319e0b8fd35dd863e3e29513875eb15c51945bf32519ef52927ca883bc3"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b480c6a4e1138e1aa137c0079b9b6305ec6dcc1098a8ca5196283e8a49df95a9"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d27b84d5880f6d8aa9ae3edb253c59d9f6642ffbb2c889b78b60361eed449787"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:857112b22acd417c40fa4595db2fe28ab900c8c5fe4670c7989b1c0230955465"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf6c4150f8c0e32d241436526f3c3f9cbd34429492abddbada2ffcff506c51af"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66d4cfda1d8ed687daa4bc0274fcfd5267873db9a5bc0418c2da19273040eeb7"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c2f992c07c0fca81655066705beae35fc95a2fa7366467366db627d9f2ee097f"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:4a571d97dbe66ef38e472703067021b1467025ec85707d57e78711c085984e54"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:bb6178c241278d5fe853b3de743087be7f5f4c6f7d6d22a3b524d323eecec505"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ad1af54a62ffe39cf34db1aa6ed1a1873bd548f6401db39d8e7cd060b9211f82"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e7048abd75fe40712005bcfc06bb44b9dfcd8e101dda2ecf2f5aa46115ad07ca"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:160291c60081f23ee43d44b08a7e5fb76681221a8e10b3139618c5a9a291b84e"}, + {file = "propcache-0.2.1-cp311-cp311-win32.whl", hash = 
"sha256:819ce3b883b7576ca28da3861c7e1a88afd08cc8c96908e08a3f4dd64a228034"}, + {file = "propcache-0.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:edc9fc7051e3350643ad929df55c451899bb9ae6d24998a949d2e4c87fb596d3"}, + {file = "propcache-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:081a430aa8d5e8876c6909b67bd2d937bfd531b0382d3fdedb82612c618bc41a"}, + {file = "propcache-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2ccec9ac47cf4e04897619c0e0c1a48c54a71bdf045117d3a26f80d38ab1fb0"}, + {file = "propcache-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:14d86fe14b7e04fa306e0c43cdbeebe6b2c2156a0c9ce56b815faacc193e320d"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:049324ee97bb67285b49632132db351b41e77833678432be52bdd0289c0e05e4"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1cd9a1d071158de1cc1c71a26014dcdfa7dd3d5f4f88c298c7f90ad6f27bb46d"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98110aa363f1bb4c073e8dcfaefd3a5cea0f0834c2aab23dda657e4dab2f53b5"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:647894f5ae99c4cf6bb82a1bb3a796f6e06af3caa3d32e26d2350d0e3e3faf24"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfd3223c15bebe26518d58ccf9a39b93948d3dcb3e57a20480dfdd315356baff"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d71264a80f3fcf512eb4f18f59423fe82d6e346ee97b90625f283df56aee103f"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e73091191e4280403bde6c9a52a6999d69cdfde498f1fdf629105247599b57ec"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3935bfa5fede35fb202c4b569bb9c042f337ca4ff7bd540a0aa5e37131659348"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f508b0491767bb1f2b87fdfacaba5f7eddc2f867740ec69ece6d1946d29029a6"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:1672137af7c46662a1c2be1e8dc78cb6d224319aaa40271c9257d886be4363a6"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b74c261802d3d2b85c9df2dfb2fa81b6f90deeef63c2db9f0e029a3cac50b518"}, + {file = "propcache-0.2.1-cp312-cp312-win32.whl", hash = "sha256:d09c333d36c1409d56a9d29b3a1b800a42c76a57a5a8907eacdbce3f18768246"}, + {file = "propcache-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:c214999039d4f2a5b2073ac506bba279945233da8c786e490d411dfc30f855c1"}, + {file = "propcache-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aca405706e0b0a44cc6bfd41fbe89919a6a56999157f6de7e182a990c36e37bc"}, + {file = "propcache-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:12d1083f001ace206fe34b6bdc2cb94be66d57a850866f0b908972f90996b3e9"}, + {file = "propcache-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d93f3307ad32a27bda2e88ec81134b823c240aa3abb55821a8da553eed8d9439"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba278acf14471d36316159c94a802933d10b6a1e117b8554fe0d0d9b75c9d536"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4e6281aedfca15301c41f74d7005e6e3f4ca143584ba696ac69df4f02f40d629"}, + {file = 
"propcache-0.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b750a8e5a1262434fb1517ddf64b5de58327f1adc3524a5e44c2ca43305eb0b"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf72af5e0fb40e9babf594308911436c8efde3cb5e75b6f206c34ad18be5c052"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b2d0a12018b04f4cb820781ec0dffb5f7c7c1d2a5cd22bff7fb055a2cb19ebce"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e800776a79a5aabdb17dcc2346a7d66d0777e942e4cd251defeb084762ecd17d"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:4160d9283bd382fa6c0c2b5e017acc95bc183570cd70968b9202ad6d8fc48dce"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:30b43e74f1359353341a7adb783c8f1b1c676367b011709f466f42fda2045e95"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:58791550b27d5488b1bb52bc96328456095d96206a250d28d874fafe11b3dfaf"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0f022d381747f0dfe27e99d928e31bc51a18b65bb9e481ae0af1380a6725dd1f"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:297878dc9d0a334358f9b608b56d02e72899f3b8499fc6044133f0d319e2ec30"}, + {file = "propcache-0.2.1-cp313-cp313-win32.whl", hash = "sha256:ddfab44e4489bd79bda09d84c430677fc7f0a4939a73d2bba3073036f487a0a6"}, + {file = "propcache-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:556fc6c10989f19a179e4321e5d678db8eb2924131e64652a51fe83e4c3db0e1"}, + {file = "propcache-0.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6a9a8c34fb7bb609419a211e59da8887eeca40d300b5ea8e56af98f6fbbb1541"}, + {file = "propcache-0.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ae1aa1cd222c6d205853b3013c69cd04515f9d6ab6de4b0603e2e1c33221303e"}, + {file = "propcache-0.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:accb6150ce61c9c4b7738d45550806aa2b71c7668c6942f17b0ac182b6142fd4"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eee736daafa7af6d0a2dc15cc75e05c64f37fc37bafef2e00d77c14171c2097"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7a31fc1e1bd362874863fdeed71aed92d348f5336fd84f2197ba40c59f061bd"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba4cfa1052819d16699e1d55d18c92b6e094d4517c41dd231a8b9f87b6fa681"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f089118d584e859c62b3da0892b88a83d611c2033ac410e929cb6754eec0ed16"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:781e65134efaf88feb447e8c97a51772aa75e48b794352f94cb7ea717dedda0d"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31f5af773530fd3c658b32b6bdc2d0838543de70eb9a2156c03e410f7b0d3aae"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:a7a078f5d37bee6690959c813977da5291b24286e7b962e62a94cec31aa5188b"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cea7daf9fc7ae6687cf1e2c049752f19f146fdc37c2cc376e7d0032cf4f25347"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = 
"sha256:8b3489ff1ed1e8315674d0775dc7d2195fb13ca17b3808721b54dbe9fd020faf"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9403db39be1393618dd80c746cb22ccda168efce239c73af13c3763ef56ffc04"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5d97151bc92d2b2578ff7ce779cdb9174337390a535953cbb9452fb65164c587"}, + {file = "propcache-0.2.1-cp39-cp39-win32.whl", hash = "sha256:9caac6b54914bdf41bcc91e7eb9147d331d29235a7c967c150ef5df6464fd1bb"}, + {file = "propcache-0.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:92fc4500fcb33899b05ba73276dfb684a20d31caa567b7cb5252d48f896a91b1"}, + {file = "propcache-0.2.1-py3-none-any.whl", hash = "sha256:52277518d6aae65536e9cea52d4e7fd2f7a66f4aa2d30ed3f2fcea620ace3c54"}, + {file = "propcache-0.2.1.tar.gz", hash = "sha256:3f77ce728b19cb537714499928fe800c3dda29e8d9428778fc7c186da4c09a64"}, +] + [[package]] name = "pycodestyle" version = "2.12.1" @@ -731,22 +1802,19 @@ files = [ [[package]] name = "pydantic" -version = "2.9.2" +version = "2.10.4" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"}, - {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"}, + {file = "pydantic-2.10.4-py3-none-any.whl", hash = "sha256:597e135ea68be3a37552fb524bc7d0d66dcf93d395acd93a00682f1efcb8ee3d"}, + {file = "pydantic-2.10.4.tar.gz", hash = "sha256:82f12e9723da6de4fe2ba888b5971157b3be7ad914267dea8f05f82b28254f06"}, ] [package.dependencies] annotated-types = ">=0.6.0" -pydantic-core = "2.23.4" -typing-extensions = [ - {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, -] +pydantic-core = "2.27.2" +typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] @@ -754,100 +1822,111 @@ timezone = ["tzdata"] [[package]] name = "pydantic-core" -version = "2.23.4" +version = "2.27.2" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"}, - {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"}, - {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"}, - {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"}, - {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"}, - {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"}, - {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"}, - {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"}, - {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"}, - {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"}, - {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"}, - {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"}, - {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"}, - {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"}, - {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"}, - {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"}, - {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"}, - {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"}, - {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"}, - {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"}, - {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"}, - {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"}, - {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"}, - {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"}, - {file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"}, - {file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"}, - {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"}, - {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"}, - {file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"}, - {file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"}, - {file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"}, - {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"}, - {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"}, - {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"}, - {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"}, - {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"}, - {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"}, + {file = "pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa"}, + {file = "pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7"}, + {file = 
"pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a"}, + {file = "pydantic_core-2.27.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9"}, + {file = "pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af"}, + {file = "pydantic_core-2.27.2-cp310-cp310-win32.whl", hash = "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4"}, + {file = "pydantic_core-2.27.2-cp310-cp310-win_amd64.whl", hash = "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31"}, + {file = "pydantic_core-2.27.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc"}, + {file = "pydantic_core-2.27.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e68c4446fe0810e959cdff46ab0a41ce2f2c86d227d96dc3847af0ba7def306"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9640b0059ff4f14d1f37321b94061c6db164fbe49b334b31643e0528d100d99"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:40d02e7d45c9f8af700f3452f329ead92da4c5f4317ca9b896de7ce7199ea459"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c1fd185014191700554795c99b347d64f2bb637966c4cfc16998a0ca700d048"}, + {file = "pydantic_core-2.27.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d81d2068e1c1228a565af076598f9e7451712700b673de8f502f0334f281387d"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1a4207639fb02ec2dbb76227d7c751a20b1a6b4bc52850568e52260cae64ca3b"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:3de3ce3c9ddc8bbd88f6e0e304dea0e66d843ec9de1b0042b0911c1663ffd474"}, + {file = "pydantic_core-2.27.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:30c5f68ded0c36466acede341551106821043e9afaad516adfb6e8fa80a4e6a6"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win32.whl", hash = "sha256:c70c26d2c99f78b125a3459f8afe1aed4d9687c24fd677c6a4436bc042e50d6c"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win_amd64.whl", hash = "sha256:08e125dbdc505fa69ca7d9c499639ab6407cfa909214d500897d02afb816e7cc"}, + {file = "pydantic_core-2.27.2-cp311-cp311-win_arm64.whl", hash = "sha256:26f0d68d4b235a2bae0c3fc585c585b4ecc51382db0e3ba402a22cbc440915e4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0"}, + {file = "pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2"}, + {file = "pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4"}, + {file = "pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9"}, + {file = "pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b"}, + {file = "pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b"}, + {file = "pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e"}, + {file = "pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27"}, + {file = 
"pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee"}, + {file = "pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = "sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee"}, + {file = "pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b"}, + {file = "pydantic_core-2.27.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d3e8d504bdd3f10835468f29008d72fc8359d95c9c415ce6e767203db6127506"}, + {file = "pydantic_core-2.27.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:521eb9b7f036c9b6187f0b47318ab0d7ca14bd87f776240b90b21c1f4f149320"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:85210c4d99a0114f5a9481b44560d7d1e35e32cc5634c656bc48e590b669b145"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d716e2e30c6f140d7560ef1538953a5cd1a87264c737643d481f2779fc247fe1"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f66d89ba397d92f840f8654756196d93804278457b5fbede59598a1f9f90b228"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:669e193c1c576a58f132e3158f9dfa9662969edb1a250c54d8fa52590045f046"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdbe7629b996647b99c01b37f11170a57ae675375b14b8c13b8518b8320ced5"}, + {file = "pydantic_core-2.27.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d262606bf386a5ba0b0af3b97f37c83d7011439e3dc1a9298f21efb292e42f1a"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cabb9bcb7e0d97f74df8646f34fc76fbf793b7f6dc2438517d7a9e50eee4f14d"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:d2d63f1215638d28221f664596b1ccb3944f6e25dd18cd3b86b0a4c408d5ebb9"}, + {file = "pydantic_core-2.27.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bca101c00bff0adb45a833f8451b9105d9df18accb8743b08107d7ada14bd7da"}, + {file = "pydantic_core-2.27.2-cp38-cp38-win32.whl", hash = "sha256:f6f8e111843bbb0dee4cb6594cdc73e79b3329b526037ec242a3e49012495b3b"}, + {file = "pydantic_core-2.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:fd1aea04935a508f62e0d0ef1f5ae968774a32afc306fb8545e06f5ff5cdf3ad"}, + {file = "pydantic_core-2.27.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c10eb4f1659290b523af58fa7cffb452a61ad6ae5613404519aee4bfbf1df993"}, + {file = "pydantic_core-2.27.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef592d4bad47296fb11f96cd7dc898b92e795032b4894dfb4076cfccd43a9308"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c61709a844acc6bf0b7dce7daae75195a10aac96a596ea1b776996414791ede4"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c5f762659e47fdb7b16956c71598292f60a03aa92f8b6351504359dbdba6cf"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:4c9775e339e42e79ec99c441d9730fccf07414af63eac2f0e48e08fd38a64d76"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57762139821c31847cfb2df63c12f725788bd9f04bc2fb392790959b8f70f118"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d1e85068e818c73e048fe28cfc769040bb1f475524f4745a5dc621f75ac7630"}, + {file = "pydantic_core-2.27.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:097830ed52fd9e427942ff3b9bc17fab52913b2f50f2880dc4a5611446606a54"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:044a50963a614ecfae59bb1eaf7ea7efc4bc62f49ed594e18fa1e5d953c40e9f"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:4e0b4220ba5b40d727c7f879eac379b822eee5d8fff418e9d3381ee45b3b0362"}, + {file = "pydantic_core-2.27.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e4f4bb20d75e9325cc9696c6802657b58bc1dbbe3022f32cc2b2b632c3fbb96"}, + {file = "pydantic_core-2.27.2-cp39-cp39-win32.whl", hash = "sha256:cca63613e90d001b9f2f9a9ceb276c308bfa2a43fafb75c8031c4f66039e8c6e"}, + {file = "pydantic_core-2.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:77d1bca19b0f7021b3a982e6f903dcd5b2b06076def36a652e3907f596e29f67"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9"}, + {file = "pydantic_core-2.27.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c33939a82924da9ed65dab5a65d427205a73181d8098e79b6b426bdf8ad4e656"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:00bad2484fa6bda1e216e7345a798bd37c68fb2d97558edd584942aa41b7d278"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c817e2b40aba42bac6f457498dacabc568c3b7a986fc9ba7c8d9d260b71485fb"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:251136cdad0cb722e93732cb45ca5299fb56e1344a833640bf93b2803f8d1bfd"}, + {file = 
"pydantic_core-2.27.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2088237af596f0a524d3afc39ab3b036e8adb054ee57cbb1dcf8e09da5b29cc"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d4041c0b966a84b4ae7a09832eb691a35aec90910cd2dbe7a208de59be77965b"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:8083d4e875ebe0b864ffef72a4304827015cff328a1be6e22cc850753bfb122b"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f141ee28a0ad2123b6611b6ceff018039df17f32ada8b534e6aa039545a3efb2"}, + {file = "pydantic_core-2.27.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7d0c8399fcc1848491f00e0314bd59fb34a9c008761bcb422a057670c3f65e35"}, + {file = "pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39"}, ] [package.dependencies] @@ -883,21 +1962,21 @@ files = [ [[package]] name = "pylint" -version = "3.3.1" +version = "3.3.3" description = "python code static checker" optional = false python-versions = ">=3.9.0" files = [ - {file = "pylint-3.3.1-py3-none-any.whl", hash = "sha256:2f846a466dd023513240bc140ad2dd73bfc080a5d85a710afdb728c420a5a2b9"}, - {file = "pylint-3.3.1.tar.gz", hash = "sha256:9f3dcc87b1203e612b78d91a896407787e708b3f189b5fa0b307712d49ff0c6e"}, + {file = "pylint-3.3.3-py3-none-any.whl", hash = "sha256:26e271a2bc8bce0fc23833805a9076dd9b4d5194e2a02164942cb3cdc37b4183"}, + {file = "pylint-3.3.3.tar.gz", hash = "sha256:07c607523b17e6d16e2ae0d7ef59602e332caa762af64203c24b41c27139f36a"}, ] [package.dependencies] -astroid = ">=3.3.4,<=3.4.0-dev0" +astroid = ">=3.3.8,<=3.4.0-dev0" colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} dill = [ - {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, {version = ">=0.2", markers = "python_version < \"3.11\""}, + {version = ">=0.3.7", markers = "python_version >= \"3.12\""}, {version = ">=0.3.6", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, ] isort = ">=4.2.5,<5.13.0 || >5.13.0,<6" @@ -927,12 +2006,12 @@ pylint = ">=1.7" [[package]] name = "pylint-pydantic" -version = "0.3.2" +version = "0.3.4" description = "A Pylint plugin to help Pylint understand the Pydantic" optional = false python-versions = ">=3.8" files = [ - {file = "pylint_pydantic-0.3.2-py3-none-any.whl", hash = "sha256:e5cec02370aa68ac8eff138e5d573b0ac049bab864e9a6c3a9057cf043440aa1"}, + {file = "pylint_pydantic-0.3.4-py3-none-any.whl", hash = "sha256:f82fdf6b05045102fef2bd8b553a118aadf80155116f374a76eb201c47160a68"}, ] [package.dependencies] @@ -942,13 +2021,13 @@ pylint-plugin-utils = "*" [[package]] name = "pytest" -version = "8.3.3" +version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, - {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, + {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, + {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, ] [package.dependencies] @@ -980,6 +2059,20 @@ pytest = ">=8.2,<9" docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] +[[package]] 
+name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pyyaml" version = "6.0.2" @@ -1042,15 +2135,280 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "referencing" +version = "0.35.1" +description = "JSON Referencing + Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, + {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +rpds-py = ">=0.7.0" + +[[package]] +name = "regex" +version = "2024.11.6" +description = "Alternative regular expression module, to replace re." +optional = false +python-versions = ">=3.8" +files = [ + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = 
"regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + 
{file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", 
hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = 
"regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, +] + +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-file" +version = "2.1.0" +description = "File transport adapter for Requests" +optional = false +python-versions = "*" +files = [ + {file = "requests_file-2.1.0-py2.py3-none-any.whl", hash = "sha256:cf270de5a4c5874e84599fc5778303d496c10ae5e870bfa378818f35d21bda5c"}, + {file = "requests_file-2.1.0.tar.gz", hash = "sha256:0f549a3f3b0699415ac04d167e9cb39bccfb730cb832b4d20be3d9867356e658"}, +] + +[package.dependencies] +requests = ">=1.0.0" + +[[package]] +name = "rpds-py" +version = "0.22.3" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "rpds_py-0.22.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:6c7b99ca52c2c1752b544e310101b98a659b720b21db00e65edca34483259967"}, + {file = "rpds_py-0.22.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:be2eb3f2495ba669d2a985f9b426c1797b7d48d6963899276d22f23e33d47e37"}, + {file = "rpds_py-0.22.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70eb60b3ae9245ddea20f8a4190bd79c705a22f8028aaf8bbdebe4716c3fab24"}, + {file = "rpds_py-0.22.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4041711832360a9b75cfb11b25a6a97c8fb49c07b8bd43d0d02b45d0b499a4ff"}, + {file = "rpds_py-0.22.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64607d4cbf1b7e3c3c8a14948b99345eda0e161b852e122c6bb71aab6d1d798c"}, + {file = "rpds_py-0.22.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e69b0a0e2537f26d73b4e43ad7bc8c8efb39621639b4434b76a3de50c6966e"}, + {file = "rpds_py-0.22.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc27863442d388870c1809a87507727b799c8460573cfbb6dc0eeaef5a11b5ec"}, + {file = "rpds_py-0.22.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e79dd39f1e8c3504be0607e5fc6e86bb60fe3584bec8b782578c3b0fde8d932c"}, + {file = "rpds_py-0.22.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e0fa2d4ec53dc51cf7d3bb22e0aa0143966119f42a0c3e4998293a3dd2856b09"}, + {file = "rpds_py-0.22.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fda7cb070f442bf80b642cd56483b5548e43d366fe3f39b98e67cce780cded00"}, + {file = "rpds_py-0.22.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cff63a0272fcd259dcc3be1657b07c929c466b067ceb1c20060e8d10af56f5bf"}, + {file = "rpds_py-0.22.3-cp310-cp310-win32.whl", hash = "sha256:9bd7228827ec7bb817089e2eb301d907c0d9827a9e558f22f762bb690b131652"}, + {file = "rpds_py-0.22.3-cp310-cp310-win_amd64.whl", hash = "sha256:9beeb01d8c190d7581a4d59522cd3d4b6887040dcfc744af99aa59fef3e041a8"}, + {file = "rpds_py-0.22.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d20cfb4e099748ea39e6f7b16c91ab057989712d31761d3300d43134e26e165f"}, + {file = "rpds_py-0.22.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:68049202f67380ff9aa52f12e92b1c30115f32e6895cd7198fa2a7961621fc5a"}, + {file = 
"rpds_py-0.22.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb4f868f712b2dd4bcc538b0a0c1f63a2b1d584c925e69a224d759e7070a12d5"}, + {file = "rpds_py-0.22.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bc51abd01f08117283c5ebf64844a35144a0843ff7b2983e0648e4d3d9f10dbb"}, + {file = "rpds_py-0.22.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0f3cec041684de9a4684b1572fe28c7267410e02450f4561700ca5a3bc6695a2"}, + {file = "rpds_py-0.22.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7ef9d9da710be50ff6809fed8f1963fecdfecc8b86656cadfca3bc24289414b0"}, + {file = "rpds_py-0.22.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59f4a79c19232a5774aee369a0c296712ad0e77f24e62cad53160312b1c1eaa1"}, + {file = "rpds_py-0.22.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1a60bce91f81ddaac922a40bbb571a12c1070cb20ebd6d49c48e0b101d87300d"}, + {file = "rpds_py-0.22.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e89391e6d60251560f0a8f4bd32137b077a80d9b7dbe6d5cab1cd80d2746f648"}, + {file = "rpds_py-0.22.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e3fb866d9932a3d7d0c82da76d816996d1667c44891bd861a0f97ba27e84fc74"}, + {file = "rpds_py-0.22.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1352ae4f7c717ae8cba93421a63373e582d19d55d2ee2cbb184344c82d2ae55a"}, + {file = "rpds_py-0.22.3-cp311-cp311-win32.whl", hash = "sha256:b0b4136a252cadfa1adb705bb81524eee47d9f6aab4f2ee4fa1e9d3cd4581f64"}, + {file = "rpds_py-0.22.3-cp311-cp311-win_amd64.whl", hash = "sha256:8bd7c8cfc0b8247c8799080fbff54e0b9619e17cdfeb0478ba7295d43f635d7c"}, + {file = "rpds_py-0.22.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:27e98004595899949bd7a7b34e91fa7c44d7a97c40fcaf1d874168bb652ec67e"}, + {file = "rpds_py-0.22.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1978d0021e943aae58b9b0b196fb4895a25cc53d3956b8e35e0b7682eefb6d56"}, + {file = "rpds_py-0.22.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:655ca44a831ecb238d124e0402d98f6212ac527a0ba6c55ca26f616604e60a45"}, + {file = "rpds_py-0.22.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:feea821ee2a9273771bae61194004ee2fc33f8ec7db08117ef9147d4bbcbca8e"}, + {file = "rpds_py-0.22.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22bebe05a9ffc70ebfa127efbc429bc26ec9e9b4ee4d15a740033efda515cf3d"}, + {file = "rpds_py-0.22.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3af6e48651c4e0d2d166dc1b033b7042ea3f871504b6805ba5f4fe31581d8d38"}, + {file = "rpds_py-0.22.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e67ba3c290821343c192f7eae1d8fd5999ca2dc99994114643e2f2d3e6138b15"}, + {file = "rpds_py-0.22.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:02fbb9c288ae08bcb34fb41d516d5eeb0455ac35b5512d03181d755d80810059"}, + {file = "rpds_py-0.22.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f56a6b404f74ab372da986d240e2e002769a7d7102cc73eb238a4f72eec5284e"}, + {file = "rpds_py-0.22.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0a0461200769ab3b9ab7e513f6013b7a97fdeee41c29b9db343f3c5a8e2b9e61"}, + {file = "rpds_py-0.22.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8633e471c6207a039eff6aa116e35f69f3156b3989ea3e2d755f7bc41754a4a7"}, + {file = "rpds_py-0.22.3-cp312-cp312-win32.whl", hash = 
"sha256:593eba61ba0c3baae5bc9be2f5232430453fb4432048de28399ca7376de9c627"}, + {file = "rpds_py-0.22.3-cp312-cp312-win_amd64.whl", hash = "sha256:d115bffdd417c6d806ea9069237a4ae02f513b778e3789a359bc5856e0404cc4"}, + {file = "rpds_py-0.22.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ea7433ce7e4bfc3a85654aeb6747babe3f66eaf9a1d0c1e7a4435bbdf27fea84"}, + {file = "rpds_py-0.22.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6dd9412824c4ce1aca56c47b0991e65bebb7ac3f4edccfd3f156150c96a7bf25"}, + {file = "rpds_py-0.22.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20070c65396f7373f5df4005862fa162db5d25d56150bddd0b3e8214e8ef45b4"}, + {file = "rpds_py-0.22.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0b09865a9abc0ddff4e50b5ef65467cd94176bf1e0004184eb915cbc10fc05c5"}, + {file = "rpds_py-0.22.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3453e8d41fe5f17d1f8e9c383a7473cd46a63661628ec58e07777c2fff7196dc"}, + {file = "rpds_py-0.22.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f5d36399a1b96e1a5fdc91e0522544580dbebeb1f77f27b2b0ab25559e103b8b"}, + {file = "rpds_py-0.22.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:009de23c9c9ee54bf11303a966edf4d9087cd43a6003672e6aa7def643d06518"}, + {file = "rpds_py-0.22.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1aef18820ef3e4587ebe8b3bc9ba6e55892a6d7b93bac6d29d9f631a3b4befbd"}, + {file = "rpds_py-0.22.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f60bd8423be1d9d833f230fdbccf8f57af322d96bcad6599e5a771b151398eb2"}, + {file = "rpds_py-0.22.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:62d9cfcf4948683a18a9aff0ab7e1474d407b7bab2ca03116109f8464698ab16"}, + {file = "rpds_py-0.22.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9253fc214112405f0afa7db88739294295f0e08466987f1d70e29930262b4c8f"}, + {file = "rpds_py-0.22.3-cp313-cp313-win32.whl", hash = "sha256:fb0ba113b4983beac1a2eb16faffd76cb41e176bf58c4afe3e14b9c681f702de"}, + {file = "rpds_py-0.22.3-cp313-cp313-win_amd64.whl", hash = "sha256:c58e2339def52ef6b71b8f36d13c3688ea23fa093353f3a4fee2556e62086ec9"}, + {file = "rpds_py-0.22.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:f82a116a1d03628a8ace4859556fb39fd1424c933341a08ea3ed6de1edb0283b"}, + {file = "rpds_py-0.22.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3dfcbc95bd7992b16f3f7ba05af8a64ca694331bd24f9157b49dadeeb287493b"}, + {file = "rpds_py-0.22.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59259dc58e57b10e7e18ce02c311804c10c5a793e6568f8af4dead03264584d1"}, + {file = "rpds_py-0.22.3-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5725dd9cc02068996d4438d397e255dcb1df776b7ceea3b9cb972bdb11260a83"}, + {file = "rpds_py-0.22.3-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99b37292234e61325e7a5bb9689e55e48c3f5f603af88b1642666277a81f1fbd"}, + {file = "rpds_py-0.22.3-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:27b1d3b3915a99208fee9ab092b8184c420f2905b7d7feb4aeb5e4a9c509b8a1"}, + {file = "rpds_py-0.22.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f612463ac081803f243ff13cccc648578e2279295048f2a8d5eb430af2bae6e3"}, + {file = "rpds_py-0.22.3-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f73d3fef726b3243a811121de45193c0ca75f6407fe66f3f4e183c983573e130"}, + {file 
= "rpds_py-0.22.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3f21f0495edea7fdbaaa87e633a8689cd285f8f4af5c869f27bc8074638ad69c"}, + {file = "rpds_py-0.22.3-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:1e9663daaf7a63ceccbbb8e3808fe90415b0757e2abddbfc2e06c857bf8c5e2b"}, + {file = "rpds_py-0.22.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a76e42402542b1fae59798fab64432b2d015ab9d0c8c47ba7addddbaf7952333"}, + {file = "rpds_py-0.22.3-cp313-cp313t-win32.whl", hash = "sha256:69803198097467ee7282750acb507fba35ca22cc3b85f16cf45fb01cb9097730"}, + {file = "rpds_py-0.22.3-cp313-cp313t-win_amd64.whl", hash = "sha256:f5cf2a0c2bdadf3791b5c205d55a37a54025c6e18a71c71f82bb536cf9a454bf"}, + {file = "rpds_py-0.22.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:378753b4a4de2a7b34063d6f95ae81bfa7b15f2c1a04a9518e8644e81807ebea"}, + {file = "rpds_py-0.22.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3445e07bf2e8ecfeef6ef67ac83de670358abf2996916039b16a218e3d95e97e"}, + {file = "rpds_py-0.22.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b2513ba235829860b13faa931f3b6846548021846ac808455301c23a101689d"}, + {file = "rpds_py-0.22.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eaf16ae9ae519a0e237a0f528fd9f0197b9bb70f40263ee57ae53c2b8d48aeb3"}, + {file = "rpds_py-0.22.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:583f6a1993ca3369e0f80ba99d796d8e6b1a3a2a442dd4e1a79e652116413091"}, + {file = "rpds_py-0.22.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4617e1915a539a0d9a9567795023de41a87106522ff83fbfaf1f6baf8e85437e"}, + {file = "rpds_py-0.22.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c150c7a61ed4a4f4955a96626574e9baf1adf772c2fb61ef6a5027e52803543"}, + {file = "rpds_py-0.22.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2fa4331c200c2521512595253f5bb70858b90f750d39b8cbfd67465f8d1b596d"}, + {file = "rpds_py-0.22.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:214b7a953d73b5e87f0ebece4a32a5bd83c60a3ecc9d4ec8f1dca968a2d91e99"}, + {file = "rpds_py-0.22.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f47ad3d5f3258bd7058d2d506852217865afefe6153a36eb4b6928758041d831"}, + {file = "rpds_py-0.22.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f276b245347e6e36526cbd4a266a417796fc531ddf391e43574cf6466c492520"}, + {file = "rpds_py-0.22.3-cp39-cp39-win32.whl", hash = "sha256:bbb232860e3d03d544bc03ac57855cd82ddf19c7a07651a7c0fdb95e9efea8b9"}, + {file = "rpds_py-0.22.3-cp39-cp39-win_amd64.whl", hash = "sha256:cfbc454a2880389dbb9b5b398e50d439e2e58669160f27b60e5eca11f68ae17c"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:d48424e39c2611ee1b84ad0f44fb3b2b53d473e65de061e3f460fc0be5f1939d"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:24e8abb5878e250f2eb0d7859a8e561846f98910326d06c0d51381fed59357bd"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b232061ca880db21fa14defe219840ad9b74b6158adb52ddf0e87bead9e8493"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac0a03221cdb5058ce0167ecc92a8c89e8d0decdc9e99a2ec23380793c4dcb96"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb0c341fa71df5a4595f9501df4ac5abfb5a09580081dffbd1ddd4654e6e9123"}, + {file = 
"rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf9db5488121b596dbfc6718c76092fda77b703c1f7533a226a5a9f65248f8ad"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b8db6b5b2d4491ad5b6bdc2bc7c017eec108acbf4e6785f42a9eb0ba234f4c9"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b3d504047aba448d70cf6fa22e06cb09f7cbd761939fdd47604f5e007675c24e"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e61b02c3f7a1e0b75e20c3978f7135fd13cb6cf551bf4a6d29b999a88830a338"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:e35ba67d65d49080e8e5a1dd40101fccdd9798adb9b050ff670b7d74fa41c566"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:26fd7cac7dd51011a245f29a2cc6489c4608b5a8ce8d75661bb4a1066c52dfbe"}, + {file = "rpds_py-0.22.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:177c7c0fce2855833819c98e43c262007f42ce86651ffbb84f37883308cb0e7d"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bb47271f60660803ad11f4c61b42242b8c1312a31c98c578f79ef9387bbde21c"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:70fb28128acbfd264eda9bf47015537ba3fe86e40d046eb2963d75024be4d055"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44d61b4b7d0c2c9ac019c314e52d7cbda0ae31078aabd0f22e583af3e0d79723"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f0e260eaf54380380ac3808aa4ebe2d8ca28b9087cf411649f96bad6900c728"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b25bc607423935079e05619d7de556c91fb6adeae9d5f80868dde3468657994b"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fb6116dfb8d1925cbdb52595560584db42a7f664617a1f7d7f6e32f138cdf37d"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a63cbdd98acef6570c62b92a1e43266f9e8b21e699c363c0fef13bd530799c11"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2b8f60e1b739a74bab7e01fcbe3dddd4657ec685caa04681df9d562ef15b625f"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:2e8b55d8517a2fda8d95cb45d62a5a8bbf9dd0ad39c5b25c8833efea07b880ca"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:2de29005e11637e7a2361fa151f780ff8eb2543a0da1413bb951e9f14b699ef3"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:666ecce376999bf619756a24ce15bb14c5bfaf04bf00abc7e663ce17c3f34fe7"}, + {file = "rpds_py-0.22.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:5246b14ca64a8675e0a7161f7af68fe3e910e6b90542b4bfb5439ba752191df6"}, + {file = "rpds_py-0.22.3.tar.gz", hash = "sha256:e32fee8ab45d3c2db6da19a5323bc3362237c8b653c70194414b892fd06a080d"}, +] + [[package]] name = "six" -version = "1.16.0" +version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = 
"sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] [[package]] @@ -1075,15 +2433,145 @@ files = [ {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, ] +[[package]] +name = "tiktoken" +version = "0.8.0" +description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" +optional = false +python-versions = ">=3.9" +files = [ + {file = "tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e"}, + {file = "tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9269348cb650726f44dd3bbb3f9110ac19a8dcc8f54949ad3ef652ca22a38e21"}, + {file = "tiktoken-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e13f37bc4ef2d012731e93e0fef21dc3b7aea5bb9009618de9a4026844e560"}, + {file = "tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f13d13c981511331eac0d01a59b5df7c0d4060a8be1e378672822213da51e0a2"}, + {file = "tiktoken-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:6b2ddbc79a22621ce8b1166afa9f9a888a664a579350dc7c09346a3b5de837d9"}, + {file = "tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005"}, + {file = "tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1"}, + {file = "tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a"}, + {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d"}, + {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47"}, + {file = "tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419"}, + {file = "tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99"}, + {file = "tiktoken-0.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:881839cfeae051b3628d9823b2e56b5cc93a9e2efb435f4cf15f17dc45f21586"}, + {file = "tiktoken-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fe9399bdc3f29d428f16a2f86c3c8ec20be3eac5f53693ce4980371c3245729b"}, + {file = "tiktoken-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a58deb7075d5b69237a3ff4bb51a726670419db6ea62bdcd8bd80c78497d7ab"}, + {file = "tiktoken-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2908c0d043a7d03ebd80347266b0e58440bdef5564f84f4d29fb235b5df3b04"}, + {file = "tiktoken-0.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:294440d21a2a51e12d4238e68a5972095534fe9878be57d905c476017bff99fc"}, + {file = "tiktoken-0.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:d8f3192733ac4d77977432947d563d7e1b310b96497acd3c196c9bddb36ed9db"}, + {file = 
"tiktoken-0.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:02be1666096aff7da6cbd7cdaa8e7917bfed3467cd64b38b1f112e96d3b06a24"}, + {file = "tiktoken-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c94ff53c5c74b535b2cbf431d907fc13c678bbd009ee633a2aca269a04389f9a"}, + {file = "tiktoken-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b231f5e8982c245ee3065cd84a4712d64692348bc609d84467c57b4b72dcbc5"}, + {file = "tiktoken-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4177faa809bd55f699e88c96d9bb4635d22e3f59d635ba6fd9ffedf7150b9953"}, + {file = "tiktoken-0.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5376b6f8dc4753cd81ead935c5f518fa0fbe7e133d9e25f648d8c4dabdd4bad7"}, + {file = "tiktoken-0.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:18228d624807d66c87acd8f25fc135665617cab220671eb65b50f5d70fa51f69"}, + {file = "tiktoken-0.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7e17807445f0cf1f25771c9d86496bd8b5c376f7419912519699f3cc4dc5c12e"}, + {file = "tiktoken-0.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:886f80bd339578bbdba6ed6d0567a0d5c6cfe198d9e587ba6c447654c65b8edc"}, + {file = "tiktoken-0.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6adc8323016d7758d6de7313527f755b0fc6c72985b7d9291be5d96d73ecd1e1"}, + {file = "tiktoken-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b591fb2b30d6a72121a80be24ec7a0e9eb51c5500ddc7e4c2496516dd5e3816b"}, + {file = "tiktoken-0.8.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:845287b9798e476b4d762c3ebda5102be87ca26e5d2c9854002825d60cdb815d"}, + {file = "tiktoken-0.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:1473cfe584252dc3fa62adceb5b1c763c1874e04511b197da4e6de51d6ce5a02"}, + {file = "tiktoken-0.8.0.tar.gz", hash = "sha256:9ccbb2740f24542534369c5635cfd9b2b3c2490754a78ac8831d99f89f94eeb2"}, +] + +[package.dependencies] +regex = ">=2022.1.18" +requests = ">=2.26.0" + +[package.extras] +blobfile = ["blobfile (>=2)"] + +[[package]] +name = "tldextract" +version = "5.1.3" +description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "tldextract-5.1.3-py3-none-any.whl", hash = "sha256:78de310cc2ca018692de5ddf320f9d6bd7c5cf857d0fd4f2175f0cdf4440ea75"}, + {file = "tldextract-5.1.3.tar.gz", hash = "sha256:d43c7284c23f5dc8a42fd0fee2abede2ff74cc622674e4cb07f514ab3330c338"}, +] + +[package.dependencies] +filelock = ">=3.0.8" +idna = "*" +requests = ">=2.1.0" +requests-file = ">=1.4" + +[package.extras] +release = ["build", "twine"] +testing = ["mypy", "pytest", "pytest-gitignore", "pytest-mock", "responses", "ruff", "syrupy", "tox", "tox-uv", "types-filelock", "types-requests"] + +[[package]] +name = "tokenizers" +version = "0.21.0" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tokenizers-0.21.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3c4c93eae637e7d2aaae3d376f06085164e1660f89304c0ab2b1d08a406636b2"}, + {file = "tokenizers-0.21.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f53ea537c925422a2e0e92a24cce96f6bc5046bbef24a1652a5edc8ba975f62e"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b177fb54c4702ef611de0c069d9169f0004233890e0c4c5bd5508ae05abf193"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6b43779a269f4629bebb114e19c3fca0223296ae9fea8bb9a7a6c6fb0657ff8e"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9aeb255802be90acfd363626753fda0064a8df06031012fe7d52fd9a905eb00e"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d8b09dbeb7a8d73ee204a70f94fc06ea0f17dcf0844f16102b9f414f0b7463ba"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:400832c0904f77ce87c40f1a8a27493071282f785724ae62144324f171377273"}, + {file = "tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e84ca973b3a96894d1707e189c14a774b701596d579ffc7e69debfc036a61a04"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:eb7202d231b273c34ec67767378cd04c767e967fda12d4a9e36208a34e2f137e"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:089d56db6782a73a27fd8abf3ba21779f5b85d4a9f35e3b493c7bbcbbf0d539b"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:c87ca3dc48b9b1222d984b6b7490355a6fdb411a2d810f6f05977258400ddb74"}, + {file = "tokenizers-0.21.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4145505a973116f91bc3ac45988a92e618a6f83eb458f49ea0790df94ee243ff"}, + {file = "tokenizers-0.21.0-cp39-abi3-win32.whl", hash = "sha256:eb1702c2f27d25d9dd5b389cc1f2f51813e99f8ca30d9e25348db6585a97e24a"}, + {file = "tokenizers-0.21.0-cp39-abi3-win_amd64.whl", hash = "sha256:87841da5a25a3a5f70c102de371db120f41873b854ba65e52bccd57df5a3780c"}, + {file = "tokenizers-0.21.0.tar.gz", hash = "sha256:ee0894bf311b75b0c03079f33859ae4b2334d675d4e93f5a4132e1eae2834fe4"}, +] + +[package.dependencies] +huggingface-hub = ">=0.16.4,<1.0" + +[package.extras] +dev = ["tokenizers[testing]"] +docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] +testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] + [[package]] name = "tomli" -version = "2.0.2" +version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" files = [ - {file = "tomli-2.0.2-py3-none-any.whl", hash = 
"sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, - {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = 
"tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] [[package]] @@ -1097,6 +2585,27 @@ files = [ {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, ] +[[package]] +name = "tqdm" +version = "4.67.1" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] +discord = ["requests"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -1108,21 +2617,153 @@ files = [ {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +[[package]] +name = "urllib3" +version = "2.3.0" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, + {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "win32-setctime" -version = "1.1.0" +version = "1.2.0" description = "A small Python utility to set file creation time on Windows" optional = false python-versions = ">=3.5" files = [ - {file = "win32_setctime-1.1.0-py3-none-any.whl", hash = "sha256:231db239e959c2fe7eb1d7dc129f11172354f98361c4fa2d6d2d7e278baa8aad"}, - {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, + {file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"}, + {file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"}, ] [package.extras] dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] +[[package]] +name = "yarl" +version = "1.18.3" +description = "Yet another URL library" +optional = false +python-versions = ">=3.9" +files = [ + {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7df647e8edd71f000a5208fe6ff8c382a1de8edfbccdbbfe649d263de07d8c34"}, + {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c69697d3adff5aa4f874b19c0e4ed65180ceed6318ec856ebc423aa5850d84f7"}, + {file = "yarl-1.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:602d98f2c2d929f8e697ed274fbadc09902c4025c5a9963bf4e9edfc3ab6f7ed"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c654d5207c78e0bd6d749f6dae1dcbbfde3403ad3a4b11f3c5544d9906969dde"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5094d9206c64181d0f6e76ebd8fb2f8fe274950a63890ee9e0ebfd58bf9d787b"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35098b24e0327fc4ebdc8ffe336cee0a87a700c24ffed13161af80124b7dc8e5"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3236da9272872443f81fedc389bace88408f64f89f75d1bdb2256069a8730ccc"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2c08cc9b16f4f4bc522771d96734c7901e7ebef70c6c5c35dd0f10845270bcd"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80316a8bd5109320d38eef8833ccf5f89608c9107d02d2a7f985f98ed6876990"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c1e1cc06da1491e6734f0ea1e6294ce00792193c463350626571c287c9a704db"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fea09ca13323376a2fdfb353a5fa2e59f90cd18d7ca4eaa1fd31f0a8b4f91e62"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e3b9fd71836999aad54084906f8663dffcd2a7fb5cdafd6c37713b2e72be1760"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:757e81cae69244257d125ff31663249b3013b5dc0a8520d73694aed497fb195b"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:b1771de9944d875f1b98a745bc547e684b863abf8f8287da8466cf470ef52690"}, + {file = "yarl-1.18.3-cp310-cp310-win32.whl", hash = "sha256:8874027a53e3aea659a6d62751800cf6e63314c160fd607489ba5c2edd753cf6"}, + {file = "yarl-1.18.3-cp310-cp310-win_amd64.whl", hash = "sha256:93b2e109287f93db79210f86deb6b9bbb81ac32fc97236b16f7433db7fc437d8"}, + {file = "yarl-1.18.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8503ad47387b8ebd39cbbbdf0bf113e17330ffd339ba1144074da24c545f0069"}, + {file = "yarl-1.18.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:02ddb6756f8f4517a2d5e99d8b2f272488e18dd0bfbc802f31c16c6c20f22193"}, + {file = "yarl-1.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:67a283dd2882ac98cc6318384f565bffc751ab564605959df4752d42483ad889"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d980e0325b6eddc81331d3f4551e2a333999fb176fd153e075c6d1c2530aa8a8"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b643562c12680b01e17239be267bc306bbc6aac1f34f6444d1bded0c5ce438ca"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c017a3b6df3a1bd45b9fa49a0f54005e53fbcad16633870104b66fa1a30a29d8"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75674776d96d7b851b6498f17824ba17849d790a44d282929c42dbb77d4f17ae"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ccaa3a4b521b780a7e771cc336a2dba389a0861592bbce09a476190bb0c8b4b3"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2d06d3005e668744e11ed80812e61efd77d70bb7f03e33c1598c301eea20efbb"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:9d41beda9dc97ca9ab0b9888cb71f7539124bc05df02c0cff6e5acc5a19dcc6e"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ba23302c0c61a9999784e73809427c9dbedd79f66a13d84ad1b1943802eaaf59"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6748dbf9bfa5ba1afcc7556b71cda0d7ce5f24768043a02a58846e4a443d808d"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0b0cad37311123211dc91eadcb322ef4d4a66008d3e1bdc404808992260e1a0e"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fb2171a4486bb075316ee754c6d8382ea6eb8b399d4ec62fde2b591f879778a"}, + {file = "yarl-1.18.3-cp311-cp311-win32.whl", hash = "sha256:61b1a825a13bef4a5f10b1885245377d3cd0bf87cba068e1d9a88c2ae36880e1"}, + {file = "yarl-1.18.3-cp311-cp311-win_amd64.whl", hash = "sha256:b9d60031cf568c627d028239693fd718025719c02c9f55df0a53e587aab951b5"}, + {file = "yarl-1.18.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1dd4bdd05407ced96fed3d7f25dbbf88d2ffb045a0db60dbc247f5b3c5c25d50"}, + {file = "yarl-1.18.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7c33dd1931a95e5d9a772d0ac5e44cac8957eaf58e3c8da8c1414de7dd27c576"}, + {file = "yarl-1.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b411eddcfd56a2f0cd6a384e9f4f7aa3efee14b188de13048c25b5e91f1640"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:436c4fc0a4d66b2badc6c5fc5ef4e47bb10e4fd9bf0c79524ac719a01f3607c2"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e35ef8683211db69ffe129a25d5634319a677570ab6b2eba4afa860f54eeaf75"}, + {file = 
"yarl-1.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84b2deecba4a3f1a398df819151eb72d29bfeb3b69abb145a00ddc8d30094512"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e5a1fea0fd4f5bfa7440a47eff01d9822a65b4488f7cff83155a0f31a2ecba"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0e883008013c0e4aef84dcfe2a0b172c4d23c2669412cf5b3371003941f72bb"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a3f356548e34a70b0172d8890006c37be92995f62d95a07b4a42e90fba54272"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ccd17349166b1bee6e529b4add61727d3f55edb7babbe4069b5764c9587a8cc6"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b958ddd075ddba5b09bb0be8a6d9906d2ce933aee81100db289badbeb966f54e"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c7d79f7d9aabd6011004e33b22bc13056a3e3fb54794d138af57f5ee9d9032cb"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4891ed92157e5430874dad17b15eb1fda57627710756c27422200c52d8a4e393"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ce1af883b94304f493698b00d0f006d56aea98aeb49d75ec7d98cd4a777e9285"}, + {file = "yarl-1.18.3-cp312-cp312-win32.whl", hash = "sha256:f91c4803173928a25e1a55b943c81f55b8872f0018be83e3ad4938adffb77dd2"}, + {file = "yarl-1.18.3-cp312-cp312-win_amd64.whl", hash = "sha256:7e2ee16578af3b52ac2f334c3b1f92262f47e02cc6193c598502bd46f5cd1477"}, + {file = "yarl-1.18.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:90adb47ad432332d4f0bc28f83a5963f426ce9a1a8809f5e584e704b82685dcb"}, + {file = "yarl-1.18.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:913829534200eb0f789d45349e55203a091f45c37a2674678744ae52fae23efa"}, + {file = "yarl-1.18.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ef9f7768395923c3039055c14334ba4d926f3baf7b776c923c93d80195624782"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a19f62ff30117e706ebc9090b8ecc79aeb77d0b1f5ec10d2d27a12bc9f66d0"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e17c9361d46a4d5addf777c6dd5eab0715a7684c2f11b88c67ac37edfba6c482"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a74a13a4c857a84a845505fd2d68e54826a2cd01935a96efb1e9d86c728e186"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41f7ce59d6ee7741af71d82020346af364949314ed3d87553763a2df1829cc58"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f52a265001d830bc425f82ca9eabda94a64a4d753b07d623a9f2863fde532b53"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:82123d0c954dc58db301f5021a01854a85bf1f3bb7d12ae0c01afc414a882ca2"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2ec9bbba33b2d00999af4631a3397d1fd78290c48e2a3e52d8dd72db3a067ac8"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:fbd6748e8ab9b41171bb95c6142faf068f5ef1511935a0aa07025438dd9a9bc1"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:877d209b6aebeb5b16c42cbb377f5f94d9e556626b1bfff66d7b0d115be88d0a"}, + {file = 
"yarl-1.18.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b464c4ab4bfcb41e3bfd3f1c26600d038376c2de3297760dfe064d2cb7ea8e10"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8d39d351e7faf01483cc7ff7c0213c412e38e5a340238826be7e0e4da450fdc8"}, + {file = "yarl-1.18.3-cp313-cp313-win32.whl", hash = "sha256:61ee62ead9b68b9123ec24bc866cbef297dd266175d53296e2db5e7f797f902d"}, + {file = "yarl-1.18.3-cp313-cp313-win_amd64.whl", hash = "sha256:578e281c393af575879990861823ef19d66e2b1d0098414855dd367e234f5b3c"}, + {file = "yarl-1.18.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:61e5e68cb65ac8f547f6b5ef933f510134a6bf31bb178be428994b0cb46c2a04"}, + {file = "yarl-1.18.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fe57328fbc1bfd0bd0514470ac692630f3901c0ee39052ae47acd1d90a436719"}, + {file = "yarl-1.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a440a2a624683108a1b454705ecd7afc1c3438a08e890a1513d468671d90a04e"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09c7907c8548bcd6ab860e5f513e727c53b4a714f459b084f6580b49fa1b9cee"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b4f6450109834af88cb4cc5ecddfc5380ebb9c228695afc11915a0bf82116789"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9ca04806f3be0ac6d558fffc2fdf8fcef767e0489d2684a21912cc4ed0cd1b8"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77a6e85b90a7641d2e07184df5557132a337f136250caafc9ccaa4a2a998ca2c"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6333c5a377c8e2f5fae35e7b8f145c617b02c939d04110c76f29ee3676b5f9a5"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0b3c92fa08759dbf12b3a59579a4096ba9af8dd344d9a813fc7f5070d86bbab1"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:4ac515b860c36becb81bb84b667466885096b5fc85596948548b667da3bf9f24"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:045b8482ce9483ada4f3f23b3774f4e1bf4f23a2d5c912ed5170f68efb053318"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:a4bb030cf46a434ec0225bddbebd4b89e6471814ca851abb8696170adb163985"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:54d6921f07555713b9300bee9c50fb46e57e2e639027089b1d795ecd9f7fa910"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1d407181cfa6e70077df3377938c08012d18893f9f20e92f7d2f314a437c30b1"}, + {file = "yarl-1.18.3-cp39-cp39-win32.whl", hash = "sha256:ac36703a585e0929b032fbaab0707b75dc12703766d0b53486eabd5139ebadd5"}, + {file = "yarl-1.18.3-cp39-cp39-win_amd64.whl", hash = "sha256:ba87babd629f8af77f557b61e49e7c7cac36f22f871156b91e10a6e9d4f829e9"}, + {file = "yarl-1.18.3-py3-none-any.whl", hash = "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b"}, + {file = "yarl-1.18.3.tar.gz", hash = "sha256:ac1801c45cbf77b6c99242eeff4fffb5e4e73a800b5c4ad4fc0be5def634d2e1"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" +propcache = ">=0.2.0" + +[[package]] +name = "zipp" +version = "3.21.0" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.9" +files = [ + {file = "zipp-3.21.0-py3-none-any.whl", hash = 
"sha256:ac1bbe05fd2991f160ebce24ffbac5f6d11d83dc90891255885223d42b3cd931"}, + {file = "zipp-3.21.0.tar.gz", hash = "sha256:2c9958f6430a2040341a52eb608ed6dd93ef4392e02ffe219417c1b28b5dd1f4"}, +] + +[package.extras] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +cover = ["pytest-cov"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +enabler = ["pytest-enabler (>=2.2)"] +test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +type = ["pytest-mypy"] + [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "c8bdf6306d9ba54b5a62aa438d1be669fef3c801ec1c3a6b7b89de1283c61bdd" +content-hash = "8f623a140b1f5a63f967123172463d07d41c4269d840262fd6dbcd2f4bab4b6d" diff --git a/pyproject.toml b/pyproject.toml index 070e9e7..89801b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,11 @@ typing-extensions = "^4.12.0" loguru = "^0.7.2" httpx = "^0.27.2" markdownify = "^0.13.1" +litellm = "^1.57.0" +pillow = "^11.0.0" +json-repair = "^0.30.1" +tldextract = "^5.1.3" +anthropic = "^0.42.0" [tool.poetry.group.dev.dependencies] @@ -45,13 +50,15 @@ pylint-pydantic = "^0.3.2" flake8 = "^7.1.1" pytest = "^8.3.3" pytest-asyncio = "^0.24.0" +autoflake = "^2.3.1" +isort = "^5.13.2" [tool.pylint] load-plugins = "pylint_pydantic" [tool.poe.tasks] generate_sync = "python scripts/generate_sync.py" -format_sync = "python -m black ./dendrite/sync_api/" +format_sync = "python -m black ./dendrite//browser/sync_api/" build_sync = ["generate_sync", "format_sync"] test_sync = "pytest tests/tests_sync" diff --git a/scripts/generate_sync.py b/scripts/generate_sync.py index b6d69c0..7d27ef0 100644 --- a/scripts/generate_sync.py +++ b/scripts/generate_sync.py @@ -1,10 +1,10 @@ -import os import ast -import shutil import logging +import os +import shutil import subprocess import sys -from typing import Dict, Any +from typing import Any, Dict logging.basicConfig(level=logging.WARNING) @@ -138,7 +138,7 @@ def visit_Import(self, node): alias = ast.alias(name="time", asname=alias.asname) elif alias.name.startswith("dendrite"): new_name = alias.name.replace( - "dendrite.async_api", "dendrite.sync_api", 1 + "dendrite.browser.async_api", "dendrite.browser.sync_api", 1 ) alias = ast.alias(name=new_name, asname=alias.asname) new_names.append(alias) @@ -160,7 +160,7 @@ def visit_ImportFrom(self, node): node.module = "time" elif node.module and node.module.startswith("dendrite"): node.module = node.module.replace( - "dendrite.async_api", "dendrite.sync_api", 1 + "dendrite.browser.async_api", "dendrite.browser.sync_api", 1 ) return node @@ -279,8 +279,8 @@ def get_uncommitted_diff(folder): if __name__ == "__main__": - source_dir = "dendrite/async_api" - target_dir = "dendrite/sync_api" + source_dir = "dendrite/browser/async_api" + target_dir = "dendrite/browser/sync_api" renames = { "AsyncBrowserbaseDownload": "BrowserbaseDownload", "AsyncBrowserbaseBrowser": "BrowserbaseBrowser", @@ -289,6 +289,7 @@ def get_uncommitted_diff(folder): "AsyncPage": "Page", "AsyncDendriteRemoteBrowser": "DendriteRemoteBrowser", "AsyncElementsResponse": "ElementsResponse", + "AsyncLogicEngine": "LogicEngine", } if check_for_uncommitted_changes(target_dir): diff --git a/test.py b/test.py new file mode 100644 index 0000000..7df191e --- /dev/null +++ b/test.py @@ -0,0 +1,26 @@ +from dendrite import AsyncDendrite + + +async def send_email(to, 
subject, message): + client = AsyncDendrite(auth="outlook.live.com") + + # Navigate + await client.goto( + "https://outlook.live.com/mail/0/", expected_page="An email inbox" + ) + + # Create new email and populate fields + await client.click("The new email button") + await client.fill("The recipient field", to) + await client.press("Enter") + await client.fill("The subject field", subject) + await client.fill("The message field", message) + + # Send email + await client.press("Enter", hold_cmd=True) + + +if __name__ == "__main__": + import asyncio + + asyncio.run(send_email("charles@dendrite.systems", "Hello", "This is a test email")) diff --git a/tests/tests_async/conftest.py b/tests/tests_async/conftest.py index c74c2cd..49499da 100644 --- a/tests/tests_async/conftest.py +++ b/tests/tests_async/conftest.py @@ -1,13 +1,8 @@ import pytest -import asyncio - import pytest_asyncio -from dendrite.async_api._core.dendrite_browser import ( - AsyncDendrite, -) -from dendrite.remote import ( - BrowserbaseConfig, -) # Import your class here + +from dendrite import AsyncDendrite +from dendrite.remote import BrowserbaseConfig # Import your class here @pytest_asyncio.fixture(scope="session") @@ -18,9 +13,6 @@ async def dendrite_browser(): The fixture has a session scope, so it will only be initialized once for the entire test session. """ async with AsyncDendrite( - openai_api_key="your_openai_api_key", - dendrite_api_key="your_dendrite_api_key", - anthropic_api_key="your_anthropic_api_key", playwright_options={"headless": True}, ) as browser: yield browser # Provide the browser to tests @@ -34,9 +26,6 @@ async def browserbase(): The fixture has a session scope, so it will only be initialized once for the entire test session. """ async with AsyncDendrite( - openai_api_key="your_openai_api_key", - dendrite_api_key="your_dendrite_api_key", - anthropic_api_key="your_anthropic_api_key", playwright_options={"headless": True}, remote_config=BrowserbaseConfig(), ) as browser: diff --git a/tests/tests_async/test_browserbase.py b/tests/tests_async/test_browserbase.py index 616b3fd..9ab6d46 100644 --- a/tests/tests_async/test_browserbase.py +++ b/tests/tests_async/test_browserbase.py @@ -1,6 +1,6 @@ # import os # import pytest -# from dendrite.async_api._core.dendrite_browser import AsyncDendrite +# from dendrite.browser.async_api._core.dendrite_browser import AsyncDendrite # @pytest.mark.asyncio(loop_scope="session") diff --git a/tests/tests_async/test_download.py b/tests/tests_async/test_download.py index 608bef0..a1b39af 100644 --- a/tests/tests_async/test_download.py +++ b/tests/tests_async/test_download.py @@ -1,8 +1,8 @@ -import asyncio import os + import pytest -from dendrite.async_api._core.dendrite_browser import AsyncDendrite +from dendrite import AsyncDendrite pytest_plugins = ("pytest_asyncio",) diff --git a/tests/tests_async/tests.py b/tests/tests_async/tests.py index 7e7915a..841e803 100644 --- a/tests/tests_async/tests.py +++ b/tests/tests_async/tests.py @@ -1,4 +1,5 @@ import pytest + from dendrite import AsyncDendrite diff --git a/tests/tests_sync/conftest.py b/tests/tests_sync/conftest.py index e7fbfc0..fd9fc87 100644 --- a/tests/tests_sync/conftest.py +++ b/tests/tests_sync/conftest.py @@ -1,5 +1,6 @@ import pytest -from dendrite.sync_api import Dendrite + +from dendrite import Dendrite @pytest.fixture(scope="session") @@ -10,9 +11,6 @@ def dendrite_browser(): The fixture has a session scope, so it will only be initialized once for the entire test session. 
""" browser = Dendrite( - openai_api_key="your_openai_api_key", - dendrite_api_key="your_dendrite_api_key", - anthropic_api_key="your_anthropic_api_key", playwright_options={"headless": True}, ) # Launch the browser diff --git a/tests/tests_sync/test_context.py b/tests/tests_sync/test_context.py index a1a0f4f..06713dd 100644 --- a/tests/tests_sync/test_context.py +++ b/tests/tests_sync/test_context.py @@ -1,13 +1,9 @@ # content of test_tmp_path.py -import os -from dendrite.sync_api import Dendrite +from dendrite import Dendrite def test_context_manager(): with Dendrite( - openai_api_key="your_openai_api_key", - dendrite_api_key="your_dendrite_api_key", - anthropic_api_key="your_anthropic_api_key", playwright_options={"headless": True}, ) as browser: browser.goto("https://dendrite.systems") diff --git a/tests/tests_sync/test_download.py b/tests/tests_sync/test_download.py index 4ddbe14..db02e2e 100644 --- a/tests/tests_sync/test_download.py +++ b/tests/tests_sync/test_download.py @@ -1,6 +1,7 @@ # content of test_tmp_path.py import os -from dendrite.sync_api import Dendrite + +from dendrite import Dendrite def test_download(dendrite_browser: Dendrite, tmp_path):