diff --git a/CHANGELOG.md b/CHANGELOG.md index c8593d4b..8af4d634 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,29 +1,30 @@ -## [1.17.0-beta.11](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.17.0-beta.10...v1.17.0-beta.11) (2024-09-07) +## [1.18.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.18.0...v1.18.1) (2024-09-08) -### Features +### Bug Fixes -* add scrape_do_integration ([94e69a0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/94e69a051591aeec1e7268bf0d5e0338f90e9539)) -* fetch_node improved ([167f970](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/167f97040f081867cecff542c3af8aa122499ce8)) +* **browser_base_fetch:** correct function signature and async_mode handling ([007ff08](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/007ff084c68d419fac040d9b5cca3980458cfabc)) -## [1.17.0-beta.10](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.17.0-beta.9...v1.17.0-beta.10) (2024-09-07) +## [1.18.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.17.0...v1.18.0) (2024-09-08) -### Bug Fixes -* screenshot_scraper ([ef7a589](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ef7a5891dcb1b4ed8a97947f5563fa78af917ecb)) +### Features + +* **browser_base_fetch:** add async_mode to support both synchronous and asynchronous execution ([d56253d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/d56253d183969584cacc0cb164daa0152462f21c)) + +## [1.17.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.16.0...v1.17.0) (2024-09-08) -## [1.17.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.17.0-beta.8...v1.17.0-beta.9) (2024-09-06) ### Features -* ConcatNode.py added for heavy merge operations ([bd4b26d](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/bd4b26d7d7c1a7953d1bc9d78b436007880028c9)) +* **docloaders:** Enhance browser_base_fetch function flexibility 
([57fd01f](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/57fd01f9a76ea8ea69ec04b7238ab58ca72ac8f4)) -## [1.17.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.17.0-beta.7...v1.17.0-beta.8) (2024-09-06) +### Docs -### Features +* **sponsor:** 🅱️ Browserbase sponsor 🅱️ ([a540139](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/a5401394cc939d9a5fc58b8a9145141c2f047bab)) * **AbstractGraph:** add adjustable rate limit ([2859fb7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2859fb72d699f26b617ed2f949cdcfca1671c5c8)) @@ -98,6 +99,7 @@ * **release:** 1.16.0-beta.3 [skip ci] ([886c987](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/886c987172bb57fb59863e4d7b494797bba16980)) * **release:** 1.16.0-beta.4 [skip ci] ([ba5c7ad](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ba5c7adcea138d993005377f4cfe438795e1b124)) + ## [1.16.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.15.2...v1.16.0) (2024-09-01) diff --git a/README.md b/README.md index d9058936..32068761 100644 --- a/README.md +++ b/README.md @@ -32,27 +32,32 @@ playwright install **Note**: it is recommended to install the library in a virtual environment to avoid conflicts with other libraries 🐱 -By the way if you to use not mandatory modules it is necessary to install by yourself with the following command: +
+Optional Dependencies +Additional dependencies can be added while installing the library: + +- More Language Models: additional language models are installed, such as Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints. -### Installing "Other Language Models" This group allows you to use additional language models like Fireworks, Groq, Anthropic, Together AI, Hugging Face, and Nvidia AI Endpoints. ```bash pip install scrapegraphai[other-language-models] -``` -### Installing "More Semantic Options" +- Semantic Options: this group includes tools for advanced semantic processing, such as Graphviz. + + ```bash + pip install scrapegraphai[more-semantic-options] + ``` + +- Browser Options: this group includes additional browser management tools/services, such as Browserbase. + + ```bash + pip install scrapegraphai[more-browser-options] + ``` + +
-This group includes tools for advanced semantic processing, such as Graphviz. -```bash -pip install scrapegraphai[more-semantic-options] -``` -### Installing "More Browser Options" -This group includes additional browser management options, such as BrowserBase. -```bash -pip install scrapegraphai[more-browser-options] -``` ### Installing "More Browser Options" @@ -135,6 +140,9 @@ Check out also the Docusaurus [here](https://scrapegraph-doc.onrender.com/). ## 🏆 Sponsors
+ + Browserbase + SerpAPI diff --git a/docs/assets/browserbase_logo.png b/docs/assets/browserbase_logo.png new file mode 100644 index 00000000..bd16f2e1 Binary files /dev/null and b/docs/assets/browserbase_logo.png differ diff --git a/docs/source/introduction/overview.rst b/docs/source/introduction/overview.rst index 00a76d5d..506770a5 100644 --- a/docs/source/introduction/overview.rst +++ b/docs/source/introduction/overview.rst @@ -82,6 +82,11 @@ FAQ Sponsors ======== +.. image:: ../../assets/browserbase_logo.png + :width: 10% + :alt: Browserbase + :target: https://www.browserbase.com/ + .. image:: ../../assets/serp_api_logo.png :width: 10% :alt: Serp API diff --git a/scrapegraphai/docloaders/browser_base.py b/scrapegraphai/docloaders/browser_base.py index 318c9f38..c9413d68 100644 --- a/scrapegraphai/docloaders/browser_base.py +++ b/scrapegraphai/docloaders/browser_base.py @@ -3,7 +3,7 @@ """ from typing import List -def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[str]: +def browser_base_fetch(api_key: str, project_id: str, link: List[str], text_content: bool = True, async_mode: bool = False) -> List[str]: """ BrowserBase Fetch @@ -13,6 +13,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s - `api_key`: The API key provided by BrowserBase. - `project_id`: The ID of the project on BrowserBase where you want to fetch data from. - `link`: The URL or link that you want to fetch data from. + - `text_content`: A boolean flag to specify whether to return only the text content (True) or the full HTML (False). + - `async_mode`: A boolean flag that determines whether the function runs asynchronously (True) or synchronously (False, default). It initializes a Browserbase object with the given API key and project ID, then uses this object to load the specified link. 
@@ -35,6 +37,8 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s api_key (str): The API key provided by BrowserBase. project_id (str): The ID of the project on BrowserBase where you want to fetch data from. link (str): The URL or link that you want to fetch data from. + text_content (bool): Whether to return only the text content (True) or the full HTML (False). Defaults to True. + async_mode (bool): Whether to run the function asynchronously (True) or synchronously (False). Defaults to False. Returns: object: The result of the loading operation. @@ -49,7 +53,19 @@ def browser_base_fetch(api_key: str, project_id: str, link: List[str]) -> List[s browserbase = Browserbase(api_key=api_key, project_id=project_id) result = [] - for l in link: - result.append(browserbase.load(l, text_content=True)) + async def _async_fetch_link(l): + return await asyncio.to_thread(browserbase.load, l, text_content=text_content) + + if async_mode: + async def _async_browser_base_fetch(): + for l in link: + result.append(await _async_fetch_link(l)) + return result + + result = asyncio.run(_async_browser_base_fetch()) + else: + for l in link: + result.append(browserbase.load(l, text_content=text_content)) + return result