From f73343f19386b31878706963597c2565a023068d Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:09:58 +0200 Subject: [PATCH 1/3] fix(AbstractGraph): correct and simplify instancing logic --- scrapegraphai/graphs/abstract_graph.py | 130 +++++++------------------ scrapegraphai/helpers/models_tokens.py | 4 +- 2 files changed, 39 insertions(+), 95 deletions(-) diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 2dcc54f9..03fd30e2 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -125,103 +125,47 @@ def _create_llm(self, llm_config: dict) -> object: self.model_token = llm_params["model_tokens"] except KeyError as exc: raise KeyError("model_tokens not specified") from exc - return llm_params["model_instance"] - - def handle_model(model_name, provider, token_key, default_token=8192): - try: - self.model_token = models_tokens[provider][token_key] - except KeyError: - print(f"Model not found, using default token size ({default_token})") - self.model_token = default_token - llm_params["model_provider"] = provider - llm_params["model"] = model_name - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - return init_chat_model(**llm_params) - - known_models = {"chatgpt","gpt","openai", "azure_openai", "google_genai", - "ollama", "oneapi", "nvidia", "groq", "google_vertexai", - "bedrock", "mistralai", "hugging_face", "deepseek", "ernie", - "fireworks", "claude-3-"} - - if llm_params["model"].split("/")[0] not in known_models and llm_params["model"].split("-")[0] not in known_models: - raise ValueError(f"Model '{llm_params['model']}' is not supported") - + return llm_params["model_instance"] + + known_providers = {"openai", "azure_openai", "google_genai", "google_vertexai", + "ollama", "oneapi", "nvidia", "groq", "anthropic" "bedrock", "mistralai", + "hugging_face", "deepseek", "ernie", "fireworks"} + + 
split_model_provider = llm_params["model"].split("/") + llm_params["model_provider"] = split_model_provider[0] + llm_params["model"] = split_model_provider[1:] + + if llm_params["model_provider"] not in known_providers: + raise ValueError(f"Provider {llm_params['model_provider']} is not supported. If possible, try to use a model instance instead.") + try: - if "fireworks" in llm_params["model"]: - model_name = "/".join(llm_params["model"].split("/")[1:]) - token_key = llm_params["model"].split("/")[-1] - return handle_model(model_name, "fireworks", token_key) - - elif "gemini" in llm_params["model"]: - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, "google_genai", model_name) - - elif llm_params["model"].startswith("claude"): - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, "anthropic", model_name) - - elif llm_params["model"].startswith("vertexai"): - return handle_model(llm_params["model"], "google_vertexai", llm_params["model"]) - - elif "gpt-" in llm_params["model"]: - return handle_model(llm_params["model"], "openai", llm_params["model"]) - - elif "ollama" in llm_params["model"]: - model_name = llm_params["model"].split("ollama/")[-1] - token_key = model_name if "model_tokens" not in llm_params else None - model_tokens = 8192 if "model_tokens" not in llm_params else llm_params["model_tokens"] - return handle_model(model_name, "ollama", token_key, model_tokens) - - elif "claude-3-" in llm_params["model"]: - return handle_model(llm_params["model"], "anthropic", "claude3") - - elif llm_params["model"].startswith("mistral"): - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, "mistralai", model_name) - - elif "deepseek" in llm_params["model"]: - try: - self.model_token = models_tokens["deepseek"][llm_params["model"]] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - return DeepSeek(llm_params) - - elif 
"ernie" in llm_params["model"]: - from langchain_community.chat_models import ErnieBotChat - - try: - self.model_token = models_tokens["ernie"][llm_params["model"]] - except KeyError: - print("model not found, using default token size (8192)") - self.model_token = 8192 - return ErnieBotChat(llm_params) - - elif "oneapi" in llm_params["model"]: - llm_params["model"] = llm_params["model"].split("/")[-1] - try: - self.model_token = models_tokens["oneapi"][llm_params["model"]] - except KeyError: - raise KeyError("Model not supported") - return OneApi(llm_params) - - elif "nvidia" in llm_params["model"]: - from langchain_nvidia_ai_endpoints import ChatNVIDIA - - try: - self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]] - llm_params["model"] = "/".join(llm_params["model"].split("/")[1:]) - except KeyError: - raise KeyError("Model not supported") - return ChatNVIDIA(llm_params) + self.model_token = models_tokens[llm_params["model"]][llm_params["model"]] + except KeyError: + print("Model not found, using default token size (8192)") + self.model_token = 8192 + try: + if llm_params["model_provider"] not in {"oneapi", "nvidia", "ernie", "deepseek"}: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + return init_chat_model(**llm_params) else: - model_name = llm_params["model"].split("/")[-1] - return handle_model(model_name, llm_params["model"], model_name) + if "deepseek" in llm_params["model"]: + return DeepSeek(**llm_params) + + if "ernie" in llm_params["model"]: + from langchain_community.chat_models import ErnieBotChat + return ErnieBotChat(**llm_params) + + if "oneapi" in llm_params["model"]: + return OneApi(**llm_params) + + if "nvidia" in llm_params["model"]: + from langchain_nvidia_ai_endpoints import ChatNVIDIA + return ChatNVIDIA(**llm_params) - except KeyError as e: - print(f"Model not supported: {e}") + except Exception as e: + print(f"Error instancing model: {e}") def get_state(self, key=None) -> dict: diff --git 
a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 7677a901..d4c9f39e 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -102,7 +102,7 @@ "oneapi": { "qwen-turbo": 6000, }, - "nvdia": { + "nvidia": { "meta/llama3-70b-instruct": 419, "meta/llama3-8b-instruct": 419, "nemotron-4-340b-instruct": 1024, @@ -127,7 +127,7 @@ "gemma-7b-it": 8192, "claude-3-haiku-20240307'": 8192, }, - "claude": { + "anthropic": { "claude_instant": 100000, "claude2": 9000, "claude2.1": 200000, From f6df9b75125b4cacbef4af29faf3e17a13ff108c Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 12:30:48 +0200 Subject: [PATCH 2/3] chore(examples): update model names --- .../csv_scraper_graph_multi_haiku.py | 4 ++-- examples/anthropic/csv_scraper_haiku.py | 5 ++-- examples/anthropic/custom_graph_haiku.py | 24 ++++--------------- examples/anthropic/json_scraper_haiku.py | 5 ++-- .../anthropic/json_scraper_multi_haiku.py | 5 ++-- examples/anthropic/pdf_scraper_graph_haiku.py | 5 ++-- examples/anthropic/pdf_scraper_multi_haiku.py | 5 ++-- examples/anthropic/scrape_plain_text_haiku.py | 5 ++-- examples/anthropic/script_generator_haiku.py | 5 ++-- .../anthropic/script_multi_generator_haiku.py | 7 +++--- examples/anthropic/search_graph_haiku.py | 5 ++-- .../anthropic/search_graph_schema_haiku.py | 4 ++-- examples/anthropic/search_link_graph_haiku.py | 20 ++++------------ examples/anthropic/smart_scraper_haiku.py | 5 ++-- .../anthropic/smart_scraper_multi_haiku.py | 5 ++-- .../anthropic/smart_scraper_schema_haiku.py | 4 ++-- .../xml_scraper_graph_multi_haiku.py | 4 ++-- examples/anthropic/xml_scraper_haiku.py | 5 ++-- examples/deepseek/csv_scraper_deepseek.py | 2 +- .../csv_scraper_graph_multi_deepseek.py | 2 +- examples/deepseek/json_scraper_deepseek.py | 2 +- .../deepseek/json_scraper_multi_deepseek.py | 2 +- .../deepseek/pdf_scraper_graph_deepseek.py | 2 
+- .../deepseek/pdf_scraper_multi_deepseek.py | 2 +- .../deepseek/scrape_plain_text_deepseek.py | 2 +- .../deepseek/script_generator_deepseek.py | 2 +- .../script_multi_generator_deepseek.py | 2 +- examples/deepseek/search_graph_deepseek.py | 2 +- .../deepseek/search_graph_schema_deepseek.py | 2 +- .../deepseek/search_link_graph_deepseek.py | 2 +- examples/deepseek/smart_scraper_deepseek.py | 2 +- .../deepseek/smart_scraper_multi_deepseek.py | 2 +- .../deepseek/smart_scraper_schema_deepseek.py | 2 +- examples/deepseek/xml_scraper_deepseek.py | 2 +- .../xml_scraper_graph_multi_deepseek.py | 2 +- examples/ernie/csv_scraper_ernie.py | 14 ++++------- examples/ernie/custom_graph_ernie.py | 20 +++++++--------- examples/ernie/deep_scraper_ernie.py | 2 +- examples/ernie/json_scraper_ernie.py | 18 ++++++-------- examples/ernie/pdf_scraper_graph_ernie.py | 14 ++++------- examples/ernie/scrape_plain_text_ernie.py | 18 ++++++-------- examples/ernie/script_generator_ernie.py | 10 ++++---- .../ernie/script_multi_generator_ernie.py | 16 +++++-------- examples/ernie/search_graph_ernie.py | 16 +++++-------- examples/ernie/search_link_graph_ernie.py | 16 +++++-------- examples/ernie/smart_scraper_ernie.py | 13 +++++----- examples/ernie/smart_scraper_multi_ernie.py | 10 ++++---- examples/ernie/smart_scraper_schema_ernie.py | 8 ++++--- examples/ernie/speech_graph_ernie.py | 7 +++--- examples/ernie/xml_scraper_ernie.py | 6 +++-- .../csv_scraper_graph_multi_mistral.py | 2 +- examples/mistral/csv_scraper_mistral.py | 2 +- examples/mistral/custom_graph_mistral.py | 2 +- examples/mistral/deep_scraper_mistral.py | 2 +- examples/mistral/json_scraper_mistral.py | 2 +- .../mistral/json_scraper_multi_mistral.py | 2 +- examples/mistral/md_scraper_mistral.py | 2 +- examples/mistral/pdf_scraper_mistral.py | 2 +- examples/mistral/pdf_scraper_multi_mistral.py | 2 +- examples/mistral/scrape_plain_text_mistral.py | 2 +- examples/mistral/script_generator_mistral.py | 2 +- 
.../script_generator_schema_mistral.py | 2 +- .../mistral/script_multi_generator_mistral.py | 2 +- examples/mistral/search_graph_mistral.py | 2 +- .../mistral/search_graph_schema_mistral.py | 2 +- examples/mistral/search_link_graph_mistral.py | 2 +- examples/mistral/smart_scraper_mistral.py | 2 +- .../mistral/smart_scraper_multi_mistral.py | 2 +- .../mistral/smart_scraper_schema_mistral.py | 2 +- .../xml_scraper_graph_multi_mistral.py | 2 +- examples/mistral/xml_scraper_mistral.py | 2 +- .../openai/csv_scraper_graph_multi_openai.py | 2 +- examples/openai/csv_scraper_openai.py | 2 +- examples/openai/custom_graph_openai.py | 2 +- examples/openai/deep_scraper_openai.py | 2 +- examples/openai/json_scraper_multi_openai.py | 2 +- examples/openai/json_scraper_openai.py | 2 +- examples/openai/md_scraper_openai.py | 2 +- examples/openai/omni_scraper_openai.py | 2 +- examples/openai/omni_search_openai.py | 2 +- examples/openai/pdf_scraper_multi_openai.py | 2 +- examples/openai/pdf_scraper_openai.py | 2 +- examples/openai/scrape_plain_text_openai.py | 2 +- examples/openai/screenshot_scraper.py | 2 +- examples/openai/script_generator_openai.py | 2 +- .../openai/script_generator_schema_openai.py | 2 +- .../openai/script_multi_generator_openai.py | 2 +- examples/openai/search_graph_openai.py | 2 +- examples/openai/search_graph_schema_openai.py | 2 +- examples/openai/search_link_graph_openai.py | 2 +- examples/openai/smart_scraper_multi_openai.py | 2 +- examples/openai/smart_scraper_openai.py | 2 +- .../openai/smart_scraper_schema_openai.py | 2 +- examples/openai/speech_graph_openai.py | 2 +- .../openai/xml_scraper_graph_multi_openai.py | 2 +- examples/openai/xml_scraper_openai.py | 2 +- examples/single_node/kg_node.py | 2 +- examples/single_node/robot_node.py | 4 ++-- examples/single_node/search_internet_node.py | 4 ++-- scrapegraphai/helpers/models_tokens.py | 3 ++- 100 files changed, 192 insertions(+), 257 deletions(-) diff --git 
a/examples/anthropic/csv_scraper_graph_multi_haiku.py b/examples/anthropic/csv_scraper_graph_multi_haiku.py index b833af01..d574da5c 100644 --- a/examples/anthropic/csv_scraper_graph_multi_haiku.py +++ b/examples/anthropic/csv_scraper_graph_multi_haiku.py @@ -26,8 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/csv_scraper_haiku.py b/examples/anthropic/csv_scraper_haiku.py index 2e0ebe81..745926a3 100644 --- a/examples/anthropic/csv_scraper_haiku.py +++ b/examples/anthropic/csv_scraper_haiku.py @@ -32,9 +32,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/custom_graph_haiku.py b/examples/anthropic/custom_graph_haiku.py index cea14361..d8b4dc19 100644 --- a/examples/anthropic/custom_graph_haiku.py +++ b/examples/anthropic/custom_graph_haiku.py @@ -5,10 +5,9 @@ import os from dotenv import load_dotenv -from langchain_openai import OpenAIEmbeddings -from langchain_openai import ChatOpenAI +from langchain_anthropic import ChatAnthropic from scrapegraphai.graphs import BaseGraph -from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode +from scrapegraphai.nodes import FetchNode, ParseNode, GenerateAnswerNode, RobotsNode load_dotenv() # ************************************************ @@ -19,16 +18,14 @@ "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + }, } # ************************************************ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) 
-embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) +llm_model = ChatAnthropic(graph_config["llm"]) # define the nodes for the graph robot_node = RobotsNode( @@ -57,15 +54,6 @@ "verbose": True, } ) -rag_node = RAGNode( - input="user_prompt & (parsed_doc | doc)", - output=["relevant_chunks"], - node_config={ - "llm_model": llm_model, - "embedder_model": embedder, - "verbose": True, - } -) generate_answer_node = GenerateAnswerNode( input="user_prompt & (relevant_chunks | parsed_doc | doc)", output=["answer"], @@ -84,14 +72,12 @@ robot_node, fetch_node, parse_node, - rag_node, generate_answer_node, ], edges=[ (robot_node, fetch_node), (fetch_node, parse_node), - (parse_node, rag_node), - (rag_node, generate_answer_node) + (parse_node, generate_answer_node) ], entry_point=robot_node ) diff --git a/examples/anthropic/json_scraper_haiku.py b/examples/anthropic/json_scraper_haiku.py index 2610b658..9d5fc8db 100644 --- a/examples/anthropic/json_scraper_haiku.py +++ b/examples/anthropic/json_scraper_haiku.py @@ -26,9 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/json_scraper_multi_haiku.py b/examples/anthropic/json_scraper_multi_haiku.py index 0327673b..d016439d 100644 --- a/examples/anthropic/json_scraper_multi_haiku.py +++ b/examples/anthropic/json_scraper_multi_haiku.py @@ -11,9 +11,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } FILE_NAME = "inputs/example.json" diff --git a/examples/anthropic/pdf_scraper_graph_haiku.py b/examples/anthropic/pdf_scraper_graph_haiku.py index 61be06b4..ee221ac6 100644 --- a/examples/anthropic/pdf_scraper_graph_haiku.py +++ 
b/examples/anthropic/pdf_scraper_graph_haiku.py @@ -14,9 +14,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } source = """ diff --git a/examples/anthropic/pdf_scraper_multi_haiku.py b/examples/anthropic/pdf_scraper_multi_haiku.py index 974dd2f8..2d117c35 100644 --- a/examples/anthropic/pdf_scraper_multi_haiku.py +++ b/examples/anthropic/pdf_scraper_multi_haiku.py @@ -11,9 +11,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # *************** diff --git a/examples/anthropic/scrape_plain_text_haiku.py b/examples/anthropic/scrape_plain_text_haiku.py index d3f36638..d3099026 100644 --- a/examples/anthropic/scrape_plain_text_haiku.py +++ b/examples/anthropic/scrape_plain_text_haiku.py @@ -28,9 +28,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/script_generator_haiku.py b/examples/anthropic/script_generator_haiku.py index 889ce0b5..bdd0c23b 100644 --- a/examples/anthropic/script_generator_haiku.py +++ b/examples/anthropic/script_generator_haiku.py @@ -16,9 +16,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/script_multi_generator_haiku.py b/examples/anthropic/script_multi_generator_haiku.py index f7c69010..bacf0bfc 100644 --- a/examples/anthropic/script_multi_generator_haiku.py +++ b/examples/anthropic/script_multi_generator_haiku.py @@ 
-16,10 +16,9 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, - "library": "beautifulsoup" + "model": "anthropic/claude-3-haiku-20240307", + }, + "library": "beautifulsoup" } # ************************************************ diff --git a/examples/anthropic/search_graph_haiku.py b/examples/anthropic/search_graph_haiku.py index f90d7598..97a5213d 100644 --- a/examples/anthropic/search_graph_haiku.py +++ b/examples/anthropic/search_graph_haiku.py @@ -15,9 +15,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/search_graph_schema_haiku.py b/examples/anthropic/search_graph_schema_haiku.py index c9e7a875..1158d58a 100644 --- a/examples/anthropic/search_graph_schema_haiku.py +++ b/examples/anthropic/search_graph_schema_haiku.py @@ -27,8 +27,8 @@ class Dishes(BaseModel): graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/search_link_graph_haiku.py b/examples/anthropic/search_link_graph_haiku.py index ccfbc1d2..70798cf3 100644 --- a/examples/anthropic/search_link_graph_haiku.py +++ b/examples/anthropic/search_link_graph_haiku.py @@ -14,23 +14,11 @@ load_dotenv() -llm_model_instance = AzureChatOpenAI( - openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], - azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"] -) - -embedder_model_instance = AzureOpenAIEmbeddings( - azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"], - openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"], -) - -# 
************************************************ -# Create the SmartScraperGraph instance and run it -# ************************************************ - graph_config = { - "llm": {"model_instance": llm_model_instance}, - "embeddings": {"model_instance": embedder_model_instance} + "llm": { + "api_key": os.getenv("ANTHROPIC_API_KEY"), + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/smart_scraper_haiku.py b/examples/anthropic/smart_scraper_haiku.py index f0bb2a57..51ca1bf5 100644 --- a/examples/anthropic/smart_scraper_haiku.py +++ b/examples/anthropic/smart_scraper_haiku.py @@ -19,9 +19,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } smart_scraper_graph = SmartScraperGraph( diff --git a/examples/anthropic/smart_scraper_multi_haiku.py b/examples/anthropic/smart_scraper_multi_haiku.py index eb2001d4..f96de0ab 100644 --- a/examples/anthropic/smart_scraper_multi_haiku.py +++ b/examples/anthropic/smart_scraper_multi_haiku.py @@ -17,9 +17,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ******************************************************* diff --git a/examples/anthropic/smart_scraper_schema_haiku.py b/examples/anthropic/smart_scraper_schema_haiku.py index 83cedd2a..bd447a06 100644 --- a/examples/anthropic/smart_scraper_schema_haiku.py +++ b/examples/anthropic/smart_scraper_schema_haiku.py @@ -33,8 +33,8 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } smart_scraper_graph = SmartScraperGraph( diff --git 
a/examples/anthropic/xml_scraper_graph_multi_haiku.py b/examples/anthropic/xml_scraper_graph_multi_haiku.py index 6b79f709..6e9bc5f8 100644 --- a/examples/anthropic/xml_scraper_graph_multi_haiku.py +++ b/examples/anthropic/xml_scraper_graph_multi_haiku.py @@ -26,8 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000}, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/anthropic/xml_scraper_haiku.py b/examples/anthropic/xml_scraper_haiku.py index dd64f571..2dc4b8d2 100644 --- a/examples/anthropic/xml_scraper_haiku.py +++ b/examples/anthropic/xml_scraper_haiku.py @@ -26,9 +26,8 @@ graph_config = { "llm": { "api_key": os.getenv("ANTHROPIC_API_KEY"), - "model": "claude-3-haiku-20240307", - "max_tokens": 4000 - }, + "model": "anthropic/claude-3-haiku-20240307", + }, } # ************************************************ diff --git a/examples/deepseek/csv_scraper_deepseek.py b/examples/deepseek/csv_scraper_deepseek.py index b734b543..60b1c394 100644 --- a/examples/deepseek/csv_scraper_deepseek.py +++ b/examples/deepseek/csv_scraper_deepseek.py @@ -27,7 +27,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/csv_scraper_graph_multi_deepseek.py b/examples/deepseek/csv_scraper_graph_multi_deepseek.py index ea5e9154..0a08f83f 100644 --- a/examples/deepseek/csv_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/csv_scraper_graph_multi_deepseek.py @@ -27,7 +27,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/json_scraper_deepseek.py b/examples/deepseek/json_scraper_deepseek.py index 
dfe6f489..02991c0d 100644 --- a/examples/deepseek/json_scraper_deepseek.py +++ b/examples/deepseek/json_scraper_deepseek.py @@ -26,7 +26,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/json_scraper_multi_deepseek.py b/examples/deepseek/json_scraper_multi_deepseek.py index b957dde0..4f9ca32d 100644 --- a/examples/deepseek/json_scraper_multi_deepseek.py +++ b/examples/deepseek/json_scraper_multi_deepseek.py @@ -12,7 +12,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/pdf_scraper_graph_deepseek.py b/examples/deepseek/pdf_scraper_graph_deepseek.py index d66bbef5..c9c5e0b2 100644 --- a/examples/deepseek/pdf_scraper_graph_deepseek.py +++ b/examples/deepseek/pdf_scraper_graph_deepseek.py @@ -17,7 +17,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/pdf_scraper_multi_deepseek.py b/examples/deepseek/pdf_scraper_multi_deepseek.py index 211e4635..e43dd10a 100644 --- a/examples/deepseek/pdf_scraper_multi_deepseek.py +++ b/examples/deepseek/pdf_scraper_multi_deepseek.py @@ -12,7 +12,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/scrape_plain_text_deepseek.py b/examples/deepseek/scrape_plain_text_deepseek.py index d7a070d7..a7834a8f 100644 --- a/examples/deepseek/scrape_plain_text_deepseek.py +++ b/examples/deepseek/scrape_plain_text_deepseek.py @@ -28,7 +28,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": 
"deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/script_generator_deepseek.py b/examples/deepseek/script_generator_deepseek.py index fd5fd4dd..3de06f25 100644 --- a/examples/deepseek/script_generator_deepseek.py +++ b/examples/deepseek/script_generator_deepseek.py @@ -17,7 +17,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/script_multi_generator_deepseek.py b/examples/deepseek/script_multi_generator_deepseek.py index 2ebfd90a..cc577ecd 100644 --- a/examples/deepseek/script_multi_generator_deepseek.py +++ b/examples/deepseek/script_multi_generator_deepseek.py @@ -17,7 +17,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/search_graph_deepseek.py b/examples/deepseek/search_graph_deepseek.py index 176d6107..54d2e9fa 100644 --- a/examples/deepseek/search_graph_deepseek.py +++ b/examples/deepseek/search_graph_deepseek.py @@ -15,7 +15,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/search_graph_schema_deepseek.py b/examples/deepseek/search_graph_schema_deepseek.py index f5db278e..bcebe76d 100644 --- a/examples/deepseek/search_graph_schema_deepseek.py +++ b/examples/deepseek/search_graph_schema_deepseek.py @@ -31,7 +31,7 @@ class Dishes(BaseModel): graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/search_link_graph_deepseek.py 
b/examples/deepseek/search_link_graph_deepseek.py index 6a35f177..96f886a9 100644 --- a/examples/deepseek/search_link_graph_deepseek.py +++ b/examples/deepseek/search_link_graph_deepseek.py @@ -16,7 +16,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/smart_scraper_deepseek.py b/examples/deepseek/smart_scraper_deepseek.py index ed291b02..50314819 100644 --- a/examples/deepseek/smart_scraper_deepseek.py +++ b/examples/deepseek/smart_scraper_deepseek.py @@ -18,7 +18,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/smart_scraper_multi_deepseek.py b/examples/deepseek/smart_scraper_multi_deepseek.py index fafe7261..374cc6e2 100644 --- a/examples/deepseek/smart_scraper_multi_deepseek.py +++ b/examples/deepseek/smart_scraper_multi_deepseek.py @@ -16,7 +16,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/smart_scraper_schema_deepseek.py b/examples/deepseek/smart_scraper_schema_deepseek.py index 5cbbb702..6d164eb1 100644 --- a/examples/deepseek/smart_scraper_schema_deepseek.py +++ b/examples/deepseek/smart_scraper_schema_deepseek.py @@ -30,7 +30,7 @@ class Projects(BaseModel): graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/xml_scraper_deepseek.py b/examples/deepseek/xml_scraper_deepseek.py index ba401b91..d69665f4 100644 --- a/examples/deepseek/xml_scraper_deepseek.py +++ b/examples/deepseek/xml_scraper_deepseek.py @@ -28,7 +28,7 @@ 
graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/deepseek/xml_scraper_graph_multi_deepseek.py b/examples/deepseek/xml_scraper_graph_multi_deepseek.py index 0f53a6b2..5098c9fd 100644 --- a/examples/deepseek/xml_scraper_graph_multi_deepseek.py +++ b/examples/deepseek/xml_scraper_graph_multi_deepseek.py @@ -27,7 +27,7 @@ graph_config = { "llm": { - "model": "deepseek-chat", + "model": "deepseek/deepseek-chat", "openai_api_key": deepseek_key, "openai_api_base": 'https://api.deepseek.com/v1', }, diff --git a/examples/ernie/csv_scraper_ernie.py b/examples/ernie/csv_scraper_ernie.py index 1594d17c..410e300e 100644 --- a/examples/ernie/csv_scraper_ernie.py +++ b/examples/ernie/csv_scraper_ernie.py @@ -25,16 +25,12 @@ graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434",} + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 } +} # ************************************************ # Create the CSVScraperGraph instance and run it diff --git a/examples/ernie/custom_graph_ernie.py b/examples/ernie/custom_graph_ernie.py index f750276a..5dad8bac 100644 --- a/examples/ernie/custom_graph_ernie.py +++ b/examples/ernie/custom_graph_ernie.py @@ -14,24 +14,20 @@ # Define the configuration for the graph # ************************************************ -graph_config = { - "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434",} +graph_config = { + "llm": { + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": 
"", + "ernie_client_secret": "", + "temperature": 0.1 + } } # ************************************************ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) # define the nodes for the graph diff --git a/examples/ernie/deep_scraper_ernie.py b/examples/ernie/deep_scraper_ernie.py index 059f7a74..b8c6501a 100644 --- a/examples/ernie/deep_scraper_ernie.py +++ b/examples/ernie/deep_scraper_ernie.py @@ -18,7 +18,7 @@ graph_config = { "llm": { - "model": "ernie-bot-turbo", + "model": "ernie/ernie-bot-turbo", "ernie_client_id": "", "ernie_client_secret": "", "temperature": 0.1 diff --git a/examples/ernie/json_scraper_ernie.py b/examples/ernie/json_scraper_ernie.py index ddd67050..e73ebc10 100644 --- a/examples/ernie/json_scraper_ernie.py +++ b/examples/ernie/json_scraper_ernie.py @@ -21,17 +21,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { - "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"} +graph_config = { + "llm": { + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + } } # ************************************************ diff --git a/examples/ernie/pdf_scraper_graph_ernie.py b/examples/ernie/pdf_scraper_graph_ernie.py index 3de975a0..6016da7a 100644 --- a/examples/ernie/pdf_scraper_graph_ernie.py +++ b/examples/ernie/pdf_scraper_graph_ernie.py @@ -7,16 +7,12 @@ graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": 
"http://localhost:11434",} + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 } +} source = """ The Divine Comedy, Italian La Divina Commedia, original name La commedia, long narrative poem written in Italian diff --git a/examples/ernie/scrape_plain_text_ernie.py b/examples/ernie/scrape_plain_text_ernie.py index 27b4f08b..c6bb715a 100644 --- a/examples/ernie/scrape_plain_text_ernie.py +++ b/examples/ernie/scrape_plain_text_ernie.py @@ -25,17 +25,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { - "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434",} +graph_config = { + "llm": { + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + } } # ************************************************ diff --git a/examples/ernie/script_generator_ernie.py b/examples/ernie/script_generator_ernie.py index 14c00ab4..42e136ff 100644 --- a/examples/ernie/script_generator_ernie.py +++ b/examples/ernie/script_generator_ernie.py @@ -13,12 +13,12 @@ # Define the configuration for the graph # ************************************************ -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "library": "beautifulsoup" } diff --git a/examples/ernie/script_multi_generator_ernie.py b/examples/ernie/script_multi_generator_ernie.py index 73e9f5ab..285d491a 100644 --- a/examples/ernie/script_multi_generator_ernie.py +++ b/examples/ernie/script_multi_generator_ernie.py @@ -9,17 +9,13 @@ # Define the configuration for the graph # 
************************************************ -graph_config = { +graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"}, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + }, "library": "beautifulsoup" } diff --git a/examples/ernie/search_graph_ernie.py b/examples/ernie/search_graph_ernie.py index c04d9f9b..0e811683 100644 --- a/examples/ernie/search_graph_ernie.py +++ b/examples/ernie/search_graph_ernie.py @@ -12,17 +12,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { +graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"}, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + }, "library": "beautifulsoup" } diff --git a/examples/ernie/search_link_graph_ernie.py b/examples/ernie/search_link_graph_ernie.py index 466b230c..f38b2772 100644 --- a/examples/ernie/search_link_graph_ernie.py +++ b/examples/ernie/search_link_graph_ernie.py @@ -8,17 +8,13 @@ # Define the configuration for the graph # ************************************************ -graph_config = { +graph_config = { "llm": { - "model": "ernie-bot-turbo", - "ernie_client_id": "", - "ernie_client_secret": "", - "temperature": 0.1 - }, - "embeddings": { - "model": "ollama/nomic-embed-text", - "temperature": 0, - "base_url": "http://localhost:11434"}, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 + }, "library": "beautifulsoup" } diff --git 
a/examples/ernie/smart_scraper_ernie.py b/examples/ernie/smart_scraper_ernie.py index dcee0972..56084dad 100644 --- a/examples/ernie/smart_scraper_ernie.py +++ b/examples/ernie/smart_scraper_ernie.py @@ -14,15 +14,14 @@ # Define the configuration for the graph # ************************************************ -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, - "verbose": False, - "headless": False, + "library": "beautifulsoup" } # ************************************************ diff --git a/examples/ernie/smart_scraper_multi_ernie.py b/examples/ernie/smart_scraper_multi_ernie.py index ddfc6239..6b62b685 100644 --- a/examples/ernie/smart_scraper_multi_ernie.py +++ b/examples/ernie/smart_scraper_multi_ernie.py @@ -12,12 +12,12 @@ # Define the configuration for the graph # ************************************************ -openai_key = os.getenv("OPENAI_APIKEY") - -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-4o", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "verbose": True, "headless": False, diff --git a/examples/ernie/smart_scraper_schema_ernie.py b/examples/ernie/smart_scraper_schema_ernie.py index 64a74937..b0fe3d7e 100644 --- a/examples/ernie/smart_scraper_schema_ernie.py +++ b/examples/ernie/smart_scraper_schema_ernie.py @@ -34,10 +34,12 @@ class Projects(BaseModel): openai_key = os.getenv("OPENAI_APIKEY") -graph_config = { +graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "verbose": True, "headless": False, diff --git a/examples/ernie/speech_graph_ernie.py b/examples/ernie/speech_graph_ernie.py index 
15cc2cfb..cece3149 100644 --- a/examples/ernie/speech_graph_ernie.py +++ b/examples/ernie/speech_graph_ernie.py @@ -24,9 +24,10 @@ graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", - "temperature": 0.7, + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "tts_model": { "api_key": openai_key, diff --git a/examples/ernie/xml_scraper_ernie.py b/examples/ernie/xml_scraper_ernie.py index 5be5716e..a5bf03e0 100644 --- a/examples/ernie/xml_scraper_ernie.py +++ b/examples/ernie/xml_scraper_ernie.py @@ -27,8 +27,10 @@ graph_config = { "llm": { - "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "ernie/ernie-bot-turbo", + "ernie_client_id": "", + "ernie_client_secret": "", + "temperature": 0.1 }, "verbose":False, } diff --git a/examples/mistral/csv_scraper_graph_multi_mistral.py b/examples/mistral/csv_scraper_graph_multi_mistral.py index c3a25e2a..615e59e4 100644 --- a/examples/mistral/csv_scraper_graph_multi_mistral.py +++ b/examples/mistral/csv_scraper_graph_multi_mistral.py @@ -27,7 +27,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/csv_scraper_mistral.py b/examples/mistral/csv_scraper_mistral.py index 63ecfbca..195fb16a 100644 --- a/examples/mistral/csv_scraper_mistral.py +++ b/examples/mistral/csv_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/custom_graph_mistral.py b/examples/mistral/custom_graph_mistral.py index c839f7b6..f02ead0c 100644 --- a/examples/mistral/custom_graph_mistral.py +++ b/examples/mistral/custom_graph_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff 
--git a/examples/mistral/deep_scraper_mistral.py b/examples/mistral/deep_scraper_mistral.py index 5cf576e7..bf0f6ba4 100644 --- a/examples/mistral/deep_scraper_mistral.py +++ b/examples/mistral/deep_scraper_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "max_depth": 1 diff --git a/examples/mistral/json_scraper_mistral.py b/examples/mistral/json_scraper_mistral.py index 2a29c5a7..12f55127 100644 --- a/examples/mistral/json_scraper_mistral.py +++ b/examples/mistral/json_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/json_scraper_multi_mistral.py b/examples/mistral/json_scraper_multi_mistral.py index 07e65c95..1369eda7 100644 --- a/examples/mistral/json_scraper_multi_mistral.py +++ b/examples/mistral/json_scraper_multi_mistral.py @@ -13,7 +13,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", } } diff --git a/examples/mistral/md_scraper_mistral.py b/examples/mistral/md_scraper_mistral.py index 45995cb7..c4e3f2c7 100644 --- a/examples/mistral/md_scraper_mistral.py +++ b/examples/mistral/md_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/pdf_scraper_mistral.py b/examples/mistral/pdf_scraper_mistral.py index 9636f7f7..b006fdb8 100644 --- a/examples/mistral/pdf_scraper_mistral.py +++ b/examples/mistral/pdf_scraper_mistral.py @@ -14,7 +14,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, } diff --git 
a/examples/mistral/pdf_scraper_multi_mistral.py b/examples/mistral/pdf_scraper_multi_mistral.py index 97ad3222..e9f1613f 100644 --- a/examples/mistral/pdf_scraper_multi_mistral.py +++ b/examples/mistral/pdf_scraper_multi_mistral.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, } diff --git a/examples/mistral/scrape_plain_text_mistral.py b/examples/mistral/scrape_plain_text_mistral.py index 3bf199ad..f2b38172 100644 --- a/examples/mistral/scrape_plain_text_mistral.py +++ b/examples/mistral/scrape_plain_text_mistral.py @@ -30,7 +30,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, } diff --git a/examples/mistral/script_generator_mistral.py b/examples/mistral/script_generator_mistral.py index 464a522c..4fe45773 100644 --- a/examples/mistral/script_generator_mistral.py +++ b/examples/mistral/script_generator_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "library": "beautifulsoup" } diff --git a/examples/mistral/script_generator_schema_mistral.py b/examples/mistral/script_generator_schema_mistral.py index 8172f9a1..b9c77285 100644 --- a/examples/mistral/script_generator_schema_mistral.py +++ b/examples/mistral/script_generator_schema_mistral.py @@ -32,7 +32,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/mistral/script_multi_generator_mistral.py b/examples/mistral/script_multi_generator_mistral.py index 4efa6914..f4d5d5b5 100644 --- a/examples/mistral/script_multi_generator_mistral.py +++ b/examples/mistral/script_multi_generator_mistral.py @@ -18,7 +18,7 @@ 
graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/mistral/search_graph_mistral.py b/examples/mistral/search_graph_mistral.py index 68a480d3..f8573f5e 100644 --- a/examples/mistral/search_graph_mistral.py +++ b/examples/mistral/search_graph_mistral.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "max_results": 2, "verbose": True, diff --git a/examples/mistral/search_graph_schema_mistral.py b/examples/mistral/search_graph_schema_mistral.py index d4588289..7c71c0b1 100644 --- a/examples/mistral/search_graph_schema_mistral.py +++ b/examples/mistral/search_graph_schema_mistral.py @@ -31,7 +31,7 @@ class Dishes(BaseModel): graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "max_results": 2, "verbose": True, diff --git a/examples/mistral/search_link_graph_mistral.py b/examples/mistral/search_link_graph_mistral.py index 7191b27e..3216ff2c 100644 --- a/examples/mistral/search_link_graph_mistral.py +++ b/examples/mistral/search_link_graph_mistral.py @@ -17,7 +17,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/smart_scraper_mistral.py b/examples/mistral/smart_scraper_mistral.py index 80d09e6d..7291a40a 100644 --- a/examples/mistral/smart_scraper_mistral.py +++ b/examples/mistral/smart_scraper_mistral.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": os.getenv("MISTRAL_API_KEY"), - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/smart_scraper_multi_mistral.py 
b/examples/mistral/smart_scraper_multi_mistral.py index c86bb787..2654fbcb 100644 --- a/examples/mistral/smart_scraper_multi_mistral.py +++ b/examples/mistral/smart_scraper_multi_mistral.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/smart_scraper_schema_mistral.py b/examples/mistral/smart_scraper_schema_mistral.py index 6d6b9ad3..3e1e505a 100644 --- a/examples/mistral/smart_scraper_schema_mistral.py +++ b/examples/mistral/smart_scraper_schema_mistral.py @@ -30,7 +30,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key":mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/xml_scraper_graph_multi_mistral.py b/examples/mistral/xml_scraper_graph_multi_mistral.py index b9d46b0e..0ea9d30c 100644 --- a/examples/mistral/xml_scraper_graph_multi_mistral.py +++ b/examples/mistral/xml_scraper_graph_multi_mistral.py @@ -29,7 +29,7 @@ graph_config = { "llm": { "api_key":mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose": True, "headless": False, diff --git a/examples/mistral/xml_scraper_mistral.py b/examples/mistral/xml_scraper_mistral.py index c2675c6d..eb6036bf 100644 --- a/examples/mistral/xml_scraper_mistral.py +++ b/examples/mistral/xml_scraper_mistral.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": mistral_key, - "model": "mistral/open-mistral-nemo", + "model": "mistralai/open-mistral-nemo", }, "verbose":False, } diff --git a/examples/openai/csv_scraper_graph_multi_openai.py b/examples/openai/csv_scraper_graph_multi_openai.py index 7b91c896..5e876dcb 100644 --- a/examples/openai/csv_scraper_graph_multi_openai.py +++ b/examples/openai/csv_scraper_graph_multi_openai.py @@ -27,7 +27,7 @@ graph_config = { "llm": { 
"api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/csv_scraper_openai.py b/examples/openai/csv_scraper_openai.py index 744fc7a4..f4410fcd 100644 --- a/examples/openai/csv_scraper_openai.py +++ b/examples/openai/csv_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/custom_graph_openai.py b/examples/openai/custom_graph_openai.py index cc7e715d..b1471a21 100644 --- a/examples/openai/custom_graph_openai.py +++ b/examples/openai/custom_graph_openai.py @@ -27,7 +27,7 @@ # Define the graph nodes # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) # define the nodes for the graph diff --git a/examples/openai/deep_scraper_openai.py b/examples/openai/deep_scraper_openai.py index 5b7202d4..b20e164d 100644 --- a/examples/openai/deep_scraper_openai.py +++ b/examples/openai/deep_scraper_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "max_depth": 1 diff --git a/examples/openai/json_scraper_multi_openai.py b/examples/openai/json_scraper_multi_openai.py index b27e5050..f7cb528a 100644 --- a/examples/openai/json_scraper_multi_openai.py +++ b/examples/openai/json_scraper_multi_openai.py @@ -13,7 +13,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", } } diff --git a/examples/openai/json_scraper_openai.py b/examples/openai/json_scraper_openai.py index eb5d1e7e..e20a5870 100644 --- a/examples/openai/json_scraper_openai.py +++ b/examples/openai/json_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git 
a/examples/openai/md_scraper_openai.py b/examples/openai/md_scraper_openai.py index 2c264ab9..3456c89a 100644 --- a/examples/openai/md_scraper_openai.py +++ b/examples/openai/md_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/omni_scraper_openai.py b/examples/openai/omni_scraper_openai.py index 1d1d86ba..3e6e62ee 100644 --- a/examples/openai/omni_scraper_openai.py +++ b/examples/openai/omni_scraper_openai.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": True, diff --git a/examples/openai/omni_search_openai.py b/examples/openai/omni_search_openai.py index ed0f8f3c..fb967def 100644 --- a/examples/openai/omni_search_openai.py +++ b/examples/openai/omni_search_openai.py @@ -17,7 +17,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "max_results": 2, "max_images": 1, diff --git a/examples/openai/pdf_scraper_multi_openai.py b/examples/openai/pdf_scraper_multi_openai.py index 49a9c7fa..91e219e3 100644 --- a/examples/openai/pdf_scraper_multi_openai.py +++ b/examples/openai/pdf_scraper_multi_openai.py @@ -19,7 +19,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, } diff --git a/examples/openai/pdf_scraper_openai.py b/examples/openai/pdf_scraper_openai.py index 2b0e19f3..e076defe 100644 --- a/examples/openai/pdf_scraper_openai.py +++ b/examples/openai/pdf_scraper_openai.py @@ -14,7 +14,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, } diff --git a/examples/openai/scrape_plain_text_openai.py b/examples/openai/scrape_plain_text_openai.py index 7f390cff..eb8c76e5 100644 --- a/examples/openai/scrape_plain_text_openai.py +++ 
b/examples/openai/scrape_plain_text_openai.py @@ -30,7 +30,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, } diff --git a/examples/openai/screenshot_scraper.py b/examples/openai/screenshot_scraper.py index 826dcc50..c72c44d1 100644 --- a/examples/openai/screenshot_scraper.py +++ b/examples/openai/screenshot_scraper.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": os.getenv("OPENAI_API_KEY"), - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/script_generator_openai.py b/examples/openai/script_generator_openai.py index 046a25ec..e67ad52b 100644 --- a/examples/openai/script_generator_openai.py +++ b/examples/openai/script_generator_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "library": "beautifulsoup" } diff --git a/examples/openai/script_generator_schema_openai.py b/examples/openai/script_generator_schema_openai.py index a728c8a1..5e542c53 100644 --- a/examples/openai/script_generator_schema_openai.py +++ b/examples/openai/script_generator_schema_openai.py @@ -32,7 +32,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "openai/gpt-3.5-turbo", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/openai/script_multi_generator_openai.py b/examples/openai/script_multi_generator_openai.py index d46d2294..3fdd029f 100644 --- a/examples/openai/script_multi_generator_openai.py +++ b/examples/openai/script_multi_generator_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "library": "beautifulsoup", "verbose": True, diff --git a/examples/openai/search_graph_openai.py b/examples/openai/search_graph_openai.py index c12caa4f..8d869c19 100644 --- a/examples/openai/search_graph_openai.py +++ 
b/examples/openai/search_graph_openai.py @@ -16,7 +16,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "max_results": 2, "verbose": True, diff --git a/examples/openai/search_graph_schema_openai.py b/examples/openai/search_graph_schema_openai.py index ecbcc644..571f08b0 100644 --- a/examples/openai/search_graph_schema_openai.py +++ b/examples/openai/search_graph_schema_openai.py @@ -31,7 +31,7 @@ class Dishes(BaseModel): graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-3.5-turbo", + "model": "openai/gpt-3.5-turbo", }, "max_results": 2, "verbose": True, diff --git a/examples/openai/search_link_graph_openai.py b/examples/openai/search_link_graph_openai.py index 818f9434..a988731b 100644 --- a/examples/openai/search_link_graph_openai.py +++ b/examples/openai/search_link_graph_openai.py @@ -17,7 +17,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/smart_scraper_multi_openai.py b/examples/openai/smart_scraper_multi_openai.py index 504e00a8..8f5e648b 100644 --- a/examples/openai/smart_scraper_multi_openai.py +++ b/examples/openai/smart_scraper_multi_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/smart_scraper_openai.py b/examples/openai/smart_scraper_openai.py index 119f67e5..2962f51b 100644 --- a/examples/openai/smart_scraper_openai.py +++ b/examples/openai/smart_scraper_openai.py @@ -18,7 +18,7 @@ graph_config = { "llm": { "api_key": os.getenv("OPENAI_API_KEY"), - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/smart_scraper_schema_openai.py b/examples/openai/smart_scraper_schema_openai.py index 828a9b0a..0c1618d6 100644 --- 
a/examples/openai/smart_scraper_schema_openai.py +++ b/examples/openai/smart_scraper_schema_openai.py @@ -30,7 +30,7 @@ class Projects(BaseModel): graph_config = { "llm": { "api_key":openai_key, - "model": "gpt-4o-mini", + "model": "openai/gpt-4o-mini", }, "verbose": True, "headless": False, diff --git a/examples/openai/speech_graph_openai.py b/examples/openai/speech_graph_openai.py index 603ce51c..7c368df7 100644 --- a/examples/openai/speech_graph_openai.py +++ b/examples/openai/speech_graph_openai.py @@ -25,7 +25,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", "temperature": 0.7, }, "tts_model": { diff --git a/examples/openai/xml_scraper_graph_multi_openai.py b/examples/openai/xml_scraper_graph_multi_openai.py index ef46b877..6610a49f 100644 --- a/examples/openai/xml_scraper_graph_multi_openai.py +++ b/examples/openai/xml_scraper_graph_multi_openai.py @@ -29,7 +29,7 @@ graph_config = { "llm": { "api_key":openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose": True, "headless": False, diff --git a/examples/openai/xml_scraper_openai.py b/examples/openai/xml_scraper_openai.py index b2b5075e..04b3ec9d 100644 --- a/examples/openai/xml_scraper_openai.py +++ b/examples/openai/xml_scraper_openai.py @@ -28,7 +28,7 @@ graph_config = { "llm": { "api_key": openai_key, - "model": "gpt-4o", + "model": "openai/gpt-4o", }, "verbose":False, } diff --git a/examples/single_node/kg_node.py b/examples/single_node/kg_node.py index dd5a6d04..37d1d9a4 100644 --- a/examples/single_node/kg_node.py +++ b/examples/single_node/kg_node.py @@ -57,7 +57,7 @@ # Define the node # ************************************************ -llm_model = OpenAI(graph_config["llm"]) +llm_model = ChatOpenAI(graph_config["llm"]) robots_node = KnowledgeGraphNode( input="user_prompt & answer_dict", diff --git a/examples/single_node/robot_node.py b/examples/single_node/robot_node.py index c2bcbbd1..dcb70e3d 100644 --- 
a/examples/single_node/robot_node.py +++ b/examples/single_node/robot_node.py @@ -11,12 +11,12 @@ graph_config = { "llm": { - "model": "ollama/llama3", + "model": "llama3", "temperature": 0, "streaming": True }, "embeddings": { - "model": "ollama/nomic-embed-text", + "model": "nomic-embed-text", "temperature": 0, # "base_url": "http://localhost:11434", # set ollama URL arbitrarily } diff --git a/examples/single_node/search_internet_node.py b/examples/single_node/search_internet_node.py index 8a8149fa..c998cdd1 100644 --- a/examples/single_node/search_internet_node.py +++ b/examples/single_node/search_internet_node.py @@ -2,7 +2,7 @@ Example of custom graph using existing nodes """ -from scrapegraphai.models import Ollama +from langchain_community.chat_models import ChatOllama from scrapegraphai.nodes import SearchInternetNode # ************************************************ @@ -24,7 +24,7 @@ # Define the node # ************************************************ -llm_model = Ollama(graph_config["llm"]) +llm_model = ChatOllama(graph_config["llm"]) search_node = SearchInternetNode( input="user_input", diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index d4c9f39e..556d6ad6 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -51,7 +51,7 @@ "gemini-1.5-pro-latest": 128000, "models/embedding-001": 2048 }, - "google_vertexai": { + "google_vertexai": { "gemini-1.5-flash": 128000, "gemini-1.5-pro": 128000, "gemini-1.0-pro": 128000, @@ -137,6 +137,7 @@ "claude-3-sonnet-20240229": 200000, "claude-3-haiku-20240307": 200000, "claude-3-5-sonnet-20240620": 200000, + "claude-3-haiku-20240307": 4000, }, "bedrock": { "anthropic.claude-3-5-sonnet-20240620": 200000, From 229d74d4bd39befa3723fa2841e23d40007a9772 Mon Sep 17 00:00:00 2001 From: Federico Aguzzi <62149513+f-aguzzi@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:46:42 +0200 Subject: [PATCH 3/3] test(AbstractGraph): add 
AbstractGraph tests --- requirements-dev.lock | 149 --------------------------- requirements.lock | 154 ---------------------------- requirements.txt | 14 +-- tests/graphs/abstract_graph_test.py | 31 ++++++ 4 files changed, 34 insertions(+), 314 deletions(-) create mode 100644 tests/graphs/abstract_graph_test.py diff --git a/requirements-dev.lock b/requirements-dev.lock index 64af8ee8..04ca69d9 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -15,8 +15,6 @@ aiohappyeyeballs==2.3.5 aiohttp==3.10.3 # via langchain # via langchain-community - # via langchain-fireworks - # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 # via aiohttp alabaster==0.7.16 @@ -25,11 +23,7 @@ altair==5.4.0 # via streamlit annotated-types==0.7.0 # via pydantic -anthropic==0.33.0 - # via langchain-anthropic anyio==4.4.0 - # via anthropic - # via groq # via httpx # via openai # via starlette @@ -55,8 +49,6 @@ boto3==1.34.158 botocore==1.34.158 # via boto3 # via s3transfer -browserbase==0.3.0 - # via scrapegraphai burr==0.22.1 # via scrapegraphai cachetools==5.4.0 @@ -78,17 +70,11 @@ cycler==0.12.1 # via matplotlib dataclasses-json==0.6.7 # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic dill==0.3.8 # via multiprocess # via pylint distro==1.9.0 - # via anthropic - # via groq # via openai -docstring-parser==0.16 - # via google-cloud-aiplatform docutils==0.19 # via sphinx exceptiongroup==1.2.2 @@ -102,10 +88,6 @@ fastapi-pagination==0.12.26 # via burr filelock==3.15.4 # via huggingface-hub - # via torch - # via transformers -fireworks-ai==0.15.0 - # via langchain-fireworks fonttools==4.53.1 # via matplotlib free-proxy==1.1.1 @@ -115,7 +97,6 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub - # via torch furo==2024.5.6 # via scrapegraphai gitdb==4.0.11 @@ -129,11 +110,6 @@ google-ai-generativelanguage==0.6.6 google-api-core==2.19.1 # via google-ai-generativelanguage # via google-api-python-client - # via google-cloud-aiplatform - 
# via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-api-python-client==2.140.0 # via google-generativeai @@ -142,52 +118,21 @@ google-auth==2.33.0 # via google-api-core # via google-api-python-client # via google-auth-httplib2 - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-auth-httplib2==0.2.0 # via google-api-python-client -google-cloud-aiplatform==1.61.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.5 - # via google-cloud-aiplatform -google-cloud-storage==2.18.2 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media google-generativeai==0.7.2 # via langchain-google-genai -google-resumable-media==2.7.2 - # via google-cloud-bigquery - # via google-cloud-storage googleapis-common-protos==1.63.2 # via google-api-core - # via grpc-google-iam-v1 # via grpcio-status graphviz==0.20.3 # via burr - # via scrapegraphai greenlet==3.0.3 # via playwright # via sqlalchemy -groq==0.9.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager grpcio==1.65.4 # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status grpcio-status==1.62.3 # via google-api-core @@ -202,20 +147,12 @@ httplib2==0.22.0 # via google-api-python-client # via google-auth-httplib2 httpx==0.27.0 - # via anthropic - # via browserbase - # via fireworks-ai - # via groq # via langchain-mistralai # via openai httpx-sse==0.4.0 - # via fireworks-ai # via langchain-mistralai huggingface-hub==0.24.5 - # via langchain-huggingface - # via 
sentence-transformers # via tokenizers - # via transformers idna==3.7 # via anyio # via httpx @@ -236,15 +173,11 @@ jinja2==3.1.4 # via burr # via pydeck # via sphinx - # via torch jiter==0.5.0 - # via anthropic # via openai jmespath==1.0.1 # via boto3 # via botocore -joblib==1.4.2 - # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -258,40 +191,22 @@ kiwisolver==1.4.5 langchain==0.2.14 # via langchain-community # via scrapegraphai -langchain-anthropic==0.1.22 - # via scrapegraphai langchain-aws==0.1.16 # via scrapegraphai langchain-community==0.2.11 # via scrapegraphai langchain-core==0.2.33 # via langchain - # via langchain-anthropic # via langchain-aws # via langchain-community - # via langchain-fireworks # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-huggingface # via langchain-mistralai - # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters -langchain-fireworks==0.1.7 - # via scrapegraphai langchain-google-genai==1.0.8 # via scrapegraphai -langchain-google-vertexai==1.0.8 - # via scrapegraphai -langchain-groq==0.1.9 - # via scrapegraphai -langchain-huggingface==0.0.3 - # via scrapegraphai langchain-mistralai==0.1.12 # via scrapegraphai -langchain-nvidia-ai-endpoints==0.2.1 - # via scrapegraphai langchain-openai==0.1.22 # via scrapegraphai langchain-text-splitters==0.2.2 @@ -320,8 +235,6 @@ minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk -mpmath==1.3.0 - # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -331,8 +244,6 @@ mypy-extensions==1.0.0 # via typing-inspect narwhals==1.3.0 # via altair -networkx==3.2.1 - # via torch numpy==1.26.4 # via contourpy # via faiss-cpu @@ -343,24 +254,16 @@ numpy==1.26.4 # via pandas # via pyarrow # via pydeck - # via scikit-learn - # via scipy - # via sentence-transformers # via sf-hamilton - # via shapely # via streamlit - # via transformers openai==1.40.3 # via burr - # via 
langchain-fireworks # via langchain-openai orjson==3.10.7 # via langsmith packaging==24.1 # via altair # via faiss-cpu - # via google-cloud-aiplatform - # via google-cloud-bigquery # via huggingface-hub # via langchain-core # via marshmallow @@ -368,21 +271,16 @@ packaging==24.1 # via pytest # via sphinx # via streamlit - # via transformers pandas==2.2.2 # via scrapegraphai # via sf-hamilton # via streamlit pillow==10.4.0 - # via fireworks-ai - # via langchain-nvidia-ai-endpoints # via matplotlib - # via sentence-transformers # via streamlit platformdirs==4.2.2 # via pylint playwright==1.45.1 - # via browserbase # via scrapegraphai # via undetected-playwright pluggy==1.5.0 @@ -390,16 +288,11 @@ pluggy==1.5.0 proto-plus==1.24.0 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager protobuf==4.25.4 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager # via google-generativeai # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status # via proto-plus # via streamlit @@ -411,15 +304,10 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 - # via anthropic - # via browserbase # via burr # via fastapi # via fastapi-pagination - # via fireworks-ai - # via google-cloud-aiplatform # via google-generativeai - # via groq # via langchain # via langchain-core # via langsmith @@ -444,7 +332,6 @@ pytest==8.0.0 pytest-mock==3.14.0 python-dateutil==2.9.0.post0 # via botocore - # via google-cloud-bigquery # via matplotlib # via pandas python-dotenv==1.0.1 @@ -456,28 +343,22 @@ pyyaml==6.0.2 # via langchain # via langchain-community # via langchain-core - # via transformers referencing==0.35.1 # via jsonschema # via jsonschema-specifications regex==2024.7.24 # via tiktoken - # via transformers requests==2.32.3 # via burr # via free-proxy # via google-api-core - # via google-cloud-bigquery - # via 
google-cloud-storage # via huggingface-hub # via langchain # via langchain-community - # via langchain-fireworks # via langsmith # via sphinx # via streamlit # via tiktoken - # via transformers rich==13.7.1 # via streamlit rpds-py==0.20.0 @@ -487,29 +368,16 @@ rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 -safetensors==0.4.4 - # via transformers -scikit-learn==1.5.1 - # via sentence-transformers -scipy==1.13.1 - # via scikit-learn - # via sentence-transformers semchunk==2.2.0 # via scrapegraphai -sentence-transformers==3.0.1 - # via langchain-huggingface sf-hamilton==1.73.1 # via burr -shapely==2.0.5 - # via google-cloud-aiplatform six==1.16.0 # via python-dateutil smmap==5.0.1 # via gitdb sniffio==1.3.1 - # via anthropic # via anyio - # via groq # via httpx # via openai snowballstemmer==2.2.0 @@ -541,23 +409,16 @@ starlette==0.37.2 # via fastapi streamlit==1.37.1 # via burr -sympy==1.13.2 - # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core # via streamlit -threadpoolctl==3.5.0 - # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 - # via anthropic - # via langchain-huggingface # via langchain-mistralai - # via transformers toml==0.10.2 # via streamlit tomli==2.0.1 @@ -565,8 +426,6 @@ tomli==2.0.1 # via pytest tomlkit==0.13.0 # via pylint -torch==2.2.2 - # via sentence-transformers tornado==6.4.1 # via streamlit tqdm==4.66.5 @@ -576,20 +435,13 @@ tqdm==4.66.5 # via openai # via scrapegraphai # via semchunk - # via sentence-transformers - # via transformers -transformers==4.44.0 - # via langchain-huggingface - # via sentence-transformers typing-extensions==4.12.2 # via altair - # via anthropic # via anyio # via astroid # via fastapi # via fastapi-pagination # via google-generativeai - # via groq # via huggingface-hub # via langchain-core # via openai @@ -601,7 +453,6 @@ typing-extensions==4.12.2 # via sqlalchemy # via starlette # via streamlit - # via torch # via 
typing-inspect # via uvicorn typing-inspect==0.9.0 diff --git a/requirements.lock b/requirements.lock index 1d80e1bf..f3cb5626 100644 --- a/requirements.lock +++ b/requirements.lock @@ -11,17 +11,11 @@ aiohttp==3.9.5 # via langchain # via langchain-community - # via langchain-fireworks - # via langchain-nvidia-ai-endpoints aiosignal==1.3.1 # via aiohttp annotated-types==0.7.0 # via pydantic -anthropic==0.31.2 - # via langchain-anthropic anyio==4.4.0 - # via anthropic - # via groq # via httpx # via openai async-timeout==4.0.3 @@ -37,8 +31,6 @@ boto3==1.34.146 botocore==1.34.146 # via boto3 # via s3transfer -browserbase==0.3.0 - # via scrapegraphai cachetools==5.4.0 # via google-auth certifi==2024.7.4 @@ -49,26 +41,16 @@ charset-normalizer==3.3.2 # via requests dataclasses-json==0.6.7 # via langchain-community -defusedxml==0.7.1 - # via langchain-anthropic dill==0.3.8 # via multiprocess distro==1.9.0 - # via anthropic - # via groq # via openai -docstring-parser==0.16 - # via google-cloud-aiplatform exceptiongroup==1.2.2 # via anyio faiss-cpu==1.8.0.post1 # via scrapegraphai filelock==3.15.4 # via huggingface-hub - # via torch - # via transformers -fireworks-ai==0.14.0 - # via langchain-fireworks free-proxy==1.1.1 # via scrapegraphai frozenlist==1.4.1 @@ -76,7 +58,6 @@ frozenlist==1.4.1 # via aiosignal fsspec==2024.6.1 # via huggingface-hub - # via torch google==3.0.0 # via scrapegraphai google-ai-generativelanguage==0.6.6 @@ -84,11 +65,6 @@ google-ai-generativelanguage==0.6.6 google-api-core==2.19.1 # via google-ai-generativelanguage # via google-api-python-client - # via google-cloud-aiplatform - # via google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-api-python-client==2.137.0 # via google-generativeai @@ -97,51 +73,19 @@ google-auth==2.32.0 # via google-api-core # via google-api-python-client # via google-auth-httplib2 - # via google-cloud-aiplatform - # via 
google-cloud-bigquery - # via google-cloud-core - # via google-cloud-resource-manager - # via google-cloud-storage # via google-generativeai google-auth-httplib2==0.2.0 # via google-api-python-client -google-cloud-aiplatform==1.59.0 - # via langchain-google-vertexai -google-cloud-bigquery==3.25.0 - # via google-cloud-aiplatform -google-cloud-core==2.4.1 - # via google-cloud-bigquery - # via google-cloud-storage -google-cloud-resource-manager==1.12.4 - # via google-cloud-aiplatform -google-cloud-storage==2.18.0 - # via google-cloud-aiplatform - # via langchain-google-vertexai -google-crc32c==1.5.0 - # via google-cloud-storage - # via google-resumable-media google-generativeai==0.7.2 # via langchain-google-genai -google-resumable-media==2.7.1 - # via google-cloud-bigquery - # via google-cloud-storage googleapis-common-protos==1.63.2 # via google-api-core - # via grpc-google-iam-v1 # via grpcio-status -graphviz==0.20.3 - # via scrapegraphai greenlet==3.0.3 # via playwright # via sqlalchemy -groq==0.9.0 - # via langchain-groq -grpc-google-iam-v1==0.13.1 - # via google-cloud-resource-manager grpcio==1.65.1 # via google-api-core - # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status grpcio-status==1.62.2 # via google-api-core @@ -155,35 +99,22 @@ httplib2==0.22.0 # via google-api-python-client # via google-auth-httplib2 httpx==0.27.0 - # via anthropic - # via browserbase - # via fireworks-ai - # via groq # via langchain-mistralai # via openai httpx-sse==0.4.0 - # via fireworks-ai # via langchain-mistralai huggingface-hub==0.24.1 - # via langchain-huggingface - # via sentence-transformers # via tokenizers - # via transformers idna==3.7 # via anyio # via httpx # via requests # via yarl -jinja2==3.1.4 - # via torch jiter==0.5.0 - # via anthropic # via openai jmespath==1.0.1 # via boto3 # via botocore -joblib==1.4.2 - # via scikit-learn jsonpatch==1.33 # via langchain-core jsonpointer==3.0.0 @@ -191,40 +122,22 @@ jsonpointer==3.0.0 langchain==0.2.14 
# via langchain-community # via scrapegraphai -langchain-anthropic==0.1.20 - # via scrapegraphai langchain-aws==0.1.12 # via scrapegraphai langchain-community==0.2.10 # via scrapegraphai langchain-core==0.2.33 # via langchain - # via langchain-anthropic # via langchain-aws # via langchain-community - # via langchain-fireworks # via langchain-google-genai - # via langchain-google-vertexai - # via langchain-groq - # via langchain-huggingface # via langchain-mistralai - # via langchain-nvidia-ai-endpoints # via langchain-openai # via langchain-text-splitters -langchain-fireworks==0.1.5 - # via scrapegraphai langchain-google-genai==1.0.8 # via scrapegraphai -langchain-google-vertexai==1.0.7 - # via scrapegraphai -langchain-groq==0.1.6 - # via scrapegraphai -langchain-huggingface==0.0.3 - # via scrapegraphai langchain-mistralai==0.1.12 # via scrapegraphai -langchain-nvidia-ai-endpoints==0.1.7 - # via scrapegraphai langchain-openai==0.1.22 # via scrapegraphai langchain-text-splitters==0.2.2 @@ -235,16 +148,12 @@ langsmith==0.1.93 # via langchain-core lxml==5.2.2 # via free-proxy -markupsafe==2.1.5 - # via jinja2 marshmallow==3.21.3 # via dataclasses-json minify-html==0.15.0 # via scrapegraphai mpire==2.10.2 # via semchunk -mpmath==1.3.0 - # via sympy multidict==6.0.5 # via aiohttp # via yarl @@ -252,55 +161,34 @@ multiprocess==0.70.16 # via mpire mypy-extensions==1.0.0 # via typing-inspect -networkx==3.2.1 - # via torch numpy==1.26.4 # via faiss-cpu # via langchain # via langchain-aws # via langchain-community # via pandas - # via scikit-learn - # via scipy - # via sentence-transformers - # via shapely - # via transformers openai==1.41.0 - # via langchain-fireworks # via langchain-openai orjson==3.10.6 # via langsmith packaging==24.1 # via faiss-cpu - # via google-cloud-aiplatform - # via google-cloud-bigquery # via huggingface-hub # via langchain-core # via marshmallow - # via transformers pandas==2.2.2 # via scrapegraphai -pillow==10.4.0 - # via fireworks-ai - # via 
langchain-nvidia-ai-endpoints - # via sentence-transformers playwright==1.45.1 - # via browserbase # via scrapegraphai # via undetected-playwright proto-plus==1.24.0 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager protobuf==4.25.3 # via google-ai-generativelanguage # via google-api-core - # via google-cloud-aiplatform - # via google-cloud-resource-manager # via google-generativeai # via googleapis-common-protos - # via grpc-google-iam-v1 # via grpcio-status # via proto-plus pyasn1==0.6.0 @@ -309,12 +197,7 @@ pyasn1==0.6.0 pyasn1-modules==0.4.0 # via google-auth pydantic==2.8.2 - # via anthropic - # via browserbase - # via fireworks-ai - # via google-cloud-aiplatform # via google-generativeai - # via groq # via langchain # via langchain-core # via langsmith @@ -329,7 +212,6 @@ pyparsing==3.1.2 # via httplib2 python-dateutil==2.9.0.post0 # via botocore - # via google-cloud-bigquery # via pandas python-dotenv==1.0.1 # via scrapegraphai @@ -340,45 +222,26 @@ pyyaml==6.0.1 # via langchain # via langchain-community # via langchain-core - # via transformers regex==2024.5.15 # via tiktoken - # via transformers requests==2.32.3 # via free-proxy # via google-api-core - # via google-cloud-bigquery - # via google-cloud-storage # via huggingface-hub # via langchain # via langchain-community - # via langchain-fireworks # via langsmith # via tiktoken - # via transformers rsa==4.9 # via google-auth s3transfer==0.10.2 # via boto3 -safetensors==0.4.3 - # via transformers -scikit-learn==1.5.1 - # via sentence-transformers -scipy==1.13.1 - # via scikit-learn - # via sentence-transformers semchunk==2.2.0 # via scrapegraphai -sentence-transformers==3.0.1 - # via langchain-huggingface -shapely==2.0.5 - # via google-cloud-aiplatform six==1.16.0 # via python-dateutil sniffio==1.3.1 - # via anthropic # via anyio - # via groq # via httpx # via openai soupsieve==2.5 @@ -386,24 +249,15 @@ soupsieve==2.5 
sqlalchemy==2.0.31 # via langchain # via langchain-community -sympy==1.13.1 - # via torch tenacity==8.5.0 # via langchain # via langchain-community # via langchain-core -threadpoolctl==3.5.0 - # via scikit-learn tiktoken==0.7.0 # via langchain-openai # via scrapegraphai tokenizers==0.19.1 - # via anthropic - # via langchain-huggingface # via langchain-mistralai - # via transformers -torch==2.2.2 - # via sentence-transformers tqdm==4.66.4 # via google-generativeai # via huggingface-hub @@ -411,16 +265,9 @@ tqdm==4.66.4 # via openai # via scrapegraphai # via semchunk - # via sentence-transformers - # via transformers -transformers==4.43.3 - # via langchain-huggingface - # via sentence-transformers typing-extensions==4.12.2 - # via anthropic # via anyio # via google-generativeai - # via groq # via huggingface-hub # via langchain-core # via openai @@ -428,7 +275,6 @@ typing-extensions==4.12.2 # via pydantic-core # via pyee # via sqlalchemy - # via torch # via typing-inspect typing-inspect==0.9.0 # via dataclasses-json diff --git a/requirements.txt b/requirements.txt index 21c2fd3b..80cb0767 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,15 +1,9 @@ langchain>=0.2.14 -langchain-fireworks>=0.1.3 -langchain_community>=0.2.9 langchain-google-genai>=1.0.7 -langchain-google-vertexai>=1.0.7 langchain-openai>=0.1.22 -langchain-groq>=0.1.3 -langchain-aws>=0.1.3 -langchain-anthropic>=0.1.11 langchain-mistralai>=0.1.12 -langchain-huggingface>=0.0.3 -langchain-nvidia-ai-endpoints>=0.1.6 +langchain_community>=0.2.9 +langchain-aws>=0.1.3 html2text>=2024.2.26 faiss-cpu>=1.8.0 beautifulsoup4>=4.12.3 @@ -17,11 +11,9 @@ pandas>=2.2.2 python-dotenv>=1.0.1 tiktoken>=0.7 tqdm>=4.66.4 -graphviz>=0.20.3 minify-html>=0.15.0 free-proxy>=1.1.1 playwright>=1.43.0 -google>=3.0.0 undetected-playwright>=0.3.0 +google>=3.0.0 semchunk>=1.0.1 -browserbase>=0.3.0 diff --git a/tests/graphs/abstract_graph_test.py b/tests/graphs/abstract_graph_test.py new file mode 100644 index 
00000000..805a1691
--- /dev/null
+++ b/tests/graphs/abstract_graph_test.py
@@ -0,0 +1,49 @@
+"""
+Tests for the AbstractGraph.
+"""
+from unittest.mock import patch
+
+import pytest
+
+from scrapegraphai.graphs import AbstractGraph
+
+
+class TestAbstractGraph:
+    """Checks how AbstractGraph builds its LLM from the ``llm`` config entry."""
+
+    # NOTE(review): AbstractGraph is instantiated directly below; if it declares
+    # abstract methods, use a minimal concrete subclass instead - confirm.
+
+    @pytest.mark.parametrize("llm_config, expected_model", [
+        ({"model": "openai/gpt-3.5-turbo"}, "ChatOpenAI"),
+        ({"model": "azure_openai/gpt-3.5-turbo"}, "AzureChatOpenAI"),
+        ({"model": "google_genai/gemini-pro"}, "ChatGoogleGenerativeAI"),
+        ({"model": "google_vertexai/chat-bison"}, "ChatVertexAI"),
+        ({"model": "ollama/llama2"}, "Ollama"),
+        ({"model": "oneapi/text-davinci-003"}, "OneApi"),
+        ({"model": "nvidia/clara-instant-1-base"}, "ChatNVIDIA"),
+        ({"model": "deepseek/deepseek-coder-6.7b-instruct"}, "DeepSeek"),
+        ({"model": "ernie/ernie-bot"}, "ErnieBotChat"),
+    ])
+    def test_create_llm(self, llm_config, expected_model):
+        """Expect the provider prefix of ``model`` to pick the named model class."""
+        graph = AbstractGraph("Test prompt", {"llm": llm_config})
+        # expected_model is a class *name*: isinstance() rejects a str with
+        # TypeError, so match the name against the instance's class hierarchy.
+        mro_names = {cls.__name__ for cls in type(graph.llm_model).__mro__}
+        assert expected_model in mro_names
+
+    def test_create_llm_unknown_provider(self):
+        """Expect ValueError for a provider prefix that is not recognised."""
+        with pytest.raises(ValueError):
+            AbstractGraph("Test prompt", {"llm": {"model": "unknown_provider/model"}})
+
+    def test_create_llm_error(self):
+        """Expect a failure inside init_chat_model to reach the caller."""
+        # Patch init_chat_model where abstract_graph looks it up so the mock is used.
+        with patch(
+            "scrapegraphai.graphs.abstract_graph.init_chat_model",
+            side_effect=Exception("Test error"),
+        ):
+            with pytest.raises(Exception):
+                AbstractGraph("Test prompt", {"llm": {"model": "openai/gpt-3.5-turbo"}})