# Patch summary (free text ahead of the first "diff --git" header).
#
# scrapegraphai/graphs/smart_scraper_graph.py
#   - Imports `logging` and creates a module-level `logger`.
#   - In `_create_graph`, replaces the two `print()` calls with `logger.info()`
#     calls, guarded by a check that both 'request_id' and 'result' are present
#     in `response`; otherwise a warning is logged.
#   - NOTE(review): the `logger = ...` assignment is inserted between import
#     statements (before `from ..nodes import (`). The rest of the import block
#     is outside this hunk -- confirm the placement is intended.
#
# scrapegraphai/utils/__init__.py
#   - Adds an inline note to the `schema_trasform` import saying the misspelled
#     filename is kept for compatibility.
#
# scrapegraphai/utils/schema_trasform.py
#   - Fixes the typo in the module docstring.
#   - `process_properties` now checks for "items", "$defs" and the referenced
#     definition before indexing, and stores placeholder values
#     (["object"], ["unknown"], or a "Missing reference" dict) when they are absent.
#   - Raises ValueError when `pydantic_schema` has no "properties" key.
#
# NOTE(review): leading whitespace on hunk lines was reconstructed from the
# Python block structure -- verify against the target files before applying.

diff --git a/scrapegraphai/graphs/smart_scraper_graph.py b/scrapegraphai/graphs/smart_scraper_graph.py
index 16964d84..8cf11f35 100644
--- a/scrapegraphai/graphs/smart_scraper_graph.py
+++ b/scrapegraphai/graphs/smart_scraper_graph.py
@@ -2,10 +2,14 @@
 SmartScraperGraph Module
 """
 
+import logging
 from typing import Optional, Type
 
 from pydantic import BaseModel
 
+# Initialize logger
+logger = logging.getLogger(__name__)
+
 from ..nodes import (
     ConditionalNode,
     FetchNode,
@@ -92,9 +96,12 @@ def _create_graph(self) -> BaseGraph:
                 user_prompt=self.prompt,
             )
 
-            # Print the response
-            print(f"Request ID: {response['request_id']}")
-            print(f"Result: {response['result']}")
+            # Use logging instead of print for better production practices
+            if 'request_id' in response and 'result' in response:
+                logger.info(f"Request ID: {response['request_id']}")
+                logger.info(f"Result: {response['result']}")
+            else:
+                logger.warning("Missing expected keys in response.")
 
             sgai_client.close()
 
diff --git a/scrapegraphai/utils/__init__.py b/scrapegraphai/utils/__init__.py
index df9118c1..3a4e7b13 100644
--- a/scrapegraphai/utils/__init__.py
+++ b/scrapegraphai/utils/__init__.py
@@ -43,7 +43,7 @@
 from .proxy_rotation import Proxy, parse_or_search_proxy, search_proxy_servers
 from .save_audio_from_bytes import save_audio_from_bytes
 from .save_code_to_file import save_code_to_file
-from .schema_trasform import transform_schema
+from .schema_trasform import transform_schema # Note: filename has typo but kept for compatibility
 from .screenshot_scraping.screenshot_preparation import (
     crop_image,
     select_area_with_ipywidget,
diff --git a/scrapegraphai/utils/schema_trasform.py b/scrapegraphai/utils/schema_trasform.py
index 8ac968a0..91f4c033 100644
--- a/scrapegraphai/utils/schema_trasform.py
+++ b/scrapegraphai/utils/schema_trasform.py
@@ -1,5 +1,5 @@
 """
-This utility function trasfrom the pydantic schema into a more comprehensible schema.
+This utility function transforms the pydantic schema into a more comprehensible schema.
 """
 
 
@@ -19,15 +19,20 @@ def process_properties(properties):
         for key, value in properties.items():
             if "type" in value:
                 if value["type"] == "array":
-                    if "$ref" in value["items"]:
+                    if "items" in value and "$ref" in value["items"]:
                         ref_key = value["items"]["$ref"].split("/")[-1]
-                        result[key] = [
-                            process_properties(
-                                pydantic_schema["$defs"][ref_key]["properties"]
-                            )
-                        ]
-                    else:
+                        if "$defs" in pydantic_schema and ref_key in pydantic_schema["$defs"]:
+                            result[key] = [
+                                process_properties(
+                                    pydantic_schema["$defs"][ref_key].get("properties", {})
+                                )
+                            ]
+                        else:
+                            result[key] = ["object"] # fallback for missing reference
+                    elif "items" in value and "type" in value["items"]:
                         result[key] = [value["items"]["type"]]
+                    else:
+                        result[key] = ["unknown"] # fallback for malformed array
                 else:
                     result[key] = {
                         "type": value["type"],
@@ -35,9 +40,14 @@ def process_properties(properties):
                     }
             elif "$ref" in value:
                 ref_key = value["$ref"].split("/")[-1]
-                result[key] = process_properties(
-                    pydantic_schema["$defs"][ref_key]["properties"]
-                )
+                if "$defs" in pydantic_schema and ref_key in pydantic_schema["$defs"]:
+                    result[key] = process_properties(
+                        pydantic_schema["$defs"][ref_key].get("properties", {})
+                    )
+                else:
+                    result[key] = {"type": "object", "description": "Missing reference"} # fallback
         return result
 
+    if "properties" not in pydantic_schema:
+        raise ValueError("Invalid pydantic schema: missing 'properties' key")
     return process_properties(pydantic_schema["properties"])