diff --git a/README.md b/README.md
index 6d32729..7a0d4c6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # ScrapeSchema
-ScrapeSchema is a Python-based library designed to extract entities and relationship from files.
+![graph](docs/assets/graph_pyecharts.png)
+
 The schemas ScrapeSchema generates are inferred from documents and can be used to define database tables or to build knowledge graphs.
 
 ## Features
 
diff --git a/canvas_to_use_the lib.py b/canvas_to_use_the lib.py
deleted file mode 100644
index e3539f5..0000000
--- a/canvas_to_use_the lib.py
+++ /dev/null
@@ -1,14 +0,0 @@
-from scrapeschema # import what you need from the library
-import os
-from dotenv import load_dotenv
-
-def main():
-    load_dotenv() # Load environment variables from .env file
-    api_key = os.getenv("OPENAI_API_KEY")
-
-    # Path to your file
-    file_path = "path/to/your/file"
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/docs/assets/graph_pyecharts.png b/docs/assets/graph_pyecharts.png
new file mode 100644
index 0000000..1189d68
Binary files /dev/null and b/docs/assets/graph_pyecharts.png differ
diff --git a/examples/example_renderer.py b/examples/example_renderer.py
new file mode 100644
index 0000000..5a78a80
--- /dev/null
+++ b/examples/example_renderer.py
@@ -0,0 +1,43 @@
+from scrapeschema import Entity, Relation
+from scrapeschema.renderers import PyechartsRenderer
+
+# Define entities with nested attributes
+entities = [
+    Entity(id="1", type="Person", attributes={
+        "name": "Alice",
+        "age": 30,
+        "address": {
+            "city": "New York",
+            "zip": "10001"
+        }
+    }),
+    Entity(id="2", type="Person", attributes={
+        "name": "Bob",
+        "age": 40,
+        "address": {
+            "city": "Los Angeles",
+            "zip": "90001"
+        }
+    }),
+    Entity(id="3", type="Company", attributes={
+        "name": "Acme Corp",
+        "industry": "Tech",
+        "headquarters": {
+            "city": "San Francisco",
+            "zip": "94105"
+        }
+    })
+]
+
+# Define relations between the entities
+relations = [
+    Relation(id="r1", source="1", target="2", name="Friend"),
+    Relation(id="r2", source="1", target="3", name="Employee"),
+    Relation(id="r3", source="2", target="3", name="Employer"),
+]
+
+# Initialize the PyechartsRenderer
+renderer = PyechartsRenderer(repulsion=2000, title="Graph Example with Nested Entities")
+
+# Render the graph from the entities and relations and save it to an HTML file
+graph = renderer.render(entities, relations, output_path="graph_nested.html")
diff --git a/examples/extract_entities_json_schema_from_pdf.py b/examples/extract_entities_json_schema_from_pdf.py
index 634c6c8..2380f28 100644
--- a/examples/extract_entities_json_schema_from_pdf.py
+++ b/examples/extract_entities_json_schema_from_pdf.py
@@ -6,11 +6,16 @@ def main():
     load_dotenv() # Load environment variables from .env file
     api_key = os.getenv("OPENAI_API_KEY")
 
-    # Path to your PDF file
-    pdf_path = "./test.pdf"
+    # get current directory
+    curr_dirr = os.path.dirname(os.path.abspath(__file__))
+    pdf_name = "test.pdf"
+    pdf_path = os.path.join(curr_dirr, pdf_name)
 
     # Create a PDFParser instance with the API key
-    pdf_parser = PDFParser(api_key)
+    pdf_parser = PDFParser(
+        api_key=api_key,
+        model="gpt-4o-mini"
+    )
 
     # Create a FileExtractor instance with the PDF parser
     pdf_extractor = FileExtractor(pdf_path, pdf_parser)
diff --git a/examples/extract_entities_relations_from_pdf.py b/examples/extract_entities_relations_from_pdf.py
index e1eec5c..3f00ffc 100644
--- a/examples/extract_entities_relations_from_pdf.py
+++ b/examples/extract_entities_relations_from_pdf.py
@@ -1,13 +1,19 @@
 from scrapeschema import
FileExtractor, PDFParser +from scrapeschema.renderers import PyechartsRenderer import os from dotenv import load_dotenv +load_dotenv() # Load environment variables from .env file -def main(): - load_dotenv() # Load environment variables from .env file - api_key = os.getenv("OPENAI_API_KEY") +# Get the OpenAI API key from the environment variables +api_key = os.getenv("OPENAI_API_KEY") + +# get current directory +curr_dirr = os.path.dirname(os.path.abspath(__file__)) - # Path to your PDF file - pdf_path = "./test.pdf" +def main(): + # Path to the PDF file + pdf_name = "test.pdf" + pdf_path = os.path.join(curr_dirr, pdf_name) # Create a PDFParser instance with the API key pdf_parser = PDFParser(api_key) @@ -17,12 +23,18 @@ def main(): # Extract entities from the PDF entities = pdf_extractor.extract_entities() + relations = pdf_extractor.extract_relations() - print(entities) + # Initialize the PyechartsRenderer + renderer = PyechartsRenderer(repulsion=2000, title="Entity-Relationship Graph") - relations = pdf_extractor.extract_relations() - print(relations) - + # Render the graph using the provided nodes and links + graph = renderer.render(entities, relations, output_path="graph.html") + + print(graph) if __name__ == "__main__": - main() \ No newline at end of file + main() + + + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index e4e2783..4d17971 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,7 @@ [project] - name = "scrapeschema" version = "0.0.1" -description = "library for creating ontologies from documents" +description = "Library for creating ontologies from documents using LLM" authors = [ { name = "Marco Vinciguerra", email = "mvincig11@gmail.com" }, { name = "Marco Perini", email = "perinim.98@gmail.com" }, @@ -13,7 +12,6 @@ dependencies = [ "certifi==2024.7.4", "charset-normalizer==3.3.2", "idna==3.8", - "pdf2image==1.17.0", "pillow==10.4.0", "python-dotenv==1.0.1", "requests==2.32.3", @@ -26,6 +24,10 @@ homepage = "https://scrapegraphai.com/" repository = "https://github.com/ScrapeGraphAI/ScrapeSchema" documentation = "" keywords = [ + "scrapeschema", + "ontologies", + "documents", + "knowledge graph", "scrapegraph", "scrapegraphai", "langchain", @@ -53,7 +55,7 @@ classifiers = [ requires-python = ">=3.9,<4.0" [project.optional-dependencies] -burr = ["burr[start]==0.22.1"] +renderers = ["pyecharts==2.0.6"] docs = ["sphinx==6.0", "furo==2024.5.6"] [build-system] @@ -65,12 +67,12 @@ managed = true dev-dependencies = [ "pytest==8.0.0", "pytest-mock==3.14.0", - "-e file:.[burr]", + "-e file:.[renderers]", "-e file:.[docs]", "pylint>=3.2.5", ] + [tool.rye.scripts] -pylint-local = "pylint scrapegraphai/**/*.py" -pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py" +pylint-local = "pylint scrapeschema/**/*.py" +pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapeschema/**/*.py" update-requirements = "python 'manual deployment/autorequirements.py'" - diff --git a/requirements-dev.lock b/requirements-dev.lock index 1630533..fbcc51d 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -8,206 +8,77 @@ # with-sources: false -e file:. 
-aiofiles==24.1.0 - # via burr alabaster==0.7.16 # via sphinx -altair==5.4.1 - # via streamlit -annotated-types==0.7.0 - # via pydantic -anyio==4.4.0 - # via httpx - # via openai - # via starlette astroid==3.2.4 # via pylint -attrs==24.2.0 - # via jsonschema - # via referencing babel==2.16.0 # via sphinx beautifulsoup4==4.12.3 # via furo -blinker==1.8.2 - # via streamlit -burr==0.22.1 - # via scrapeschema -cachetools==5.5.0 - # via streamlit certifi==2024.7.4 - # via httpcore - # via httpx # via requests # via scrapeschema charset-normalizer==3.3.2 # via requests # via scrapeschema -click==8.1.7 - # via burr - # via streamlit - # via uvicorn -contourpy==1.3.0 - # via matplotlib -cycler==0.12.1 - # via matplotlib +colorama==0.4.6 + # via pylint + # via pytest + # via sphinx dill==0.3.8 # via pylint -distro==1.9.0 - # via openai docutils==0.19 # via sphinx exceptiongroup==1.2.2 - # via anyio # via pytest -fastapi==0.114.1 - # via burr -fastapi-pagination==0.12.27 - # via burr -fonttools==4.53.1 - # via matplotlib furo==2024.5.6 # via scrapeschema -gitdb==4.0.11 - # via gitpython -gitpython==3.1.43 - # via streamlit -graphviz==0.20.3 - # via burr -h11==0.14.0 - # via httpcore - # via uvicorn -httpcore==1.0.5 - # via httpx -httpx==0.27.2 - # via openai idna==3.8 - # via anyio - # via httpx # via requests # via scrapeschema imagesize==1.4.1 # via sphinx importlib-metadata==8.5.0 # via sphinx -importlib-resources==6.4.5 - # via matplotlib iniconfig==2.0.0 # via pytest isort==5.13.2 # via pylint jinja2==3.1.4 - # via altair - # via burr - # via pydeck + # via pyecharts # via sphinx -jiter==0.5.0 - # via openai -jsonschema==4.23.0 - # via altair -jsonschema-specifications==2023.12.1 - # via jsonschema -kiwisolver==1.4.7 - # via matplotlib -loguru==0.7.2 - # via burr -markdown-it-py==3.0.0 - # via rich markupsafe==2.1.5 # via jinja2 -matplotlib==3.9.2 - # via burr mccabe==0.7.0 # via pylint -mdurl==0.1.2 - # via markdown-it-py -mypy-extensions==1.0.0 - # via typing-inspect -narwhals==1.7.0 - # via altair -numpy==2.0.2 - # via contourpy - # via matplotlib - # via pandas - # via pyarrow - # via pydeck - # via sf-hamilton - # via streamlit -openai==1.44.1 - # via burr packaging==24.1 - # via altair - # via matplotlib # via pytest # via sphinx - # via streamlit -pandas==2.2.2 - # via sf-hamilton - # via streamlit -pdf2image==1.17.0 - # via scrapeschema pillow==10.4.0 - # via matplotlib - # via pdf2image # via scrapeschema - # via streamlit platformdirs==4.3.2 # via pylint pluggy==1.5.0 # via pytest -protobuf==5.28.1 - # via streamlit -pyarrow==17.0.0 - # via streamlit -pydantic==2.9.1 - # via burr - # via fastapi - # via fastapi-pagination - # via openai -pydantic-core==2.23.3 - # via pydantic -pydeck==0.9.1 - # via streamlit +prettytable==3.11.0 + # via pyecharts +pyecharts==2.0.6 + # via scrapeschema pygments==2.18.0 # via furo - # via rich # via sphinx pylint==3.2.7 -pyparsing==3.1.4 - # via matplotlib pytest==8.0.0 # via pytest-mock pytest-mock==3.14.0 -python-dateutil==2.9.0.post0 - # via matplotlib - # via pandas python-dotenv==1.0.1 # via scrapeschema -pytz==2024.2 - # via pandas -referencing==0.35.1 - # via jsonschema - # via jsonschema-specifications requests==2.32.3 - # via burr # via scrapeschema # via sphinx - # via streamlit -rich==13.8.1 - # via streamlit -rpds-py==0.20.0 - # via jsonschema - # via referencing -sf-hamilton==1.76.0 - # via burr -six==1.16.0 - # via python-dateutil -smmap==5.0.1 - # via gitdb -sniffio==1.3.1 - # via anyio - # via httpx - # via openai +simplejson==3.19.3 + # 
via pyecharts snowballstemmer==2.2.0 # via sphinx soupsieve==2.6 @@ -230,47 +101,18 @@ sphinxcontrib-qthelp==2.0.0 # via sphinx sphinxcontrib-serializinghtml==2.0.0 # via sphinx -starlette==0.38.5 - # via fastapi -streamlit==1.38.0 - # via burr -tenacity==8.5.0 - # via streamlit -toml==0.10.2 - # via streamlit tomli==2.0.1 # via pylint # via pytest tomlkit==0.13.2 # via pylint -tornado==6.4.1 - # via streamlit -tqdm==4.66.5 - # via openai typing-extensions==4.12.2 - # via altair - # via anyio # via astroid - # via fastapi - # via fastapi-pagination - # via openai - # via pydantic - # via pydantic-core # via pylint - # via sf-hamilton - # via starlette - # via streamlit - # via typing-inspect - # via uvicorn -typing-inspect==0.9.0 - # via sf-hamilton -tzdata==2024.1 - # via pandas urllib3==2.2.2 # via requests # via scrapeschema -uvicorn==0.30.6 - # via burr +wcwidth==0.2.13 + # via prettytable zipp==3.20.1 # via importlib-metadata - # via importlib-resources diff --git a/requirements.lock b/requirements.lock index bb1930e..8062277 100644 --- a/requirements.lock +++ b/requirements.lock @@ -17,10 +17,7 @@ charset-normalizer==3.3.2 idna==3.8 # via requests # via scrapeschema -pdf2image==1.17.0 - # via scrapeschema pillow==10.4.0 - # via pdf2image # via scrapeschema python-dotenv==1.0.1 # via scrapeschema diff --git a/scrapeschema/extractor.py b/scrapeschema/extractor.py index 42808e4..971b1eb 100644 --- a/scrapeschema/extractor.py +++ b/scrapeschema/extractor.py @@ -30,7 +30,7 @@ def __init__(self, file_path: str, parser: BaseParser): def extract_entities(self) -> List[Entity]: new_entities = self.parser.extract_entities(self.file_path) - return self.update_entities(new_entities) + return new_entities def extract_relations(self) -> List[Relation]: return self.parser.extract_relations(self.file_path) diff --git a/scrapeschema/parsers/pdf_parser.py b/scrapeschema/parsers/pdf_parser.py index 38e36fc..c7536d2 100644 --- a/scrapeschema/parsers/pdf_parser.py +++ b/scrapeschema/parsers/pdf_parser.py @@ -4,8 +4,6 @@ import base64 import os import tempfile -from pdf2image import convert_from_path -from pdf2image.exceptions import PDFInfoNotInstalledError, PDFPageCountError, PDFSyntaxError import requests import json from .prompts import DIGRAPH_EXAMPLE_PROMPT, JSON_SCHEMA_PROMPT, RELATIONS_PROMPT, UPDATE_ENTITIES_PROMPT @@ -13,6 +11,7 @@ import inspect import subprocess import logging +import re # Set up logging logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') @@ -88,10 +87,13 @@ def load_pdf_as_images(pdf_path: str) -> Optional[List[Image.Image]]: if not os.path.exists(image_path): break logging.info(f"Loading image: {image_path}") - images.append(Image.open(image_path)) + + # Using context manager to ensure the file is closed properly after use + with Image.open(image_path) as img: + # Append a copy of the image to the list, closing the original image + images.append(img.copy()) page_num += 1 - logging.info(f"Total pages processed: {page_num - 1}") return images except subprocess.CalledProcessError as e: @@ -100,9 +102,6 @@ def load_pdf_as_images(pdf_path: str) -> Optional[List[Image.Image]]: logging.error(f"Command error: {e.stderr}") return None - # The temporary directory and its contents are automatically cleaned up - # when exiting the 'with' block - def save_image_to_temp(image: Image.Image) -> str: """ Saves an image to a temporary file. 
@@ -117,7 +116,7 @@ def save_image_to_temp(image: Image.Image) -> str: image.save(temp_file.name, 'JPEG') return temp_file.name -def process_pdf(pdf_path: str) -> List[str] or None: # type: ignore +def process_pdf(pdf_path: str) -> Optional[List[str]]: """ Processes a PDF file and converts each page to a base64 encoded image. @@ -130,17 +129,29 @@ def process_pdf(pdf_path: str) -> List[str] or None: # type: ignore if not os.path.exists(pdf_path): raise FileNotFoundError(f"PDF file not found: {pdf_path}") + # Load PDF as images images = load_pdf_as_images(pdf_path) if not images: return None base64_images = [] + for page_num, image in enumerate(images, start=1): - temp_image_path = save_image_to_temp(image) - base64_image = encode_image(temp_image_path) - base64_images.append(base64_image) - os.unlink(temp_image_path) - print(f"Processed page {page_num}") + temp_image_path = None + try: + # Save image to temporary file + temp_image_path = save_image_to_temp(image) + + # Convert image to base64 + base64_image = encode_image(temp_image_path) + base64_images.append(base64_image) + + except Exception as e: + logging.error(f"Error processing page {page_num}: {e}") + finally: + # Ensure temp file is deleted even in case of an error + if temp_image_path and os.path.exists(temp_image_path): + os.unlink(temp_image_path) return base64_images @@ -178,6 +189,20 @@ def traverse_schema(schema: Dict[str, Any], parent_id: str = None): traverse_schema(entities_json_schema) return entities + def _extract_json_content(self, input_string: str) -> str: + # Use regex to match content between ```json and ``` + match = re.search(r"```json\s*(.*?)\s*```", input_string, re.DOTALL) + if match: + return match.group(1).strip() + return "" + + def _extract_python_content(self, input_string: str) -> str: + # Use regex to match content between ```python and ``` + match = re.search(r"```python\s*(.*?)\s*```", input_string, re.DOTALL) + if match: + return match.group(1).strip() + return "" + def update_entities(self, new_entities: List[Entity]) -> List[Entity]: existing_entities = self._entities @@ -189,7 +214,8 @@ def update_entities(self, new_entities: List[Entity]) -> List[Entity]: # Use _get_llm_response instead of direct API call response = self._get_llm_response(prompt) - response = response[8:-3] # Remove ```json and ``` if present + # parse the response which is inside ```json and ```, use regex to extract the json + response = self._extract_json_content(response) try: updated_entities_data = json.loads(response) @@ -225,6 +251,7 @@ def extract_relations(self, file_path: str) -> List[Relation]: # Use _get_llm_response instead of direct API call relations_answer_code = self._get_llm_response(relations_prompt) + relations_answer_code = self._extract_python_content(relations_answer_code) # Create a new dictionary to store the local variables local_vars = {} @@ -285,7 +312,7 @@ def entities_json_schema(self, file_path: str) -> Dict[str, Any]: if base64_images: page_answers = self._generate_json_schema(base64_images) json_schema = self._merge_json_schemas(page_answers) - json_schema = json_schema[8:-3] + json_schema = self._extract_json_content(json_schema) print("\n PDF JSON Schema:") print(json_schema) @@ -329,6 +356,7 @@ def _generate_json_schema(self, base64_images: List[str]) -> List[str]: # Use _get_llm_response with image answer = self._get_llm_response(prompt, image_url=f"data:image/jpeg;base64,{base64_image}") + answer = self._extract_json_content(answer) page_answers.append(f"Page {page_num}: {answer}") 
             print(f"Processed page {page_num}")
 
@@ -337,7 +365,7 @@ def _merge_json_schemas(self, page_answers: List[str]) -> str:
         digraph_prompt = "Generate a unique json schema starting from the following \
                         \n\n" + "\n\n".join(page_answers) + "\n\n \
-                        Remember to provide only the json schema, without any comments before or after the json schema"
+                        Remember to provide only the json schema without any comments, wrapped in backticks (`) like ```json ... ``` and nothing else."
 
         # Use _get_llm_response instead of direct API call
         digraph_code = self._get_llm_response(digraph_prompt)
diff --git a/scrapeschema/parsers/prompts.py b/scrapeschema/parsers/prompts.py
index dea88f9..be819d3 100644
--- a/scrapeschema/parsers/prompts.py
+++ b/scrapeschema/parsers/prompts.py
@@ -38,7 +38,8 @@
 """
 
 JSON_SCHEMA_PROMPT = """
-Extract the schema of the meaningful entities in this document, I want something like:
+Extract the schema of the meaningful entities in this document, I want something like:\n
+```json
 {
     "$schema": "http://json-schema.org/schema#",
     "title": "Payslip",
     "type": "object",
     "properties": {
@@ -151,6 +152,7 @@
         "payslip"
     ]
 }
+```
 """
 
 RELATIONS_PROMPT = """
@@ -159,7 +161,8 @@
 Find meaningful relations among these entities, give the relations with the following structure:
 {relation_class}
 Remember to give only and exclusively the Python code for generating the relations, nothing else.
-No intro, no code block, no nothing, just the code and remember to insert the following imports:
+You must wrap the code in triple backticks (```) like ```python ... ``` and nothing else.
+You must insert the following imports in the code:\n
 from dataclasses import dataclass
 from typing import Any, Dict, List
 """
@@ -198,5 +201,5 @@
 {new_entities}
 Please provide the updated list of entities as a JSON array. Each entity should be a JSON object with 'id', 'type', and 'attributes' fields.
-provide only the JSON, nothing else, nothing before or after the JSON.
+Provide only the JSON array, wrapped in backticks (`) like ```json ... ``` and nothing else.
 """
\ No newline at end of file
diff --git a/scrapeschema/renderers/__init__.py b/scrapeschema/renderers/__init__.py
new file mode 100644
index 0000000..ca5a59d
--- /dev/null
+++ b/scrapeschema/renderers/__init__.py
@@ -0,0 +1,5 @@
+from .pyecharts_renderer import PyechartsRenderer
+
+__all__ = [
+    "PyechartsRenderer",
+]
\ No newline at end of file
diff --git a/scrapeschema/renderers/base.py b/scrapeschema/renderers/base.py
new file mode 100644
index 0000000..3fe2e10
--- /dev/null
+++ b/scrapeschema/renderers/base.py
@@ -0,0 +1,10 @@
+from ..primitives import Entity, Relation
+
+from abc import ABC, abstractmethod
+from typing import List
+
+
+class BaseRenderer(ABC):
+    @abstractmethod
+    def render(self, entities: List[Entity], relations: List[Relation]) -> None:
+        pass
diff --git a/scrapeschema/renderers/pyecharts_renderer.py b/scrapeschema/renderers/pyecharts_renderer.py
new file mode 100644
index 0000000..e217916
--- /dev/null
+++ b/scrapeschema/renderers/pyecharts_renderer.py
@@ -0,0 +1,154 @@
+from .base import BaseRenderer
+from ..primitives import Entity, Relation
+
+from itertools import cycle
+from pyecharts import options as opts
+from pyecharts.charts import Graph
+import json
+
+
+class PyechartsRenderer(BaseRenderer):
+    """
+    PyechartsRenderer is a renderer that uses Pyecharts to visualize the entity-relationship graph.
+
+    Args:
+        repulsion (int): The repulsion force between nodes. Defaults to 2000.
+ title (str): The title of the graph. Defaults to "Entity-Relationship Graph". + + Returns: + Graph: A Pyecharts Graph object representing the entity-relationship graph. + """ + + def __init__(self, repulsion: int = 2000, title: str = "Entity-Relationship Graph"): + self.repulsion = repulsion + self.title = title + self.color_palette = [ + "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd", + "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf" + ] # List of colors + self.color_cycle = cycle(self.color_palette) # Create a cycle of colors for reuse + + def assign_colors(self, entities: list[Entity]) -> dict: + """Assign colors to each unique entity type dynamically.""" + type_to_color = {} + for entity in entities: + if entity.type not in type_to_color: + type_to_color[entity.type] = next(self.color_cycle) # Assign a new color + return type_to_color + + def extract_tooltip_info(self, attributes: dict) -> str: + """Extract information for the tooltip. Convert nested attributes to a readable JSON format.""" + return json.dumps(attributes, indent=2) # Formats attributes as a pretty JSON string + + def render(self, entities: list[Entity], relations: list[Relation], output_path: str = None) -> Graph: + # Assign colors dynamically based on the entity type + type_to_color = self.assign_colors(entities) + + # Prepare nodes as dictionaries, with labels showing only the entity ID and tooltips showing detailed info + nodes = [ + { + "name": entity.id, + "symbolSize": 50, # Adjust node size + "label": { + "formatter": f"{entity.id}" # Show only the entity id on the node + }, + "value": entity.type, # Use entity type as the value + "tooltip": { + "formatter": f"Type: {entity.type}\n{self.extract_tooltip_info(entity.attributes)}" + }, # Tooltip shows detailed attributes (nested data as JSON) + "itemStyle": {"color": type_to_color[entity.type]}, # Use dynamically assigned color + } + for entity in entities + ] + + # Prepare links based only on actual relations, with tooltips disabled + links = [ + {"source": relation.source, "target": relation.target, "tooltip": {"show": False}} + for relation in relations + ] + + # Create the graph + graph = ( + Graph() + .add( + "", + nodes, + links, + layout="force", # Use force-directed layout to allow drag-and-drop + repulsion=self.repulsion, # Controls the repulsion force between nodes + is_roam=True, # Allow zooming and panning + is_draggable=True, # Enable dragging of nodes + edge_symbol=["none", "arrow"], # Add arrows to the links + edge_symbol_size=[10, 10], # Size of the arrow + linestyle_opts=opts.LineStyleOpts(width=1, curve=0.2, opacity=0.7), # Customize the lines + label_opts=opts.LabelOpts(is_show=True, position="right"), # Show labels for the nodes + ) + .set_global_opts( + title_opts=opts.TitleOpts(title=self.title), + toolbox_opts=opts.ToolboxOpts( + is_show=True, + feature=opts.ToolBoxFeatureOpts( + save_as_image=opts.ToolBoxFeatureSaveAsImageOpts( + title="Save as Image", # Set label to English + name="graph_image", + ), + restore=opts.ToolBoxFeatureRestoreOpts( + title="Restore", # Set label to English + ), + data_view=opts.ToolBoxFeatureDataViewOpts( + title="Data View", + lang=["Data View", "Close", "Refresh"], # Set language to English + ), + magic_type=None, # Remove the magic type icons (the graph switching icons) + data_zoom=None, # Remove the zoom icon + brush=None, # Remove brush icon + ), + ), + ) + .set_series_opts( + label_opts=opts.LabelOpts(is_show=True), + ) + ) + + # Set the chart to fill the screen + graph.width = "100%" + 
+        graph.height = "100%"
+
+        # Save the graph to the output path
+        if output_path:
+            graph.render(output_path)
+
+            # Add the full-screen CSS after rendering
+            with open(output_path, "r") as file:
+                html_content = file.read()
+
+            # Style rules that stretch the page to the full viewport so the
+            # chart fills the screen (the exact rules here are illustrative)
+            full_screen_css = """
+            <style>
+                html, body {
+                    width: 100%;
+                    height: 100%;
+                    margin: 0;
+                    padding: 0;
+                }
+            </style>
+            """
+
+            # Insert the CSS before closing the head tag
+            html_content = html_content.replace("</head>", full_screen_css + "</head>")
+
+            # Write the updated content back to the file
+            with open(output_path, "w") as file:
+                file.write(html_content)
+
+        return graph
\ No newline at end of file