Skip to content

Commit f40c25f

Browse files
authored
Merge pull request #17 from ScrapeGraphAI/10-interactive-schema-visualization
10 interactive schema visualization
2 parents 8728bde + c50c31c commit f40c25f

15 files changed

+318
-230
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# ScrapeSchema
22

3-
ScrapeSchema is a Python-based library designed to extract entities and relationships from files.
3+
![graph](docs/assets/graph_pyecharts.png)
4+
45
The generated schemas can be inferred from documents and used for database tables or for generating a knowledge graph.
56

67
## Features

canvas_to_use_the lib.py

Lines changed: 0 additions & 14 deletions
This file was deleted.

docs/assets/graph_pyecharts.png

83.9 KB
Loading

examples/example_renderer.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from scrapeschema import Entity, Relation
2+
from scrapeschema.renderers import PyechartsRenderer
3+
4+
# Define entities with nested attributes
5+
entities = [
6+
Entity(id="1", type="Person", attributes={
7+
"name": "Alice",
8+
"age": 30,
9+
"address": {
10+
"city": "New York",
11+
"zip": "10001"
12+
}
13+
}),
14+
Entity(id="2", type="Person", attributes={
15+
"name": "Bob",
16+
"age": 40,
17+
"address": {
18+
"city": "Los Angeles",
19+
"zip": "90001"
20+
}
21+
}),
22+
Entity(id="3", type="Company", attributes={
23+
"name": "Acme Corp",
24+
"industry": "Tech",
25+
"headquarters": {
26+
"city": "San Francisco",
27+
"zip": "94105"
28+
}
29+
})
30+
]
31+
32+
# Define relations between the entities
33+
relations = [
34+
Relation(id="r1", source="1", target="2", name="Friend"),
35+
Relation(id="r2", source="1", target="3", name="Employee"),
36+
Relation(id="r3", source="2", target="3", name="Employer"),
37+
]
38+
39+
# Initialize the PyechartsRenderer
40+
renderer = PyechartsRenderer(repulsion=2000, title="Graph Example with Nested Entities")
41+
42+
# Render the graph using the provided nodes and links
43+
graph = renderer.render(entities, relations, output_path="graph_nested.html")

examples/extract_entities_json_schema_from_pdf.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@ def main():
66
load_dotenv() # Load environment variables from .env file
77
api_key = os.getenv("OPENAI_API_KEY")
88

9-
# Path to your PDF file
10-
pdf_path = "./test.pdf"
9+
# get current directory
10+
curr_dirr = os.path.dirname(os.path.abspath(__file__))
11+
pdf_name = "test.pdf"
12+
pdf_path = os.path.join(curr_dirr, pdf_name)
1113

1214
# Create a PDFParser instance with the API key
13-
pdf_parser = PDFParser(api_key)
15+
pdf_parser = PDFParser(
16+
api_key=api_key,
17+
model="gpt-4o-mini"
18+
)
1419

1520
# Create a FileExtractor instance with the PDF parser
1621
pdf_extractor = FileExtractor(pdf_path, pdf_parser)
Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
from scrapeschema import FileExtractor, PDFParser
2+
from scrapeschema.renderers import PyechartsRenderer
23
import os
34
from dotenv import load_dotenv
5+
load_dotenv() # Load environment variables from .env file
46

5-
def main():
6-
load_dotenv() # Load environment variables from .env file
7-
api_key = os.getenv("OPENAI_API_KEY")
7+
# Get the OpenAI API key from the environment variables
8+
api_key = os.getenv("OPENAI_API_KEY")
9+
10+
# get current directory
11+
curr_dirr = os.path.dirname(os.path.abspath(__file__))
812

9-
# Path to your PDF file
10-
pdf_path = "./test.pdf"
13+
def main():
14+
# Path to the PDF file
15+
pdf_name = "test.pdf"
16+
pdf_path = os.path.join(curr_dirr, pdf_name)
1117

1218
# Create a PDFParser instance with the API key
1319
pdf_parser = PDFParser(api_key)
@@ -17,12 +23,18 @@ def main():
1723

1824
# Extract entities from the PDF
1925
entities = pdf_extractor.extract_entities()
26+
relations = pdf_extractor.extract_relations()
2027

21-
print(entities)
28+
# Initialize the PyechartsRenderer
29+
renderer = PyechartsRenderer(repulsion=2000, title="Entity-Relationship Graph")
2230

23-
relations = pdf_extractor.extract_relations()
24-
print(relations)
25-
31+
# Render the graph using the provided nodes and links
32+
graph = renderer.render(entities, relations, output_path="graph.html")
33+
34+
print(graph)
2635

2736
if __name__ == "__main__":
28-
main()
37+
main()
38+
39+
40+

pyproject.toml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
[project]
2-
32
name = "scrapeschema"
43
version = "0.0.1"
5-
description = "library for creating ontologies from documents"
4+
description = "Library for creating ontologies from documents using LLM"
65
authors = [
76
{ name = "Marco Vinciguerra", email = "mvincig11@gmail.com" },
87
{ name = "Marco Perini", email = "perinim.98@gmail.com" },
@@ -13,7 +12,6 @@ dependencies = [
1312
"certifi==2024.7.4",
1413
"charset-normalizer==3.3.2",
1514
"idna==3.8",
16-
"pdf2image==1.17.0",
1715
"pillow==10.4.0",
1816
"python-dotenv==1.0.1",
1917
"requests==2.32.3",
@@ -26,6 +24,10 @@ homepage = "https://scrapegraphai.com/"
2624
repository = "https://github.com/ScrapeGraphAI/ScrapeSchema"
2725
documentation = ""
2826
keywords = [
27+
"scrapeschema",
28+
"ontologies",
29+
"documents",
30+
"knowledge graph",
2931
"scrapegraph",
3032
"scrapegraphai",
3133
"langchain",
@@ -53,7 +55,7 @@ classifiers = [
5355
requires-python = ">=3.9,<4.0"
5456

5557
[project.optional-dependencies]
56-
burr = ["burr[start]==0.22.1"]
58+
renderers = ["pyecharts==2.0.6"]
5759
docs = ["sphinx==6.0", "furo==2024.5.6"]
5860

5961
[build-system]
@@ -65,12 +67,12 @@ managed = true
6567
dev-dependencies = [
6668
"pytest==8.0.0",
6769
"pytest-mock==3.14.0",
68-
"-e file:.[burr]",
70+
"-e file:.[renderers]",
6971
"-e file:.[docs]",
7072
"pylint>=3.2.5",
7173
]
74+
7275
[tool.rye.scripts]
73-
pylint-local = "pylint scrapegraphai/**/*.py"
74-
pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py"
76+
pylint-local = "pylint scrapeschema/**/*.py"
77+
pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapeschema/**/*.py"
7578
update-requirements = "python 'manual deployment/autorequirements.py'"
76-

0 commit comments

Comments
 (0)