Skip to content

Commit f40c25f

Browse files
authored
Merge pull request #17 from ScrapeGraphAI/10-interactive-schema-visualization
10 interactive schema visualization
2 parents 8728bde + c50c31c commit f40c25f

15 files changed

+318
-230
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# ScrapeSchema
22

3-
ScrapeSchema is a Python-based library designed to extract entities and relationships from files.
3+
![graph](docs/assets/graph_pyecharts.png)
4+
45
The generated schemas can be inferred from documents and used for database tables or for generating a knowledge graph.
56

67
## Features

canvas_to_use_the lib.py

Lines changed: 0 additions & 14 deletions
This file was deleted.

docs/assets/graph_pyecharts.png

83.9 KB
Loading

examples/example_renderer.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from scrapeschema import Entity, Relation
2+
from scrapeschema.renderers import PyechartsRenderer
3+
4+
# Define entities with nested attributes
5+
entities = [
6+
Entity(id="1", type="Person", attributes={
7+
"name": "Alice",
8+
"age": 30,
9+
"address": {
10+
"city": "New York",
11+
"zip": "10001"
12+
}
13+
}),
14+
Entity(id="2", type="Person", attributes={
15+
"name": "Bob",
16+
"age": 40,
17+
"address": {
18+
"city": "Los Angeles",
19+
"zip": "90001"
20+
}
21+
}),
22+
Entity(id="3", type="Company", attributes={
23+
"name": "Acme Corp",
24+
"industry": "Tech",
25+
"headquarters": {
26+
"city": "San Francisco",
27+
"zip": "94105"
28+
}
29+
})
30+
]
31+
32+
# Define relations between the entities
33+
relations = [
34+
Relation(id="r1", source="1", target="2", name="Friend"),
35+
Relation(id="r2", source="1", target="3", name="Employee"),
36+
Relation(id="r3", source="2", target="3", name="Employer"),
37+
]
38+
39+
# Initialize the PyechartsRenderer
40+
renderer = PyechartsRenderer(repulsion=2000, title="Graph Example with Nested Entities")
41+
42+
# Render the graph using the provided nodes and links
43+
graph = renderer.render(entities, relations, output_path="graph_nested.html")

examples/extract_entities_json_schema_from_pdf.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@ def main():
66
load_dotenv() # Load environment variables from .env file
77
api_key = os.getenv("OPENAI_API_KEY")
88

9-
# Path to your PDF file
10-
pdf_path = "./test.pdf"
9+
# get current directory
10+
curr_dirr = os.path.dirname(os.path.abspath(__file__))
11+
pdf_name = "test.pdf"
12+
pdf_path = os.path.join(curr_dirr, pdf_name)
1113

1214
# Create a PDFParser instance with the API key
13-
pdf_parser = PDFParser(api_key)
15+
pdf_parser = PDFParser(
16+
api_key=api_key,
17+
model="gpt-4o-mini"
18+
)
1419

1520
# Create a FileExtractor instance with the PDF parser
1621
pdf_extractor = FileExtractor(pdf_path, pdf_parser)
Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
from scrapeschema import FileExtractor, PDFParser
2+
from scrapeschema.renderers import PyechartsRenderer
23
import os
34
from dotenv import load_dotenv
5+
load_dotenv() # Load environment variables from .env file
46

5-
def main():
6-
load_dotenv() # Load environment variables from .env file
7-
api_key = os.getenv("OPENAI_API_KEY")
7+
# Get the OpenAI API key from the environment variables
8+
api_key = os.getenv("OPENAI_API_KEY")
9+
10+
# get current directory
11+
curr_dirr = os.path.dirname(os.path.abspath(__file__))
812

9-
# Path to your PDF file
10-
pdf_path = "./test.pdf"
13+
def main():
14+
# Path to the PDF file
15+
pdf_name = "test.pdf"
16+
pdf_path = os.path.join(curr_dirr, pdf_name)
1117

1218
# Create a PDFParser instance with the API key
1319
pdf_parser = PDFParser(api_key)
@@ -17,12 +23,18 @@ def main():
1723

1824
# Extract entities from the PDF
1925
entities = pdf_extractor.extract_entities()
26+
relations = pdf_extractor.extract_relations()
2027

21-
print(entities)
28+
# Initialize the PyechartsRenderer
29+
renderer = PyechartsRenderer(repulsion=2000, title="Entity-Relationship Graph")
2230

23-
relations = pdf_extractor.extract_relations()
24-
print(relations)
25-
31+
# Render the graph using the provided nodes and links
32+
graph = renderer.render(entities, relations, output_path="graph.html")
33+
34+
print(graph)
2635

2736
if __name__ == "__main__":
28-
main()
37+
main()
38+
39+
40+

pyproject.toml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
[project]
2-
32
name = "scrapeschema"
43
version = "0.0.1"
5-
description = "library for creating ontologies from documents"
4+
description = "Library for creating ontologies from documents using LLM"
65
authors = [
76
{ name = "Marco Vinciguerra", email = "mvincig11@gmail.com" },
87
{ name = "Marco Perini", email = "perinim.98@gmail.com" },
@@ -13,7 +12,6 @@ dependencies = [
1312
"certifi==2024.7.4",
1413
"charset-normalizer==3.3.2",
1514
"idna==3.8",
16-
"pdf2image==1.17.0",
1715
"pillow==10.4.0",
1816
"python-dotenv==1.0.1",
1917
"requests==2.32.3",
@@ -26,6 +24,10 @@ homepage = "https://scrapegraphai.com/"
2624
repository = "https://github.com/ScrapeGraphAI/ScrapeSchema"
2725
documentation = ""
2826
keywords = [
27+
"scrapeschema",
28+
"ontologies",
29+
"documents",
30+
"knowledge graph",
2931
"scrapegraph",
3032
"scrapegraphai",
3133
"langchain",
@@ -53,7 +55,7 @@ classifiers = [
5355
requires-python = ">=3.9,<4.0"
5456

5557
[project.optional-dependencies]
56-
burr = ["burr[start]==0.22.1"]
58+
renderers = ["pyecharts==2.0.6"]
5759
docs = ["sphinx==6.0", "furo==2024.5.6"]
5860

5961
[build-system]
@@ -65,12 +67,12 @@ managed = true
6567
dev-dependencies = [
6668
"pytest==8.0.0",
6769
"pytest-mock==3.14.0",
68-
"-e file:.[burr]",
70+
"-e file:.[renderers]",
6971
"-e file:.[docs]",
7072
"pylint>=3.2.5",
7173
]
74+
7275
[tool.rye.scripts]
73-
pylint-local = "pylint scrapegraphai/**/*.py"
74-
pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py"
76+
pylint-local = "pylint scrapeschema/**/*.py"
77+
pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapeschema/**/*.py"
7578
update-requirements = "python 'manual deployment/autorequirements.py'"
76-

0 commit comments

Comments
 (0)