diff --git a/.gitmodules b/.gitmodules index a72a4f8..d9705e1 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ [submodule "third_party/substrait"] path = third_party/substrait url = https://github.com/substrait-io/substrait -[submodule "third_party/substrait-cpp"] - path = third_party/substrait-cpp - url = https://github.com/substrait-io/substrait-cpp diff --git a/README.md b/README.md index 4deeba2..9587aa0 100644 --- a/README.md +++ b/README.md @@ -136,12 +136,13 @@ relations { ``` ## Load a Substrait Plan from JSON -A substrait plan can be loaded from its JSON representation -using the ``substrait.json.load_json`` and ``substrait.json.parse_json`` +A substrait plan can be loaded [from the JSON representation +of the protobuf message](https://protobuf.dev/programming-guides/json/) using the [`protobuf` python library](https://pypi.org/project/protobuf/): functions: ``` ->>> import substrait.json +>>> from substrait.proto import Plan +>>> from google.protobuf import json_format >>> jsontext = """{ ... "relations":[ ... { @@ -182,7 +183,7 @@ functions: ... } ... ] ... }""" ->>> substrait.json.parse_json(jsontext) +>>> json_format.Parse(text=jsontext, mesage=Plan()) relations { root { input { diff --git a/src/substrait/json.py b/src/substrait/json.py deleted file mode 100644 index ed27d44..0000000 --- a/src/substrait/json.py +++ /dev/null @@ -1,25 +0,0 @@ -from google.protobuf import json_format - -from substrait.proto import Plan - - -def load_json(filename): - """Load a Substrait Plan from a json file""" - with open(filename, encoding="utf-8") as f: - return parse_json(f.read()) - - -def parse_json(text): - """Generate a Substrait Plan from its JSON definition""" - return json_format.Parse(text=text, message=Plan()) - - -def write_json(plan, filename): - """Write a Substrait Plan to a json file""" - with open(filename, "w+") as f: - f.write(dump_json(plan)) - - -def dump_json(plan): - """Dump a Substrait Plan to a string in JSON format""" - return json_format.MessageToJson(plan) diff --git a/tests/sql/test_sql_to_substrait.py b/tests/sql/test_sql_to_substrait.py index ec33ee8..4b36431 100644 --- a/tests/sql/test_sql_to_substrait.py +++ b/tests/sql/test_sql_to_substrait.py @@ -1,6 +1,6 @@ from substrait.sql.sql_to_substrait import convert import pyarrow -import substrait.json +from google.protobuf import json_format import tempfile import pyarrow.substrait as pa_substrait import pytest @@ -91,7 +91,7 @@ def duckdb_schema_resolver(name: str): conn.install_extension("substrait", repository="community") conn.load_extension("substrait") - plan_json = substrait.json.dump_json(plan) + plan_json = json_format.MessageToJson(plan) sql = f"CALL from_substrait_json('{plan_json}')" substrait_out = conn.sql(sql) diff --git a/tests/test_json.py b/tests/test_json.py deleted file mode 100644 index 741bd87..0000000 --- a/tests/test_json.py +++ /dev/null @@ -1,71 +0,0 @@ -import os -import pathlib -import tempfile - -from substrait.proto import Plan -from substrait.json import load_json, parse_json, dump_json, write_json - -import pytest - - -JSON_FIXTURES = ( - pathlib.Path(os.path.dirname(__file__)) - / ".." - / "third_party" - / "substrait-cpp" - / "src" - / "substrait" - / "textplan" - / "data" -) -JSON_TEST_FILE = sorted(JSON_FIXTURES.glob("*.json")) -JSON_TEST_FILENAMES = [path.name for path in JSON_TEST_FILE] - - -@pytest.mark.parametrize("jsonfile", JSON_TEST_FILE, ids=JSON_TEST_FILENAMES) -def test_json_load(jsonfile): - with open(jsonfile) as f: - jsondata = _strip_json_comments(f) - parsed_plan = parse_json(jsondata) - - # Save to a temporary file so we can test load_json - # on content stripped of comments. - with tempfile.TemporaryDirectory() as tmpdir: - # We use a TemporaryDirectory as on Windows NamedTemporaryFile - # doesn't allow for easy reopening of the file. - with open(pathlib.Path(tmpdir) / "jsonfile.json", "w+") as stripped_file: - stripped_file.write(jsondata) - loaded_plan = load_json(stripped_file.name) - - # The Plan constructor itself will throw an exception - # in case there is anything wrong in parsing the JSON - # so we can take for granted that if the plan was created - # it is a valid plan in terms of protobuf definition. - assert type(loaded_plan) is Plan - - # Ensure that when loading from file or from string - # the outcome is the same - assert parsed_plan == loaded_plan - - -@pytest.mark.parametrize("jsonfile", JSON_TEST_FILE, ids=JSON_TEST_FILENAMES) -def test_json_roundtrip(jsonfile): - with open(jsonfile) as f: - jsondata = _strip_json_comments(f) - - parsed_plan = parse_json(jsondata) - assert parse_json(dump_json(parsed_plan)) == parsed_plan - - # Test with write/load - with tempfile.TemporaryDirectory() as tmpdir: - filename = pathlib.Path(tmpdir) / "jsonfile.json" - write_json(parsed_plan, filename) - assert load_json(filename) == parsed_plan - - -def _strip_json_comments(jsonfile): - # The JSON files in the cpp testsuite are prefixed with - # a comment containing the SQL that matches the json plan. - # As Python JSON parser doesn't support comments, - # we have to strip them to make the content readable - return "\n".join(line for line in jsonfile.readlines() if line[0] != "#") diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp deleted file mode 160000 index e26585f..0000000 --- a/third_party/substrait-cpp +++ /dev/null @@ -1 +0,0 @@ -Subproject commit e26585f45cdfd0ed3bf03f700c354f04685398c6 diff --git a/update_cpp.sh b/update_cpp.sh deleted file mode 100644 index 804546a..0000000 --- a/update_cpp.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -echo "Updating substrait-cpp submodule..." -git submodule update --remote third_party/substrait-cpp -