From 407912d918f0d722fa313edd2437fa8165d9a613 Mon Sep 17 00:00:00 2001 From: martonvago Date: Wed, 29 Oct 2025 14:37:20 +0000 Subject: [PATCH 1/4] feat: :sparkles: add example field --- src/check_datapackage/examples.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/check_datapackage/examples.py b/src/check_datapackage/examples.py index e299fe0a..49d7ed70 100644 --- a/src/check_datapackage/examples.py +++ b/src/check_datapackage/examples.py @@ -2,6 +2,26 @@ from typing import Any +def example_field_properties() -> dict[str, Any]: + """Create a set of example field properties. + + Returns: + A set of example field properties. + + Examples: + ```{python} + import check_datapackage as cdp + + cdp.example_field_properties() + ``` + """ + return { + "name": "eye-colour", + "type": "string", + "title": "Woolly dormouse eye colour", + } + + def example_resource_properties() -> dict[str, Any]: """Create a set of example resource properties. @@ -19,6 +39,7 @@ def example_resource_properties() -> dict[str, Any]: "name": "woolly-dormice-2015", "title": "Body fat percentage in the hibernating woolly dormouse", "path": "resources/woolly-dormice-2015/data.parquet", + "schema": {"fields": [example_field_properties()]}, } From 517e869b9e22998bb7227de4dc2e956bbe49b68c Mon Sep 17 00:00:00 2001 From: martonvago Date: Wed, 29 Oct 2025 14:38:04 +0000 Subject: [PATCH 2/4] feat: :sparkles: handle grouped errors on field --- src/check_datapackage/check.py | 64 ++++++++++++++++++++++++++++-- src/check_datapackage/constants.py | 18 +++++++++ tests/test_check.py | 59 +++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 3 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index e9c47b5f..64c41cb8 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -6,7 +6,11 @@ from jsonschema import Draft7Validator, FormatChecker, ValidationError from check_datapackage.config import Config -from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS +from check_datapackage.constants import ( + DATA_PACKAGE_SCHEMA_PATH, + FIELD_TYPES, + GROUP_ERRORS, +) from check_datapackage.custom_check import apply_custom_checks from check_datapackage.exclusion import exclude from check_datapackage.internals import ( @@ -116,6 +120,7 @@ class SchemaError: type: str schema_path: str jsonpath: str + instance: Any parent: Optional["SchemaError"] = None @@ -158,6 +163,12 @@ def _handle_grouped_error( if parent_error.schema_path.endswith("resources/items/properties/path/oneOf"): schema_errors = _handle_S_resources_x_path(parent_error, schema_errors) + # Handle issues at $.resources[x].schema.fields[x] + if parent_error.schema_path.endswith("fields/items/oneOf"): + schema_errors = _handle_S_resources_x_schema_fields_x( + parent_error, schema_errors + ) + return schema_errors @@ -166,7 +177,7 @@ def _handle_S_resources_x( schema_errors: list[SchemaError], ) -> list[SchemaError]: """Do not flag missing `path` and `data` separately.""" - errors_in_group = _filter(schema_errors, lambda error: error.parent == parent_error) + errors_in_group = _get_errors_in_group(schema_errors, parent_error) # If the parent error is caused by other errors, remove it if errors_in_group: schema_errors.remove(parent_error) @@ -185,6 +196,7 @@ def _handle_S_resources_x( type="required", jsonpath=parent_error.jsonpath, schema_path=parent_error.schema_path, + instance=parent_error.instance, ) ) @@ -203,7 +215,7 @@ def _handle_S_resources_x_path( If `path` is a string, flag errors for the string-based schema. If `path` is an array, flag errors for the array-based schema. """ - errors_in_group = _filter(schema_errors, lambda error: error.parent == parent_error) + errors_in_group = _get_errors_in_group(schema_errors, parent_error) type_errors = _filter(errors_in_group, _is_path_type_error) only_type_errors = len(errors_in_group) == len(type_errors) @@ -219,6 +231,7 @@ def _handle_S_resources_x_path( type="type", jsonpath=type_errors[0].jsonpath, schema_path=type_errors[0].schema_path, + instance=parent_error.instance, ) ) @@ -226,6 +239,44 @@ def _handle_S_resources_x_path( return _filter(schema_errors, lambda error: error not in type_errors) +def _handle_S_resources_x_schema_fields_x( + parent_error: SchemaError, + schema_errors: list[SchemaError], +) -> list[SchemaError]: + """Only flag errors for the relevant field type. + + E.g., if the field type is `string`, flag errors for the string-based schema only. + """ + errors_in_group = _get_errors_in_group(schema_errors, parent_error) + schema_errors.remove(parent_error) + + field_type: str = parent_error.instance.get("type", "string") + + # The field's type is unknown + if field_type not in FIELD_TYPES: + unknown_field_error = SchemaError( + message=f"Unknown field type. Please use one of {', '.join(FIELD_TYPES)}.", + type="enum", + jsonpath=f"{parent_error.jsonpath}.type", + schema_path=parent_error.schema_path, + instance=parent_error.instance, + ) + # Replace all errors with an unknown field error + schema_errors.append(unknown_field_error) + return _filter(schema_errors, lambda error: error not in errors_in_group) + + # The field's type is known; keep only errors for this field type + schema_index = FIELD_TYPES.index(field_type) + errors_for_other_types = _filter( + errors_in_group, + lambda error: f"fields/items/oneOf/{schema_index}/" not in error.schema_path, + ) + return _filter( + schema_errors, + lambda error: error not in errors_for_other_types, + ) + + def _validation_error_to_schema_errors(error: ValidationError) -> list[SchemaError]: current = [_create_schema_error(error)] if not error.context: @@ -258,6 +309,7 @@ def _create_schema_error(error: ValidationError) -> SchemaError: type=str(error.validator), jsonpath=_get_full_json_path_from_error(error), schema_path="/".join(_map(error.absolute_schema_path, str)), + instance=error.instance, parent=_create_schema_error(error.parent) if error.parent else None, # type: ignore[arg-type] ) @@ -276,3 +328,9 @@ def _create_issue(error: SchemaError) -> Issue: jsonpath=error.jsonpath, type=error.type, ) + + +def _get_errors_in_group( + schema_errors: list[SchemaError], parent_error: SchemaError +) -> list[SchemaError]: + return _filter(schema_errors, lambda error: error.parent == parent_error) diff --git a/src/check_datapackage/constants.py b/src/check_datapackage/constants.py index a2150c8e..59ce6603 100644 --- a/src/check_datapackage/constants.py +++ b/src/check_datapackage/constants.py @@ -6,3 +6,21 @@ DATA_PACKAGE_SCHEMA_PATH = Path( str(files("check_datapackage.schemas").joinpath("data-package-2-0.json")) ) + +FIELD_TYPES = [ + "string", + "number", + "integer", + "date", + "time", + "datetime", + "year", + "yearmonth", + "boolean", + "object", + "geopoint", + "geojson", + "array", + "duration", + "any", +] diff --git a/tests/test_check.py b/tests/test_check.py index 95788e35..5f1387d6 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -2,6 +2,7 @@ from check_datapackage.check import check from check_datapackage.config import Config +from check_datapackage.constants import FIELD_TYPES from check_datapackage.examples import ( example_package_properties, example_resource_properties, @@ -263,3 +264,61 @@ def test_fail_with_bad_resource_path(path, location, type): assert len(issues) == 1 assert issues[0].type == type assert issues[0].jsonpath == location + + +def test_fail_empty_field(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0] = {} + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "required" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].name" + + +def test_fail_unknown_field(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown" + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "enum" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type" + + +@mark.parametrize("type", FIELD_TYPES) +def test_fail_field_with_bad_property(type): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = type + properties["resources"][0]["schema"]["fields"][0]["title"] = 4 + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "type" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].title" + + +def test_fail_field_with_bad_format(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["format"] = 4 + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "enum" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].format" + + +def test_fail_unknown_field_with_bad_property(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["title"] = 4 + properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown" + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "enum" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type" From 76f5a8bd2fd50fe41ab1e8defc9aa097b450a2ea Mon Sep 17 00:00:00 2001 From: martonvago Date: Mon, 3 Nov 2025 14:14:31 +0000 Subject: [PATCH 3/4] refactor: :recycle: review markups --- src/check_datapackage/check.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index e7ecdb80..cff100b3 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -113,6 +113,7 @@ class SchemaError: schema_path (str): The path to the violated check in the JSON schema. Path components are separated by '/'. jsonpath (str): The JSON path to the field that violates the check. + instance (Any): The part of the object that failed the check. parent (Optional[SchemaError]): The error group the error belongs to, if any. """ @@ -236,7 +237,10 @@ def _handle_S_resources_x_schema_fields_x( # The field's type is unknown if field_type not in FIELD_TYPES: unknown_field_error = SchemaError( - message=f"Unknown field type. Please use one of {', '.join(FIELD_TYPES)}.", + message=( + "Unknown Data Package field type. Please use one of" + f" {', '.join(FIELD_TYPES)}." + ), type="enum", jsonpath=f"{parent_error.jsonpath}.type", schema_path=parent_error.schema_path, From 8585c205ee1e37e9cd23714cab164ac10ea98318 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Tue, 4 Nov 2025 16:37:23 +0000 Subject: [PATCH 4/4] refactor: :recycle: change error message --- src/check_datapackage/check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index cff100b3..31d12c6d 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -238,8 +238,8 @@ def _handle_S_resources_x_schema_fields_x( if field_type not in FIELD_TYPES: unknown_field_error = SchemaError( message=( - "Unknown Data Package field type. Please use one of" - f" {', '.join(FIELD_TYPES)}." + "The type property in this resource schema field is incorrect. " + f"The value can only be one of these types: {', '.join(FIELD_TYPES)}." ), type="enum", jsonpath=f"{parent_error.jsonpath}.type",