From 407912d918f0d722fa313edd2437fa8165d9a613 Mon Sep 17 00:00:00 2001 From: martonvago Date: Wed, 29 Oct 2025 14:37:20 +0000 Subject: [PATCH 1/9] feat: :sparkles: add example field --- src/check_datapackage/examples.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/check_datapackage/examples.py b/src/check_datapackage/examples.py index e299fe0a..49d7ed70 100644 --- a/src/check_datapackage/examples.py +++ b/src/check_datapackage/examples.py @@ -2,6 +2,26 @@ from typing import Any +def example_field_properties() -> dict[str, Any]: + """Create a set of example field properties. + + Returns: + A set of example field properties. + + Examples: + ```{python} + import check_datapackage as cdp + + cdp.example_field_properties() + ``` + """ + return { + "name": "eye-colour", + "type": "string", + "title": "Woolly dormouse eye colour", + } + + def example_resource_properties() -> dict[str, Any]: """Create a set of example resource properties. @@ -19,6 +39,7 @@ def example_resource_properties() -> dict[str, Any]: "name": "woolly-dormice-2015", "title": "Body fat percentage in the hibernating woolly dormouse", "path": "resources/woolly-dormice-2015/data.parquet", + "schema": {"fields": [example_field_properties()]}, } From 517e869b9e22998bb7227de4dc2e956bbe49b68c Mon Sep 17 00:00:00 2001 From: martonvago Date: Wed, 29 Oct 2025 14:38:04 +0000 Subject: [PATCH 2/9] feat: :sparkles: handle grouped errors on field --- src/check_datapackage/check.py | 64 ++++++++++++++++++++++++++++-- src/check_datapackage/constants.py | 18 +++++++++ tests/test_check.py | 59 +++++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 3 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index e9c47b5f..64c41cb8 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -6,7 +6,11 @@ from jsonschema import Draft7Validator, FormatChecker, ValidationError from check_datapackage.config import Config -from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS +from check_datapackage.constants import ( + DATA_PACKAGE_SCHEMA_PATH, + FIELD_TYPES, + GROUP_ERRORS, +) from check_datapackage.custom_check import apply_custom_checks from check_datapackage.exclusion import exclude from check_datapackage.internals import ( @@ -116,6 +120,7 @@ class SchemaError: type: str schema_path: str jsonpath: str + instance: Any parent: Optional["SchemaError"] = None @@ -158,6 +163,12 @@ def _handle_grouped_error( if parent_error.schema_path.endswith("resources/items/properties/path/oneOf"): schema_errors = _handle_S_resources_x_path(parent_error, schema_errors) + # Handle issues at $.resources[x].schema.fields[x] + if parent_error.schema_path.endswith("fields/items/oneOf"): + schema_errors = _handle_S_resources_x_schema_fields_x( + parent_error, schema_errors + ) + return schema_errors @@ -166,7 +177,7 @@ def _handle_S_resources_x( schema_errors: list[SchemaError], ) -> list[SchemaError]: """Do not flag missing `path` and `data` separately.""" - errors_in_group = _filter(schema_errors, lambda error: error.parent == parent_error) + errors_in_group = _get_errors_in_group(schema_errors, parent_error) # If the parent error is caused by other errors, remove it if errors_in_group: schema_errors.remove(parent_error) @@ -185,6 +196,7 @@ def _handle_S_resources_x( type="required", jsonpath=parent_error.jsonpath, schema_path=parent_error.schema_path, + instance=parent_error.instance, ) ) @@ -203,7 +215,7 @@ def _handle_S_resources_x_path( If `path` is a string, flag errors for the string-based schema. If `path` is an array, flag errors for the array-based schema. """ - errors_in_group = _filter(schema_errors, lambda error: error.parent == parent_error) + errors_in_group = _get_errors_in_group(schema_errors, parent_error) type_errors = _filter(errors_in_group, _is_path_type_error) only_type_errors = len(errors_in_group) == len(type_errors) @@ -219,6 +231,7 @@ def _handle_S_resources_x_path( type="type", jsonpath=type_errors[0].jsonpath, schema_path=type_errors[0].schema_path, + instance=parent_error.instance, ) ) @@ -226,6 +239,44 @@ def _handle_S_resources_x_path( return _filter(schema_errors, lambda error: error not in type_errors) +def _handle_S_resources_x_schema_fields_x( + parent_error: SchemaError, + schema_errors: list[SchemaError], +) -> list[SchemaError]: + """Only flag errors for the relevant field type. + + E.g., if the field type is `string`, flag errors for the string-based schema only. + """ + errors_in_group = _get_errors_in_group(schema_errors, parent_error) + schema_errors.remove(parent_error) + + field_type: str = parent_error.instance.get("type", "string") + + # The field's type is unknown + if field_type not in FIELD_TYPES: + unknown_field_error = SchemaError( + message=f"Unknown field type. Please use one of {', '.join(FIELD_TYPES)}.", + type="enum", + jsonpath=f"{parent_error.jsonpath}.type", + schema_path=parent_error.schema_path, + instance=parent_error.instance, + ) + # Replace all errors with an unknown field error + schema_errors.append(unknown_field_error) + return _filter(schema_errors, lambda error: error not in errors_in_group) + + # The field's type is known; keep only errors for this field type + schema_index = FIELD_TYPES.index(field_type) + errors_for_other_types = _filter( + errors_in_group, + lambda error: f"fields/items/oneOf/{schema_index}/" not in error.schema_path, + ) + return _filter( + schema_errors, + lambda error: error not in errors_for_other_types, + ) + + def _validation_error_to_schema_errors(error: ValidationError) -> list[SchemaError]: current = [_create_schema_error(error)] if not error.context: @@ -258,6 +309,7 @@ def _create_schema_error(error: ValidationError) -> SchemaError: type=str(error.validator), jsonpath=_get_full_json_path_from_error(error), schema_path="/".join(_map(error.absolute_schema_path, str)), + instance=error.instance, parent=_create_schema_error(error.parent) if error.parent else None, # type: ignore[arg-type] ) @@ -276,3 +328,9 @@ def _create_issue(error: SchemaError) -> Issue: jsonpath=error.jsonpath, type=error.type, ) + + +def _get_errors_in_group( + schema_errors: list[SchemaError], parent_error: SchemaError +) -> list[SchemaError]: + return _filter(schema_errors, lambda error: error.parent == parent_error) diff --git a/src/check_datapackage/constants.py b/src/check_datapackage/constants.py index a2150c8e..59ce6603 100644 --- a/src/check_datapackage/constants.py +++ b/src/check_datapackage/constants.py @@ -6,3 +6,21 @@ DATA_PACKAGE_SCHEMA_PATH = Path( str(files("check_datapackage.schemas").joinpath("data-package-2-0.json")) ) + +FIELD_TYPES = [ + "string", + "number", + "integer", + "date", + "time", + "datetime", + "year", + "yearmonth", + "boolean", + "object", + "geopoint", + "geojson", + "array", + "duration", + "any", +] diff --git a/tests/test_check.py b/tests/test_check.py index 95788e35..5f1387d6 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -2,6 +2,7 @@ from check_datapackage.check import check from check_datapackage.config import Config +from check_datapackage.constants import FIELD_TYPES from check_datapackage.examples import ( example_package_properties, example_resource_properties, @@ -263,3 +264,61 @@ def test_fail_with_bad_resource_path(path, location, type): assert len(issues) == 1 assert issues[0].type == type assert issues[0].jsonpath == location + + +def test_fail_empty_field(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0] = {} + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "required" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].name" + + +def test_fail_unknown_field(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown" + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "enum" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type" + + +@mark.parametrize("type", FIELD_TYPES) +def test_fail_field_with_bad_property(type): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = type + properties["resources"][0]["schema"]["fields"][0]["title"] = 4 + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "type" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].title" + + +def test_fail_field_with_bad_format(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["format"] = 4 + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "enum" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].format" + + +def test_fail_unknown_field_with_bad_property(): + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["title"] = 4 + properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown" + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "enum" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type" From 76f5a8bd2fd50fe41ab1e8defc9aa097b450a2ea Mon Sep 17 00:00:00 2001 From: martonvago Date: Mon, 3 Nov 2025 14:14:31 +0000 Subject: [PATCH 3/9] refactor: :recycle: review markups --- src/check_datapackage/check.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index e7ecdb80..cff100b3 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -113,6 +113,7 @@ class SchemaError: schema_path (str): The path to the violated check in the JSON schema. Path components are separated by '/'. jsonpath (str): The JSON path to the field that violates the check. + instance (Any): The part of the object that failed the check. parent (Optional[SchemaError]): The error group the error belongs to, if any. """ @@ -236,7 +237,10 @@ def _handle_S_resources_x_schema_fields_x( # The field's type is unknown if field_type not in FIELD_TYPES: unknown_field_error = SchemaError( - message=f"Unknown field type. Please use one of {', '.join(FIELD_TYPES)}.", + message=( + "Unknown Data Package field type. Please use one of" + f" {', '.join(FIELD_TYPES)}." + ), type="enum", jsonpath=f"{parent_error.jsonpath}.type", schema_path=parent_error.schema_path, From 424f9d62f1bee9fa4cf87119076a3d4dc6bc3cd2 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Tue, 4 Nov 2025 16:24:19 +0000 Subject: [PATCH 4/9] feat: :sparkles: handle grouped enum constraint errors --- src/check_datapackage/check.py | 80 ++++++++++++++++++++++++++++++++++ tests/test_check.py | 60 +++++++++++++++++++++++++ 2 files changed, 140 insertions(+) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index cff100b3..605d2d64 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -114,6 +114,7 @@ class SchemaError: Path components are separated by '/'. jsonpath (str): The JSON path to the field that violates the check. instance (Any): The part of the object that failed the check. + schema_value (Optional[Any]): The part of the schema violated by this error. parent (Optional[SchemaError]): The error group the error belongs to, if any. """ @@ -122,6 +123,7 @@ class SchemaError: schema_path: str jsonpath: str instance: Any + schema_value: Optional[Any] = None parent: Optional["SchemaError"] = None @@ -262,12 +264,85 @@ def _handle_S_resources_x_schema_fields_x( return edits +def _handle_S_resources_x_schema_fields_x_constraints_enum( + parent_error: SchemaError, + schema_errors: list[SchemaError], +) -> SchemaErrorEdits: + """Only flag errors for the relevant field type. + + E.g., if the field type is `string`, flag enum errors for the string-based + schema only. + """ + edits = SchemaErrorEdits() + if not parent_error.parent: + return edits + + errors_in_group = _get_errors_in_group(schema_errors, parent_error) + field_type: str = parent_error.parent.instance.get("type", "string") + edits.remove.append(parent_error) + + # The field's type is unknown; this is already flagged, so remove all errors + if field_type not in FIELD_TYPES: + edits.remove.extend(errors_in_group) + return edits + + # The field's type is known; keep only errors for this field type + schema_index = FIELD_TYPES.index(field_type) + path_for_type = f"fields/items/oneOf/{schema_index}/" + + errors_for_this_type = _filter( + errors_in_group, + lambda error: path_for_type in error.schema_path and error.type == "type", + ) + errors_for_other_types = _filter( + errors_in_group, lambda error: path_for_type not in error.schema_path + ) + + edits.remove.extend(errors_for_other_types) + if not errors_for_this_type: + return edits + + # Unify multiple enum errors + an_error = errors_for_this_type[0] + same_type = all( + _map( + errors_for_this_type, + lambda error: type(error.instance) is type(an_error.instance), + ) + ) + message = "All enum values must be the same type." + if same_type: + allowed_types = set( + _map(errors_for_this_type, lambda error: str(error.schema_value)) + ) + message = ( + "Incorrect enum value type. Enum values should be " + f"one of {', '.join(allowed_types)}." + ) + + unified_error = SchemaError( + message=message, + type="type", + schema_path=an_error.schema_path, + jsonpath=_strip_index(an_error.jsonpath), + instance=an_error.instance, + ) + edits.add.append(unified_error) + edits.remove.extend(errors_for_this_type) + + return edits + + _schema_path_to_handler: list[ tuple[str, Callable[[SchemaError, list[SchemaError]], SchemaErrorEdits]] ] = [ ("resources/items/oneOf", _handle_S_resources_x), ("resources/items/properties/path/oneOf", _handle_S_resources_x_path), ("fields/items/oneOf", _handle_S_resources_x_schema_fields_x), + ( + "constraints/properties/enum/oneOf", + _handle_S_resources_x_schema_fields_x_constraints_enum, + ), ] @@ -340,6 +415,7 @@ def _create_schema_error(error: ValidationError) -> SchemaError: jsonpath=_get_full_json_path_from_error(error), schema_path="/".join(_map(error.absolute_schema_path, str)), instance=error.instance, + schema_value=error.validator_value, parent=_create_schema_error(error.parent) if error.parent else None, # type: ignore[arg-type] ) @@ -364,3 +440,7 @@ def _get_errors_in_group( schema_errors: list[SchemaError], parent_error: SchemaError ) -> list[SchemaError]: return _filter(schema_errors, lambda error: error.parent == parent_error) + + +def _strip_index(jsonpath: str) -> str: + return re.sub(r"\[\d+\]$", "", jsonpath) diff --git a/tests/test_check.py b/tests/test_check.py index 0241252a..137fc94d 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -331,3 +331,63 @@ def test_fail_unknown_field_with_bad_property(): assert len(issues) == 1 assert issues[0].type == "enum" assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type" + + +def test_fail_unknown_field_with_bad_enum_constraint(): + """Fail a field whose enum constraint is the wrong type when the field's + type is unknown.""" + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown" + properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": {}} + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "enum" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type" + + +def test_fail_simple_field_with_bad_enum_constraint(): + """Fail a field whose enum values are the wrong type when enum values can + have only one type.""" + properties = example_package_properties() + # Expecting enum array to contain strings + properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": [1]} + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "type" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum[0]" + + +def test_fail_complex_field_with_bad_enum_constraint(): + """Fail a field whose enum values are the wrong type when enum values can + have multiple types.""" + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = "number" + # Expecting enum array to contain numbers or strings + properties["resources"][0]["schema"]["fields"][0]["constraints"] = { + "enum": [{}], + } + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "type" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum" + + +def test_fail_field_with_mixed_type_enum_constraint(): + """Fail a field whose enum values are not all the same type.""" + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = "geopoint" + properties["resources"][0]["schema"]["fields"][0]["constraints"] = { + "enum": [{}, [], "string", 1], + } + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "type" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum" From 4805cffa48dde8bcac1d15772569e52b7341bf08 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Thu, 6 Nov 2025 13:57:05 +0000 Subject: [PATCH 5/9] refactor: :recycle: simplify code somewhat --- src/check_datapackage/check.py | 75 ++++++++++++++-------------------- tests/test_check.py | 13 ++++++ 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index ddfceaf0..1a7ae76a 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -310,67 +310,54 @@ def _handle_S_resources_x_schema_fields_x_constraints_enum( parent_error: SchemaError, schema_errors: list[SchemaError], ) -> SchemaErrorEdits: - """Only flag errors for the relevant field type. - - E.g., if the field type is `string`, flag enum errors for the string-based - schema only. - """ - edits = SchemaErrorEdits() - if not parent_error.parent: - return edits - + """Only flag errors for the relevant field type and simplify errors.""" + + def _error_is_for_field_type() -> bool: + if not parent_error.parent: + return False + field_type: str = parent_error.parent.instance.get("type", "string") + if field_type not in FIELD_TYPES: + return False + schema_index = FIELD_TYPES.index(field_type) + return f"fields/items/oneOf/{schema_index}/" in parent_error.schema_path + + edits = SchemaErrorEdits(remove=[parent_error]) errors_in_group = _get_errors_in_group(schema_errors, parent_error) - field_type: str = parent_error.parent.instance.get("type", "string") - edits.remove.append(parent_error) - # The field's type is unknown; this is already flagged, so remove all errors - if field_type not in FIELD_TYPES: + # Remove errors for other field types + if not _error_is_for_field_type(): edits.remove.extend(errors_in_group) return edits - # The field's type is known; keep only errors for this field type - schema_index = FIELD_TYPES.index(field_type) - path_for_type = f"fields/items/oneOf/{schema_index}/" - - errors_for_this_type = _filter( - errors_in_group, - lambda error: path_for_type in error.schema_path and error.type == "type", - ) - errors_for_other_types = _filter( - errors_in_group, lambda error: path_for_type not in error.schema_path + enum_errors = _filter( + errors_in_group, lambda error: error.jsonpath.endswith("enum") ) + value_errors = _filter(errors_in_group, lambda error: error not in enum_errors) + edits.remove.extend(value_errors) - edits.remove.extend(errors_for_other_types) - if not errors_for_this_type: + # Keep only top-level enum errors, if any + if enum_errors: return edits - # Unify multiple enum errors - an_error = errors_for_this_type[0] - same_type = all( - _map( - errors_for_this_type, - lambda error: type(error.instance) is type(an_error.instance), - ) - ) + # Replace value errors with a simpler error message = "All enum values must be the same type." + same_type = len(set(_map(parent_error.instance, lambda value: type(value)))) == 1 if same_type: - allowed_types = set( - _map(errors_for_this_type, lambda error: str(error.schema_value)) - ) + allowed_types = set(_map(value_errors, lambda error: str(error.schema_value))) message = ( "Incorrect enum value type. Enum values should be " f"one of {', '.join(allowed_types)}." ) - unified_error = SchemaError( - message=message, - type="type", - schema_path=an_error.schema_path, - jsonpath=_strip_index(an_error.jsonpath), - instance=an_error.instance, + edits.add.append( + SchemaError( + message=message, + type="type", + schema_path=value_errors[0].schema_path, + jsonpath=_strip_index(value_errors[0].jsonpath), + instance=value_errors[0].instance, + ) ) - edits.add.append(unified_error) - edits.remove.extend(errors_for_this_type) return edits diff --git a/tests/test_check.py b/tests/test_check.py index 40e31909..e55018be 100644 --- a/tests/test_check.py +++ b/tests/test_check.py @@ -333,6 +333,19 @@ def test_fail_unknown_field_with_bad_property(): assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type" +def test_fail_field_with_non_unique_enum_values(): + """Fail a field whose enum array contains duplicate values.""" + properties = example_package_properties() + properties["resources"][0]["schema"]["fields"][0]["type"] = "number" + properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": [1, 1]} + + issues = check(properties) + + assert len(issues) == 1 + assert issues[0].type == "uniqueItems" + assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum" + + def test_fail_unknown_field_with_bad_enum_constraint(): """Fail a field whose enum constraint is the wrong type when the field's type is unknown.""" From e4e8733724027febc1922f943f3402aec860dc95 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Fri, 7 Nov 2025 11:22:02 +0000 Subject: [PATCH 6/9] refactor: :recycle: unnest helper function --- src/check_datapackage/check.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index 1a7ae76a..3e6a1581 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -311,21 +311,11 @@ def _handle_S_resources_x_schema_fields_x_constraints_enum( schema_errors: list[SchemaError], ) -> SchemaErrorEdits: """Only flag errors for the relevant field type and simplify errors.""" - - def _error_is_for_field_type() -> bool: - if not parent_error.parent: - return False - field_type: str = parent_error.parent.instance.get("type", "string") - if field_type not in FIELD_TYPES: - return False - schema_index = FIELD_TYPES.index(field_type) - return f"fields/items/oneOf/{schema_index}/" in parent_error.schema_path - edits = SchemaErrorEdits(remove=[parent_error]) errors_in_group = _get_errors_in_group(schema_errors, parent_error) # Remove errors for other field types - if not _error_is_for_field_type(): + if _error_not_for_field_type(parent_error): edits.remove.extend(errors_in_group) return edits @@ -362,6 +352,16 @@ def _error_is_for_field_type() -> bool: return edits +def _error_not_for_field_type(parent_error: SchemaError) -> bool: + if not parent_error.parent: + return True + field_type: str = parent_error.parent.instance.get("type", "string") + if field_type not in FIELD_TYPES: + return True + schema_index = FIELD_TYPES.index(field_type) + return f"fields/items/oneOf/{schema_index}/" not in parent_error.schema_path + + _schema_path_to_handler: list[ tuple[str, Callable[[SchemaError, list[SchemaError]], SchemaErrorEdits]] ] = [ From 6f9759e547687aca6a8edf3c89beeafa7b1daf35 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Fri, 7 Nov 2025 11:58:10 +0000 Subject: [PATCH 7/9] refactor: :recycle: make function shorter --- src/check_datapackage/check.py | 38 ++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index 3e6a1581..b7a0ca93 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -319,17 +319,24 @@ def _handle_S_resources_x_schema_fields_x_constraints_enum( edits.remove.extend(errors_in_group) return edits - enum_errors = _filter( - errors_in_group, lambda error: error.jsonpath.endswith("enum") + value_errors = _filter( + errors_in_group, + lambda error: not error.jsonpath.endswith("enum"), ) - value_errors = _filter(errors_in_group, lambda error: error not in enum_errors) + + # If there are only value errors, simplify them + if value_errors == errors_in_group: + edits.add.append(_get_unified_enum_values_error(parent_error, value_errors)) + + # Otherwise, keep only top-level enum errors edits.remove.extend(value_errors) + return edits - # Keep only top-level enum errors, if any - if enum_errors: - return edits - # Replace value errors with a simpler error +def _get_unified_enum_values_error( + parent_error: SchemaError, + value_errors: list[SchemaError], +) -> SchemaError: message = "All enum values must be the same type." same_type = len(set(_map(parent_error.instance, lambda value: type(value)))) == 1 if same_type: @@ -338,19 +345,14 @@ def _handle_S_resources_x_schema_fields_x_constraints_enum( "Incorrect enum value type. Enum values should be " f"one of {', '.join(allowed_types)}." ) - - edits.add.append( - SchemaError( - message=message, - type="type", - schema_path=value_errors[0].schema_path, - jsonpath=_strip_index(value_errors[0].jsonpath), - instance=value_errors[0].instance, - ) + return SchemaError( + message=message, + type="type", + schema_path=value_errors[0].schema_path, + jsonpath=_strip_index(value_errors[0].jsonpath), + instance=value_errors[0].instance, ) - return edits - def _error_not_for_field_type(parent_error: SchemaError) -> bool: if not parent_error.parent: From d4f8b69116c433a3b46827d3f8720415f3e4cd1b Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Fri, 14 Nov 2025 10:29:15 +0000 Subject: [PATCH 8/9] refactor: :recycle: review markups --- src/check_datapackage/check.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index dfed8ab0..b5a0a260 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -315,7 +315,7 @@ def _handle_S_resources_x_schema_fields_x_constraints_enum( errors_in_group = _get_errors_in_group(schema_errors, parent_error) # Remove errors for other field types - if _error_not_for_field_type(parent_error): + if _not_field_type_error(parent_error): edits.remove.extend(errors_in_group) return edits @@ -326,14 +326,14 @@ def _handle_S_resources_x_schema_fields_x_constraints_enum( # If there are only value errors, simplify them if value_errors == errors_in_group: - edits.add.append(_get_unified_enum_values_error(parent_error, value_errors)) + edits.add.append(_get_enum_values_error(parent_error, value_errors)) # Otherwise, keep only top-level enum errors edits.remove.extend(value_errors) return edits -def _get_unified_enum_values_error( +def _get_enum_values_error( parent_error: SchemaError, value_errors: list[SchemaError], ) -> SchemaError: @@ -342,7 +342,7 @@ def _get_unified_enum_values_error( if same_type: allowed_types = set(_map(value_errors, lambda error: str(error.schema_value))) message = ( - "Incorrect enum value type. Enum values should be " + "The enum value type is not correct. Enum values should be " f"one of {', '.join(allowed_types)}." ) return SchemaError( @@ -354,7 +354,7 @@ def _get_unified_enum_values_error( ) -def _error_not_for_field_type(parent_error: SchemaError) -> bool: +def _not_field_type_error(parent_error: SchemaError) -> bool: if not parent_error.parent: return True field_type: str = parent_error.parent.instance.get("type", "string") From 1cc2769329d966ee2350fc028e4c7c3fe5f168d2 Mon Sep 17 00:00:00 2001 From: Marton Vago Date: Fri, 14 Nov 2025 12:55:27 +0000 Subject: [PATCH 9/9] docs: :memo: review markups --- src/check_datapackage/check.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/check_datapackage/check.py b/src/check_datapackage/check.py index 02e54ac7..549f1d35 100644 --- a/src/check_datapackage/check.py +++ b/src/check_datapackage/check.py @@ -156,7 +156,8 @@ class SchemaError: Path components are separated by '/'. jsonpath (str): The JSON path to the field that violates the check. instance (Any): The part of the object that failed the check. - schema_value (Optional[Any]): The part of the schema violated by this error. + schema_value (Optional[Any]): The expected value that is checked against, + which is part of the schema violated by this error. parent (Optional[SchemaError]): The error group the error belongs to, if any. """