Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions src/check_datapackage/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ class SchemaError:
Path components are separated by '/'.
jsonpath (str): The JSON path to the field that violates the check.
instance (Any): The part of the object that failed the check.
schema_value (Optional[Any]): The expected value that is checked against,
which is part of the schema violated by this error.
parent (Optional[SchemaError]): The error group the error belongs to, if any.
"""

Expand All @@ -164,6 +166,7 @@ class SchemaError:
schema_path: str
jsonpath: str
instance: Any
schema_value: Optional[Any] = None
parent: Optional["SchemaError"] = None


Expand Down Expand Up @@ -304,6 +307,64 @@ def _handle_S_resources_x_schema_fields_x(
return edits


def _handle_S_resources_x_schema_fields_x_constraints_enum(
parent_error: SchemaError,
schema_errors: list[SchemaError],
) -> SchemaErrorEdits:
"""Only flag errors for the relevant field type and simplify errors."""
edits = SchemaErrorEdits(remove=[parent_error])
errors_in_group = _get_errors_in_group(schema_errors, parent_error)

# Remove errors for other field types
if _not_field_type_error(parent_error):
edits.remove.extend(errors_in_group)
return edits

value_errors = _filter(
errors_in_group,
lambda error: not error.jsonpath.endswith("enum"),
)

# If there are only value errors, simplify them
if value_errors == errors_in_group:
edits.add.append(_get_enum_values_error(parent_error, value_errors))

# Otherwise, keep only top-level enum errors
edits.remove.extend(value_errors)
return edits


def _get_enum_values_error(
parent_error: SchemaError,
value_errors: list[SchemaError],
) -> SchemaError:
message = "All enum values must be the same type."
same_type = len(set(_map(parent_error.instance, lambda value: type(value)))) == 1
if same_type:
allowed_types = set(_map(value_errors, lambda error: str(error.schema_value)))
message = (
"The enum value type is not correct. Enum values should be "
f"one of {', '.join(allowed_types)}."
)
return SchemaError(
message=message,
type="type",
schema_path=value_errors[0].schema_path,
jsonpath=_strip_index(value_errors[0].jsonpath),
instance=value_errors[0].instance,
)


def _not_field_type_error(parent_error: SchemaError) -> bool:
if not parent_error.parent:
return True
field_type: str = parent_error.parent.instance.get("type", "string")
if field_type not in FIELD_TYPES:
return True
schema_index = FIELD_TYPES.index(field_type)
return f"fields/items/oneOf/{schema_index}/" not in parent_error.schema_path


def _handle_S_resources_x_schema_primary_key(
parent_error: SchemaError,
schema_errors: list[SchemaError],
Expand Down Expand Up @@ -420,6 +481,10 @@ def _handle_licenses(
("resources/items/oneOf", _handle_S_resources_x),
("resources/items/properties/path/oneOf", _handle_S_resources_x_path),
("fields/items/oneOf", _handle_S_resources_x_schema_fields_x),
(
"constraints/properties/enum/oneOf",
_handle_S_resources_x_schema_fields_x_constraints_enum,
),
("primaryKey/oneOf", _handle_S_resources_x_schema_primary_key),
("foreignKeys/items/oneOf", _handle_S_resources_x_schema_foreign_keys),
("licenses/items/anyOf", _handle_licenses),
Expand Down Expand Up @@ -495,6 +560,7 @@ def _create_schema_error(error: ValidationError) -> SchemaError:
jsonpath=_get_full_json_path_from_error(error),
schema_path="/".join(_map(error.absolute_schema_path, str)),
instance=error.instance,
schema_value=error.validator_value,
parent=_create_schema_error(error.parent) if error.parent else None, # type: ignore[arg-type]
)

Expand All @@ -519,3 +585,7 @@ def _get_errors_in_group(
schema_errors: list[SchemaError], parent_error: SchemaError
) -> list[SchemaError]:
return _filter(schema_errors, lambda error: error.parent == parent_error)


def _strip_index(jsonpath: str) -> str:
return re.sub(r"\[\d+\]$", "", jsonpath)
117 changes: 95 additions & 22 deletions tests/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,101 @@ def test_fail_unknown_field_with_bad_property():
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type"


def test_fail_package_license_with_no_name_or_path():
properties = example_package_properties()
del properties["licenses"][0]["name"]

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "required"
assert issues[0].jsonpath == "$.licenses[0]"


def test_fail_resource_license_with_no_name_or_path():
properties = example_package_properties()
properties["resources"][0]["licenses"] = [{}]

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "required"
assert issues[0].jsonpath == "$.resources[0].licenses[0]"


def test_fail_field_with_non_unique_enum_values():
"""Fail a field whose enum array contains duplicate values."""
properties = example_package_properties()
properties["resources"][0]["schema"]["fields"][0]["type"] = "number"
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": [1, 1]}

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "uniqueItems"
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum"


def test_fail_unknown_field_with_bad_enum_constraint():
"""Fail a field whose enum constraint is the wrong type when the field's
type is unknown."""
properties = example_package_properties()
properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown"
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": {}}

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "enum"
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type"


def test_fail_simple_field_with_bad_enum_constraint():
"""Fail a field whose enum values are the wrong type when enum values can
have only one type."""
properties = example_package_properties()
# Expecting enum array to contain strings
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {"enum": [1]}

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "type"
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum[0]"


def test_fail_complex_field_with_bad_enum_constraint():
"""Fail a field whose enum values are the wrong type when enum values can
have multiple types."""
properties = example_package_properties()
properties["resources"][0]["schema"]["fields"][0]["type"] = "number"
# Expecting enum array to contain numbers or strings
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {
"enum": [{}],
}

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "type"
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum"


def test_fail_field_with_mixed_type_enum_constraint():
"""Fail a field whose enum values are not all the same type."""
properties = example_package_properties()
properties["resources"][0]["schema"]["fields"][0]["type"] = "geopoint"
properties["resources"][0]["schema"]["fields"][0]["constraints"] = {
"enum": [{}, [], "string", 1],
}

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "type"
assert issues[0].jsonpath == "$.resources[0].schema.fields[0].constraints.enum"


def test_pass_good_foreign_keys():
properties = example_package_properties()
properties["resources"][0]["schema"]["foreignKeys"] = [
Expand Down Expand Up @@ -534,28 +629,6 @@ def test_fail_primary_key_with_bad_array_item():
assert issues[0].jsonpath == "$.resources[0].schema.primaryKey[0]"


def test_fail_package_license_with_no_name_or_path():
properties = example_package_properties()
del properties["licenses"][0]["name"]

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "required"
assert issues[0].jsonpath == "$.licenses[0]"


def test_fail_resource_license_with_no_name_or_path():
properties = example_package_properties()
properties["resources"][0]["licenses"] = [{}]

issues = check(properties)

assert len(issues) == 1
assert issues[0].type == "required"
assert issues[0].jsonpath == "$.resources[0].licenses[0]"


def test_error_as_true():
properties = {
"name": 123,
Expand Down
Loading