Skip to content

Commit 529b0cf

Browse files
authored
feat: ✨ handle grouped errors under resource fields (#175)
# Description

This PR adds a function for handling grouped errors under `$.resources[x].schema.fields[x]`. The problem is that each field type has its own JSON sub-schema, and every one of these sub-schemas flags an issue when the type of a field does not match its own type. So, if a field has `type="number"` and there is something wrong with the field, the sub-schemas for `year`, `string`, etc. will also flag issues. The goal is to flag only the issues raised by the `number` sub-schema.

Part of #15

Needs an in-depth review.

## Checklist

- [x] Formatted Markdown
- [x] Ran `just run-all`
1 parent f5c590b commit 529b0cf

File tree

3 files changed

+138
-3
lines changed

3 files changed

+138
-3
lines changed

src/check_datapackage/check.py

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88
from jsonschema import Draft7Validator, FormatChecker, ValidationError
99

1010
from check_datapackage.config import Config
11-
from check_datapackage.constants import DATA_PACKAGE_SCHEMA_PATH, GROUP_ERRORS
11+
from check_datapackage.constants import (
12+
DATA_PACKAGE_SCHEMA_PATH,
13+
FIELD_TYPES,
14+
GROUP_ERRORS,
15+
)
1216
from check_datapackage.exclusion import exclude
1317
from check_datapackage.extensions import apply_extensions
1418
from check_datapackage.internals import (
@@ -151,13 +155,15 @@ class SchemaError:
151155
schema_path (str): The path to the violated check in the JSON schema.
152156
Path components are separated by '/'.
153157
jsonpath (str): The JSON path to the field that violates the check.
158+
instance (Any): The part of the object that failed the check.
154159
parent (Optional[SchemaError]): The error group the error belongs to, if any.
155160
"""
156161

157162
message: str
158163
type: str
159164
schema_path: str
160165
jsonpath: str
166+
instance: Any
161167
parent: Optional["SchemaError"] = None
162168

163169

@@ -193,7 +199,7 @@ def _handle_S_resources_x(
193199
) -> SchemaErrorEdits:
194200
"""Do not flag missing `path` and `data` separately."""
195201
edits = SchemaErrorEdits()
196-
errors_in_group = _filter(schema_errors, lambda error: error.parent == parent_error)
202+
errors_in_group = _get_errors_in_group(schema_errors, parent_error)
197203
# If the parent error is caused by other errors, remove it
198204
if errors_in_group:
199205
edits.remove.append(parent_error)
@@ -212,6 +218,7 @@ def _handle_S_resources_x(
212218
type="required",
213219
jsonpath=parent_error.jsonpath,
214220
schema_path=parent_error.schema_path,
221+
instance=parent_error.instance,
215222
)
216223
)
217224

@@ -230,7 +237,7 @@ def _handle_S_resources_x_path(
230237
If `path` is an array, flag errors for the array-based schema.
231238
"""
232239
edits = SchemaErrorEdits()
233-
errors_in_group = _filter(schema_errors, lambda error: error.parent == parent_error)
240+
errors_in_group = _get_errors_in_group(schema_errors, parent_error)
234241
type_errors = _filter(errors_in_group, _is_path_type_error)
235242
only_type_errors = len(errors_in_group) == len(type_errors)
236243

@@ -246,6 +253,7 @@ def _handle_S_resources_x_path(
246253
type="type",
247254
jsonpath=type_errors[0].jsonpath,
248255
schema_path=type_errors[0].schema_path,
256+
instance=parent_error.instance,
249257
)
250258
)
251259

@@ -254,11 +262,54 @@ def _handle_S_resources_x_path(
254262
return edits
255263

256264

265+
def _handle_S_resources_x_schema_fields_x(
    parent_error: SchemaError,
    schema_errors: list[SchemaError],
) -> SchemaErrorEdits:
    """Only flag errors for the relevant field type.

    E.g., if the field type is `string`, flag errors for the string-based schema only.

    Args:
        parent_error: The grouping error raised for the field. Its `instance`
            is the field as it appears in the checked properties.
        schema_errors: The errors to search for members of the parent's group.

    Returns:
        The edits to apply. The parent error is always removed. If the field's
        `type` is not one of `FIELD_TYPES`, every error in the group is replaced
        by a single `enum` error on the field's `type` property. Otherwise, the
        errors from the sub-schemas of all other field types are removed.
    """
    edits = SchemaErrorEdits()
    errors_in_group = _get_errors_in_group(schema_errors, parent_error)
    edits.remove.append(parent_error)

    field = parent_error.instance
    # A field that is not a JSON object (e.g. a string or a list) has no `type`
    # property to look up, and calling `.get` on it would raise. Treat it like a
    # field without a `type`, so that exactly one sub-schema's errors are kept.
    field_type = field.get("type", "string") if isinstance(field, dict) else "string"

    # The field's type is unknown
    if field_type not in FIELD_TYPES:
        unknown_field_error = SchemaError(
            message=(
                "The type property in this resource schema field is incorrect. "
                f"The value can only be one of these types: {', '.join(FIELD_TYPES)}."
            ),
            type="enum",
            jsonpath=f"{parent_error.jsonpath}.type",
            schema_path=parent_error.schema_path,
            instance=parent_error.instance,
        )
        # Replace all errors with an unknown field error
        edits.add.append(unknown_field_error)
        edits.remove.extend(errors_in_group)
        return edits

    # The field's type is known; keep only errors for this field type.
    # The position of the type in `FIELD_TYPES` is used as the index of its
    # sub-schema under `fields/items/oneOf`. The trailing slash keeps index 1
    # from also matching 10, 11, etc.
    schema_index = FIELD_TYPES.index(field_type)
    own_schema_fragment = f"fields/items/oneOf/{schema_index}/"

    errors_for_other_types = _filter(
        errors_in_group,
        lambda error: own_schema_fragment not in error.schema_path,
    )
    edits.remove.extend(errors_for_other_types)
    return edits
305+
306+
257307
# Pairs a JSON schema path fragment with the handler for grouped errors raised
# under it. Each handler takes the parent (grouping) error and a list of schema
# errors, and returns the edits to apply.
# NOTE(review): how the fragment is matched against an error's schema path, and
# whether the order of the entries matters, is decided where this list is
# consumed - confirm there before reordering.
_schema_path_to_handler: list[
    tuple[str, Callable[[SchemaError, list[SchemaError]], SchemaErrorEdits]]
] = [
    ("resources/items/oneOf", _handle_S_resources_x),
    ("resources/items/properties/path/oneOf", _handle_S_resources_x_path),
    ("fields/items/oneOf", _handle_S_resources_x_schema_fields_x),
]
263314

264315

@@ -330,6 +381,7 @@ def _create_schema_error(error: ValidationError) -> SchemaError:
330381
type=str(error.validator),
331382
jsonpath=_get_full_json_path_from_error(error),
332383
schema_path="/".join(_map(error.absolute_schema_path, str)),
384+
instance=error.instance,
333385
parent=_create_schema_error(error.parent) if error.parent else None, # type: ignore[arg-type]
334386
)
335387

@@ -348,3 +400,9 @@ def _create_issue(error: SchemaError) -> Issue:
348400
jsonpath=error.jsonpath,
349401
type=error.type,
350402
)
403+
404+
405+
def _get_errors_in_group(
    schema_errors: list[SchemaError], parent_error: SchemaError
) -> list[SchemaError]:
    """Get the errors that belong directly to the group of the parent error.

    Args:
        schema_errors: The errors to search.
        parent_error: The error that defines the group.

    Returns:
        The errors whose `parent` is equal to `parent_error`.
    """
    return _filter(schema_errors, lambda error: error.parent == parent_error)

src/check_datapackage/constants.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,21 @@
66
DATA_PACKAGE_SCHEMA_PATH = Path(
77
str(files("check_datapackage.schemas").joinpath("data-package-2-0.json"))
88
)
9+
10+
# The field types accepted for a resource schema field.
# The order matters: `check.py` uses the position of a type in this list as the
# index of that type's sub-schema under `fields/items/oneOf`, so entries must
# not be reordered or inserted in the middle without checking the schema.
# NOTE(review): presumably this mirrors the order of the `oneOf` entries in the
# bundled Data Package JSON schema - confirm when updating the schema.
FIELD_TYPES = [
    "string",
    "number",
    "integer",
    "date",
    "time",
    "datetime",
    "year",
    "yearmonth",
    "boolean",
    "object",
    "geopoint",
    "geojson",
    "array",
    "duration",
    "any",
]

tests/test_check.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from check_datapackage.check import DataPackageError, check
66
from check_datapackage.config import Config
7+
from check_datapackage.constants import FIELD_TYPES
78
from check_datapackage.examples import (
89
example_package_properties,
910
example_resource_properties,
@@ -274,6 +275,64 @@ def test_fail_with_bad_resource_path(path, location, type):
274275
assert issues[0].jsonpath == location
275276

276277

278+
def test_fail_empty_field():
    """An empty field should only be flagged for its missing `name`."""
    properties = example_package_properties()
    properties["resources"][0]["schema"]["fields"][0] = {}

    issues = check(properties)

    assert len(issues) == 1
    assert issues[0].type == "required"
    assert issues[0].jsonpath == "$.resources[0].schema.fields[0].name"
287+
288+
289+
def test_fail_unknown_field():
    """A field with an unknown `type` should give one `enum` issue on `type`."""
    properties = example_package_properties()
    properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown"

    issues = check(properties)

    assert len(issues) == 1
    assert issues[0].type == "enum"
    assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type"
298+
299+
300+
@mark.parametrize("type", FIELD_TYPES)
def test_fail_field_with_bad_property(type):
    """A bad `title` should be flagged once, whatever the known field type is."""
    properties = example_package_properties()
    properties["resources"][0]["schema"]["fields"][0]["type"] = type
    properties["resources"][0]["schema"]["fields"][0]["title"] = 4

    issues = check(properties)

    assert len(issues) == 1
    assert issues[0].type == "type"
    assert issues[0].jsonpath == "$.resources[0].schema.fields[0].title"
311+
312+
313+
def test_fail_field_with_bad_format():
    """A non-string `format` should give one `enum` issue on `format`."""
    properties = example_package_properties()
    properties["resources"][0]["schema"]["fields"][0]["format"] = 4

    issues = check(properties)

    assert len(issues) == 1
    assert issues[0].type == "enum"
    assert issues[0].jsonpath == "$.resources[0].schema.fields[0].format"
322+
323+
324+
def test_fail_unknown_field_with_bad_property():
    """An unknown `type` should be the only issue, even with another bad property."""
    properties = example_package_properties()
    properties["resources"][0]["schema"]["fields"][0]["title"] = 4
    properties["resources"][0]["schema"]["fields"][0]["type"] = "unknown"

    issues = check(properties)

    assert len(issues) == 1
    assert issues[0].type == "enum"
    assert issues[0].jsonpath == "$.resources[0].schema.fields[0].type"
334+
335+
277336
def test_error_as_true():
278337
properties = {
279338
"name": 123,

0 commit comments

Comments
 (0)