diff --git a/docs/extras/code_samples/default_v2.txt b/docs/extras/code_samples/default_v2.txt index d58e4343..dbc840cc 100644 --- a/docs/extras/code_samples/default_v2.txt +++ b/docs/extras/code_samples/default_v2.txt @@ -1,5 +1,4 @@ from mindee import ClientV2, InferencePredictOptions -from mindee.parsing.v2 import InferenceResponse input_path = "/path/to/the/file.ext" api_key = "MY_API_KEY" @@ -20,9 +19,9 @@ options = InferencePredictOptions( input_doc = mindee_client.source_from_path(input_path) # Upload the file -response: InferenceResponse = mindee_client.enqueue_and_parse( +response = mindee_client.enqueue_and_parse( input_doc, options ) # Print a brief summary of the parsed data -print(response.inference) \ No newline at end of file +print(response.inference) diff --git a/mindee/parsing/v2/__init__.py b/mindee/parsing/v2/__init__.py index 88c26ad8..e69de29b 100644 --- a/mindee/parsing/v2/__init__.py +++ b/mindee/parsing/v2/__init__.py @@ -1,16 +0,0 @@ -from mindee.parsing.v2.base_field import ( - InferenceFields, - ListField, - ObjectField, - SimpleField, -) -from mindee.parsing.v2.common_response import CommonResponse -from mindee.parsing.v2.error_response import ErrorResponse -from mindee.parsing.v2.inference import Inference -from mindee.parsing.v2.inference_file import InferenceFile -from mindee.parsing.v2.inference_model import InferenceModel -from mindee.parsing.v2.inference_options import InferenceOptions -from mindee.parsing.v2.inference_response import InferenceResponse -from mindee.parsing.v2.inference_result import InferenceResult -from mindee.parsing.v2.job import Job -from mindee.parsing.v2.job_response import JobResponse diff --git a/mindee/parsing/v2/base_field.py b/mindee/parsing/v2/base_field.py index ee75498c..6d965111 100644 --- a/mindee/parsing/v2/base_field.py +++ b/mindee/parsing/v2/base_field.py @@ -1,116 +1,10 @@ -from typing import Dict, List, Union +from typing import List, Optional -from mindee.error.mindee_error import MindeeApiV2Error -from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.dynamic_field import DynamicField -class BaseField: - """Base field class for V2.""" +class BaseField(DynamicField): + """Field with base information.""" - _indent_level: int - """Indentation level for rst display.""" - - def __init__(self, indent_level=0) -> None: - self._indent_level = indent_level - - @staticmethod - def create_field( - raw_response: StringDict, indent_level: int = 0 - ) -> Union["ListField", "ObjectField", "SimpleField"]: - """Factory function to create appropriate field instances.""" - if isinstance(raw_response, dict): - if "items" in raw_response: - return ListField(raw_response, indent_level) - if "fields" in raw_response: - return ObjectField(raw_response, indent_level) - if "value" in raw_response: - return SimpleField(raw_response, indent_level) - raise MindeeApiV2Error(f"Unrecognized field format in {raw_response}.") - raise MindeeApiV2Error(f"Unrecognized field format {raw_response}.") - - -class InferenceFields(Dict[str, Union["SimpleField", "ObjectField", "ListField"]]): - """Inference fields dict.""" - - def __init__(self, raw_response: StringDict, indent_level: int = 0) -> None: - super().__init__() - for key, value in raw_response.items(): - field_obj = BaseField.create_field(value, indent_level) - self[key] = field_obj - - def __getattr__(self, item): - try: - return self[item] - except KeyError: - raise AttributeError(item) from None - - def __str__(self) -> str: - str_fields = "" - for field_key, field_value in self.items(): - str_fields += f":{field_key}: {field_value}" - return str_fields - - -class ListField(BaseField): - """List field containing multiple fields.""" - - items: List[Union["ListField", "ObjectField", "SimpleField"]] - """Items contained in the list.""" - - def __init__(self, raw_response: StringDict, indent_level: int = 0): - super().__init__(indent_level) - - self.items = [] - for item in raw_response["items"]: - if isinstance(item, dict): - self.items.append(BaseField.create_field(item, self._indent_level + 2)) - else: - raise MindeeApiV2Error(f"Unrecognized field format '{item}'.") - - def __str__(self) -> str: - out_str = "" - for item in self.items: - out_str += f"* {str(item)[2:] if item else ''}" - return "\n" + out_str if out_str else "" - - -class ObjectField(BaseField): - """Object field containing multiple fields.""" - - fields: InferenceFields - """Fields contained in the object.""" - - def __init__(self, raw_response: StringDict, indent_level: int = 0): - super().__init__(indent_level) - inner_fields = raw_response.get("fields", raw_response) - - self.fields = InferenceFields(inner_fields, self._indent_level + 1) - - def __str__(self) -> str: - out_str = "" - for field_key, field_value in self.fields.items(): - if isinstance(field_value, ListField): - value_str = "" - if len(field_value.items) > 0: - value_str = ( - " " * self._indent_level + str(field_value) - if field_value - else "" - ) - out_str += f"{' ' * self._indent_level}:{field_key}: {value_str}" - else: - out_str += f"{' ' * self._indent_level}:{field_key}: {field_value if field_value else ''}" - return out_str - - -class SimpleField(BaseField): - """Simple field containing a single value.""" - - value: Union[str, float, bool, None] - - def __init__(self, raw_response: StringDict, indent_level: int = 0): - super().__init__(indent_level) - self.value = raw_response["value"] = raw_response.get("value", None) - - def __str__(self) -> str: - return f"{self.value}\n" if self.value else "\n" + locations: List + confidence: Optional[str] diff --git a/mindee/parsing/v2/dynamic_field.py b/mindee/parsing/v2/dynamic_field.py new file mode 100644 index 00000000..17d323e7 --- /dev/null +++ b/mindee/parsing/v2/dynamic_field.py @@ -0,0 +1,49 @@ +from enum import Enum +from importlib import import_module + +from mindee.error import MindeeApiV2Error +from mindee.parsing.common.string_dict import StringDict + + +class FieldType(str, Enum): + """Field types.""" + + OBJECT = "ObjectField" + LIST = "ListField" + SIMPLE = "SimpleField" + + +class DynamicField: + """Field that can be displayed in rst format.""" + + _indent_level: int + """Indentation level for rst display.""" + field_type: FieldType + """Field type.""" + + def __init__(self, field_type: FieldType, indent_level=0) -> None: + self.field_type = field_type + self._indent_level = indent_level + + def multi_str(self) -> str: + """String representation of the field in a list.""" + return str(self) + + +def get_field_type(raw_response: StringDict, indent_level: int = 0) -> DynamicField: + """Get appropriate field types.""" + if isinstance(raw_response, dict): + if "value" in raw_response: + field_file = import_module("mindee.parsing.v2.simple_field") + field_class = getattr(field_file, FieldType.SIMPLE.value) + elif "items" in raw_response: + field_file = import_module("mindee.parsing.v2.list_field") + field_class = getattr(field_file, FieldType.LIST.value) + elif "fields" in raw_response: + field_file = import_module("mindee.parsing.v2.object_field") + field_class = getattr(field_file, FieldType.OBJECT.value) + else: + raise MindeeApiV2Error(f"Unrecognized field format in {raw_response}.") + return field_class(raw_response, indent_level) + + raise MindeeApiV2Error(f"Unrecognized field format {raw_response}.") diff --git a/mindee/parsing/v2/inference.py b/mindee/parsing/v2/inference.py index 61e63c49..85b4793d 100644 --- a/mindee/parsing/v2/inference.py +++ b/mindee/parsing/v2/inference.py @@ -25,14 +25,14 @@ def __init__(self, raw_response: StringDict): self.id = raw_response["id"] if "id" in raw_response else None def __str__(self) -> str: + alias = f" {self.file.alias}" if self.file.alias else "" return ( - f"Inference\n" - f"#########\n" - f":Model: {self.model.id}\n" - f":File:\n" - f" :Name: {self.file.name}\n" - f" :Alias: {self.file.alias}\n\n" - f"Result\n" - f"======\n" - f"{self.result}\n" + f"Inference\n#########" + f"\nModel\n=====" + f"\n:ID: {self.model.id}" + f"\n\nFile\n====" + f"\n:Name: {self.file.name}" + f"\n:Alias:{alias}" + f"{self.result}" + "\n" ) diff --git a/mindee/parsing/v2/inference_response.py b/mindee/parsing/v2/inference_response.py index 347bd6ab..f1bb71c2 100644 --- a/mindee/parsing/v2/inference_response.py +++ b/mindee/parsing/v2/inference_response.py @@ -12,3 +12,6 @@ class InferenceResponse(CommonResponse): def __init__(self, raw_response: StringDict) -> None: super().__init__(raw_response) self.inference = Inference(raw_response["inference"]) + + def __str__(self) -> str: + return str(self.inference) diff --git a/mindee/parsing/v2/inference_result.py b/mindee/parsing/v2/inference_result.py index 9fdac6f4..2399752a 100644 --- a/mindee/parsing/v2/inference_result.py +++ b/mindee/parsing/v2/inference_result.py @@ -1,28 +1,28 @@ from typing import Optional from mindee.parsing.common.string_dict import StringDict -from mindee.parsing.v2.base_field import InferenceFields -from mindee.parsing.v2.inference_options import InferenceOptions +from mindee.parsing.v2.inference_result_fields import InferenceResultFields +from mindee.parsing.v2.inference_result_options import InferenceResultOptions class InferenceResult: """Inference result info.""" - fields: InferenceFields + fields: InferenceResultFields """Fields contained in the inference.""" - options: Optional[InferenceOptions] + options: Optional[InferenceResultOptions] """Potential options retrieved alongside the inference.""" def __init__(self, raw_response: StringDict) -> None: - self.fields = InferenceFields(raw_response["fields"]) + self.fields = InferenceResultFields(raw_response["fields"]) self.options = ( - InferenceOptions(raw_response["options"]) + InferenceResultOptions(raw_response["options"]) if raw_response.get("options") else None ) def __str__(self) -> str: - out_str = f":fields: {self.fields}" + out_str = f"\n\nFields\n======{self.fields}" if self.options: - out_str += f"\n:options: {self.options}" + out_str += f"\n\nOptions\n====={self.options}" return out_str diff --git a/mindee/parsing/v2/inference_result_fields.py b/mindee/parsing/v2/inference_result_fields.py new file mode 100644 index 00000000..f30b30c6 --- /dev/null +++ b/mindee/parsing/v2/inference_result_fields.py @@ -0,0 +1,29 @@ +from typing import Dict + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.dynamic_field import DynamicField, FieldType, get_field_type + + +class InferenceResultFields(Dict[str, DynamicField]): + """Inference fields dict.""" + + def __init__(self, raw_response: StringDict, indent_level: int = 0) -> None: + super().__init__() + for key, value in raw_response.items(): + field_obj = get_field_type(value, indent_level) + self[key] = field_obj + + def __getattr__(self, item): + try: + return self[item] + except KeyError: + raise AttributeError(item) from None + + def __str__(self) -> str: + str_fields = "" + for field_key, field_value in self.items(): + if field_value.field_type == FieldType.SIMPLE: + str_fields += f"\n:{field_key}: {field_value}" + else: + str_fields += f"\n:{field_key}:{field_value}" + return str_fields diff --git a/mindee/parsing/v2/inference_options.py b/mindee/parsing/v2/inference_result_options.py similarity index 92% rename from mindee/parsing/v2/inference_options.py rename to mindee/parsing/v2/inference_result_options.py index 22df6880..8eafd547 100644 --- a/mindee/parsing/v2/inference_options.py +++ b/mindee/parsing/v2/inference_result_options.py @@ -4,7 +4,7 @@ from mindee.parsing.v2.raw_text import RawText -class InferenceOptions: +class InferenceResultOptions: """Optional information about the document.""" raw_texts: List[RawText] diff --git a/mindee/parsing/v2/list_field.py b/mindee/parsing/v2/list_field.py new file mode 100644 index 00000000..fc578086 --- /dev/null +++ b/mindee/parsing/v2/list_field.py @@ -0,0 +1,29 @@ +from typing import List + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.dynamic_field import DynamicField, FieldType, get_field_type + + +class ListField(DynamicField): + """List field containing multiple fields.""" + + items: List[DynamicField] + """Items contained in the list.""" + + def __init__(self, raw_response: StringDict, indent_level: int = 0): + super().__init__(FieldType.LIST, indent_level) + + self.items = [] + for item in raw_response["items"]: + self.items.append(get_field_type(item)) + + def __str__(self) -> str: + out_str = "" + indent = " " * self._indent_level + for item in self.items: + if item.field_type == FieldType.SIMPLE: + out_str += f"\n{indent} * {item}" + elif item.field_type == FieldType.OBJECT: + out_str += f"\n{indent} * {item.multi_str()}" + + return out_str diff --git a/mindee/parsing/v2/object_field.py b/mindee/parsing/v2/object_field.py new file mode 100644 index 00000000..994a7b1f --- /dev/null +++ b/mindee/parsing/v2/object_field.py @@ -0,0 +1,41 @@ +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.base_field import BaseField +from mindee.parsing.v2.dynamic_field import FieldType +from mindee.parsing.v2.inference_result_fields import InferenceResultFields + + +class ObjectField(BaseField): + """Object field containing multiple fields.""" + + fields: InferenceResultFields + """Fields contained in the object.""" + + def __init__(self, raw_response: StringDict, indent_level: int = 0): + super().__init__(FieldType.OBJECT, indent_level) + inner_fields = raw_response.get("fields", raw_response) + + self.fields = InferenceResultFields(inner_fields, self._indent_level + 1) + + def single_str(self) -> str: + """String representation of a single object field.""" + out_str = "" + indent = " " * self._indent_level + for field_key, field_value in self.fields.items(): + out_str += f"\n{indent} :{field_key}: {field_value if field_value else ''}" + return out_str + + def multi_str(self) -> str: + """String representation of a list object field.""" + out_str = "" + indent = " " * self._indent_level + first = True + for field_key, field_value in self.fields.items(): + if first: + out_str += f"{indent}:{field_key}: {field_value}" + else: + out_str += f"\n{indent} :{field_key}: {field_value}" + first = False + return out_str + + def __str__(self) -> str: + return self.single_str() diff --git a/mindee/parsing/v2/simple_field.py b/mindee/parsing/v2/simple_field.py new file mode 100644 index 00000000..82445544 --- /dev/null +++ b/mindee/parsing/v2/simple_field.py @@ -0,0 +1,18 @@ +from typing import Union + +from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.v2.base_field import BaseField +from mindee.parsing.v2.dynamic_field import FieldType + + +class SimpleField(BaseField): + """Simple field containing a single value.""" + + value: Union[str, float, bool, None] + + def __init__(self, raw_response: StringDict, indent_level: int = 0): + super().__init__(FieldType.SIMPLE, indent_level) + self.value = raw_response["value"] = raw_response.get("value", None) + + def __str__(self) -> str: + return str(self.value) diff --git a/tests/data b/tests/data index e2912fbd..9ee7c180 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit e2912fbd362b7ccf595a5a8d6cc6a67f78901cde +Subproject commit 9ee7c18088018c8ceeab8ba705b17dc2038d56d8 diff --git a/tests/test_client_v2.py b/tests/test_client_v2.py index 1a382958..b4bae6e7 100644 --- a/tests/test_client_v2.py +++ b/tests/test_client_v2.py @@ -7,7 +7,8 @@ from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.input import LocalInputSource, PathInput from mindee.mindee_http.base_settings import USER_AGENT -from mindee.parsing.v2 import Job, JobResponse +from mindee.parsing.v2.job import Job +from mindee.parsing.v2.job_response import JobResponse from tests.test_inputs import FILE_TYPES_DIR, V2_DATA_DIR from tests.utils import dummy_envvars diff --git a/tests/v2/test_inference_response.py b/tests/v2/test_inference_response.py index b1890f49..d3971c68 100644 --- a/tests/v2/test_inference_response.py +++ b/tests/v2/test_inference_response.py @@ -1,45 +1,36 @@ import json +from typing import Tuple import pytest from mindee import ClientV2, LocalResponse -from mindee.parsing.v2 import ( - Inference, - InferenceFile, - InferenceModel, - InferenceResponse, - ListField, - ObjectField, - SimpleField, -) +from mindee.parsing.v2.inference import Inference +from mindee.parsing.v2.inference_file import InferenceFile +from mindee.parsing.v2.inference_model import InferenceModel +from mindee.parsing.v2.inference_response import InferenceResponse +from mindee.parsing.v2.list_field import ListField +from mindee.parsing.v2.object_field import ObjectField +from mindee.parsing.v2.simple_field import SimpleField from tests.test_inputs import V2_DATA_DIR -@pytest.fixture -def deep_nested_fields() -> dict: - with (V2_DATA_DIR / "inference/deep_nested_fields.json").open( - "r", encoding="utf-8" - ) as fh: - return json.load(fh) - - -@pytest.fixture -def standard_field_types() -> dict: - with (V2_DATA_DIR / "inference/standard_field_types.json").open( - "r", encoding="utf-8" - ) as fh: - return json.load(fh) - - -@pytest.fixture -def raw_texts() -> dict: - with (V2_DATA_DIR / "inference/raw_texts.json").open("r", encoding="utf-8") as fh: - return json.load(fh) +def _get_samples(name: str) -> Tuple[dict, str]: + with (V2_DATA_DIR / "inference" / f"{name}.json").open("r", encoding="utf-8") as fh: + json_sample = json.load(fh) + try: + with (V2_DATA_DIR / "inference" / f"{name}.rst").open( + "r", encoding="utf-8" + ) as fh: + rst_sample = fh.read() + except FileNotFoundError: + rst_sample = "" + return json_sample, rst_sample @pytest.mark.v2 -def test_deep_nested_fields(deep_nested_fields): - inference_result = InferenceResponse(deep_nested_fields) +def test_deep_nested_fields(): + json_sample, rst_sample = _get_samples("deep_nested_fields") + inference_result = InferenceResponse(json_sample) assert isinstance(inference_result.inference, Inference) assert isinstance( inference_result.inference.result.fields.field_simple, SimpleField @@ -107,8 +98,29 @@ def test_deep_nested_fields(deep_nested_fields): @pytest.mark.v2 -def test_raw_texts(raw_texts): - inference_result = InferenceResponse(raw_texts) +def test_standard_field_types(): + json_sample, rst_sample = _get_samples("standard_field_types") + inference_result = InferenceResponse(json_sample) + assert isinstance(inference_result.inference, Inference) + assert isinstance( + inference_result.inference.result.fields.field_simple, SimpleField + ) + assert isinstance( + inference_result.inference.result.fields.field_object, ObjectField + ) + assert isinstance( + inference_result.inference.result.fields.field_simple_list, ListField + ) + assert isinstance( + inference_result.inference.result.fields.field_object_list, ListField + ) + assert rst_sample == str(inference_result) + + +@pytest.mark.v2 +def test_raw_texts(): + json_sample, rst_sample = _get_samples("raw_texts") + inference_result = InferenceResponse(json_sample) assert isinstance(inference_result.inference, Inference) assert inference_result.inference.result.options