Skip to content

Commit d9607aa

Browse files
authored
♻️ update structure of raw texts (#333)
1 parent 4845e62 commit d9607aa

File tree

4 files changed

+67
-89
lines changed

4 files changed

+67
-89
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ repos:
2222
]
2323

2424
- repo: https://github.com/gitleaks/gitleaks
25-
rev: v8.18.2
25+
rev: v8.18.4
2626
hooks:
2727
- id: gitleaks
2828

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,23 @@
1-
from typing import List, Optional
1+
from typing import List
22

33
from mindee.parsing.common.string_dict import StringDict
44

55

6+
class RawText:
    """One raw-text entry built from a response dictionary."""

    # Value stored under the "page" key of the response entry.
    page: int
    # Value stored under the "content" key of the response entry.
    content: str

    def __init__(self, raw_response: StringDict):
        """
        Copy the ``page`` and ``content`` values out of ``raw_response``.

        :param raw_response: Dictionary holding the ``page`` and ``content`` keys.
        :raises KeyError: If either key is missing.
        """
        page_value = raw_response["page"]
        content_value = raw_response["content"]
        self.page = page_value
        self.content = content_value
15+
16+
617
class InferenceOptions:
    """Optional information about the document."""

    # One RawText per entry of the response's "raw_texts" list; empty when
    # the response carries no such entry.
    raw_texts: List[RawText]

    def __init__(self, raw_response: StringDict):
        """
        Build the options from the raw response dictionary.

        :param raw_response: Dictionary that may hold a ``raw_texts`` list,
            each item being a dictionary accepted by ``RawText``.
        """
        # The previous implementation treated the raw-text key as optional;
        # keep that tolerance instead of raising KeyError when it is absent.
        # NOTE(review): assumes StringDict behaves like a plain dict (`.get`) - confirm.
        raw_entries = raw_response.get("raw_texts") or []
        self.raw_texts = [RawText(raw_text) for raw_text in raw_entries]

tests/v2/test_inference_response.py

Lines changed: 51 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import pytest
44

55
from mindee import ClientV2, LocalResponse
6-
from mindee.parsing.common.string_dict import StringDict
76
from mindee.parsing.v2 import (
87
Inference,
98
InferenceFile,
@@ -17,90 +16,30 @@
1716

1817

1918
@pytest.fixture
20-
def inference_result_json() -> StringDict:
21-
return {
22-
"inference": {
23-
"model": {"id": "test-model-id"},
24-
"file": {"name": "test-file-name.jpg", "alias": None},
25-
"result": {
26-
"fields": {
27-
"field_simple": {"value": "value_1"},
28-
"field_object": {
29-
"fields": {
30-
"sub_object_simple": {"value": "value_2"},
31-
"sub_object_list": {
32-
"items": [
33-
{
34-
"fields": {
35-
"sub_object_list_sub_list_simple": {
36-
"value": "value_3"
37-
}
38-
}
39-
},
40-
{
41-
"fields": {
42-
"sub_object_list_sub_list_object_subobject_1": {
43-
"value": "value_4"
44-
},
45-
"sub_object_list_sub_list_object_subobject_2": {
46-
"value": "value_5"
47-
},
48-
}
49-
},
50-
]
51-
},
52-
"sub_object_object": {
53-
"fields": {
54-
"sub_object_object_sub_object_simple": {
55-
"value": "value_6"
56-
},
57-
"sub_object_object_sub_object_object": {
58-
"fields": {
59-
"sub_object_object_sub_object_object_simple_1": {
60-
"value": "value_7"
61-
},
62-
"sub_object_object_sub_object_object_simple_2": {
63-
"value": "value_8"
64-
},
65-
}
66-
},
67-
"sub_object_object_sub_object_list": {
68-
"items": [
69-
{
70-
"fields": {
71-
"sub_object_object_sub_object_list_simple": {
72-
"value": "value_9"
73-
},
74-
"sub_object_object_sub_object_list_object": {
75-
"fields": {
76-
"sub_object_object_sub_object_list_object_subobject_1": {
77-
"value": "value_10"
78-
},
79-
"sub_object_object_sub_object_list_object_subobject_2": {
80-
"value": "value_11"
81-
},
82-
}
83-
},
84-
}
85-
}
86-
]
87-
},
88-
}
89-
},
90-
}
91-
},
92-
},
93-
"options": {
94-
"raw_text": ["toto", "tata", "titi"],
95-
},
96-
},
97-
}
98-
}
19+
def deep_nested_fields() -> dict:
    """Return the parsed ``inference/deep_nested_fields.json`` sample under ``V2_DATA_DIR``."""
    sample_path = V2_DATA_DIR / "inference/deep_nested_fields.json"
    with sample_path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
24+
25+
26+
@pytest.fixture
def standard_field_types() -> dict:
    """Return the parsed ``inference/standard_field_types.json`` sample under ``V2_DATA_DIR``."""
    sample_path = V2_DATA_DIR / "inference/standard_field_types.json"
    with sample_path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
32+
33+
34+
@pytest.fixture
def raw_texts() -> dict:
    """Return the parsed ``inference/raw_texts.json`` sample under ``V2_DATA_DIR``."""
    sample_path = V2_DATA_DIR / "inference/raw_texts.json"
    with sample_path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload
9938

10039

10140
@pytest.mark.v2
102-
def test_inference_response(inference_result_json):
103-
inference_result = InferenceResponse(inference_result_json)
41+
def test_deep_nested_fields(deep_nested_fields):
42+
inference_result = InferenceResponse(deep_nested_fields)
10443
assert isinstance(inference_result.inference, Inference)
10544
assert isinstance(
10645
inference_result.inference.result.fields.field_simple, SimpleField
@@ -166,9 +105,37 @@ def test_inference_response(inference_result_json):
166105
== "value_9"
167106
)
168107

108+
109+
@pytest.mark.v2
def test_standard_field_types(standard_field_types):
    """Check that each standard field in the sample is parsed into its field class."""
    # Named after its fixture: reusing `test_deep_nested_fields` would rebind
    # that module-level name, so the earlier test of that name would never run.
    inference_result = InferenceResponse(standard_field_types)
    assert isinstance(inference_result.inference, Inference)
    assert isinstance(
        inference_result.inference.result.fields.field_simple, SimpleField
    )
    assert isinstance(
        inference_result.inference.result.fields.field_object, ObjectField
    )
    assert isinstance(
        inference_result.inference.result.fields.field_simple_list, ListField
    )
    assert isinstance(
        inference_result.inference.result.fields.field_object_list, ListField
    )
125+
126+
127+
@pytest.mark.v2
def test_raw_texts(raw_texts):
    """Check that the raw-texts sample yields two entries and verify the first one."""
    response = InferenceResponse(raw_texts)
    assert isinstance(response.inference, Inference)

    assert response.inference.result.options
    parsed_texts = response.inference.result.options.raw_texts
    assert len(parsed_texts) == 2

    first_text = parsed_texts[0]
    assert first_text.page == 0
    assert first_text.content == "This is the raw text of the first page..."
172139

173140

174141
@pytest.mark.v2

0 commit comments

Comments
 (0)