Skip to content

Commit 731eb03

Browse files
✨ add support for remote sources (#344)
1 parent bc7f354 commit 731eb03

20 files changed

+104
-53
lines changed

.github/workflows/_test-integrations.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ jobs:
4949
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
5050
MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }}
5151
MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
52+
MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }}
5253
run: |
5354
pytest -m integration
5455

mindee/client.py

Lines changed: 0 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -562,16 +562,3 @@ def create_endpoint(
562562
)
563563
version = "1"
564564
return self._build_endpoint(endpoint_name, account_name, version)
565-
566-
@staticmethod
567-
def source_from_url(
568-
url: str,
569-
) -> UrlInputSource:
570-
"""
571-
Load a document from a URL.
572-
573-
:param url: Raw byte input
574-
"""
575-
return UrlInputSource(
576-
url,
577-
)

mindee/client_mixin.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from typing import BinaryIO, Union
33

44
from mindee.error import MindeeClientError
5-
from mindee.input import Base64Input, BytesInput, FileInput, PathInput
5+
from mindee.input import Base64Input, BytesInput, FileInput, PathInput, UrlInputSource
66

77

88
class ClientMixin:
@@ -89,3 +89,16 @@ def _validate_async_params(
8989
)
9090
if max_retries < min_retries:
9191
raise MindeeClientError(f"Cannot set retries to less than {min_retries}.")
92+
93+
@staticmethod
94+
def source_from_url(
95+
url: str,
96+
) -> UrlInputSource:
97+
"""
98+
Load a document from a URL.
99+
100+
:param url: URL of the remote document to load.
101+
"""
102+
return UrlInputSource(
103+
url,
104+
)

mindee/client_v2.py

Lines changed: 11 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
from time import sleep
2-
from typing import Optional
2+
from typing import Optional, Union
33

44
from mindee.client_mixin import ClientMixin
55
from mindee.error.mindee_error import MindeeError
66
from mindee.error.mindee_http_error_v2 import handle_error_v2
7+
from mindee.input import UrlInputSource
78
from mindee.input.inference_parameters import InferenceParameters
89
from mindee.input.polling_options import PollingOptions
910
from mindee.input.sources.local_input_source import LocalInputSource
@@ -13,7 +14,7 @@
1314
is_valid_get_response,
1415
is_valid_post_response,
1516
)
16-
from mindee.parsing.v2.common_response import CommonStatus
17+
from mindee.parsing.v2.field.common_response import CommonStatus
1718
from mindee.parsing.v2.inference_response import InferenceResponse
1819
from mindee.parsing.v2.job_response import JobResponse
1920

@@ -38,13 +39,14 @@ def __init__(self, api_key: Optional[str] = None) -> None:
3839
self.mindee_api = MindeeApiV2(api_key)
3940

4041
def enqueue_inference(
41-
self, input_source: LocalInputSource, params: InferenceParameters
42+
self,
43+
input_source: Union[LocalInputSource, UrlInputSource],
44+
params: InferenceParameters,
4245
) -> JobResponse:
4346
"""
4447
Enqueues a document to a given model.
4548
46-
:param input_source: The document/source file to use.
47-
Has to be created beforehand.
49+
:param input_source: The document/source file to use. Can be local or remote.
4850
4951
:param params: Parameters to set when sending a file.
5052
:return: A valid job response.
@@ -95,13 +97,14 @@ def get_inference(self, inference_id: str) -> InferenceResponse:
9597
return InferenceResponse(dict_response)
9698

9799
def enqueue_and_get_inference(
98-
self, input_source: LocalInputSource, params: InferenceParameters
100+
self,
101+
input_source: Union[LocalInputSource, UrlInputSource],
102+
params: InferenceParameters,
99103
) -> InferenceResponse:
100104
"""
101105
Enqueues to an asynchronous endpoint and automatically polls for a response.
102106
103-
:param input_source: The document/source file to use.
104-
Has to be created beforehand.
107+
:param input_source: The document/source file to use. Can be local or remote.
105108
106109
:param params: Parameters to set when sending a file.
107110

mindee/input/local_response.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from typing import Any, BinaryIO, Dict, Type, TypeVar, Union
88

99
from mindee.error.mindee_error import MindeeError
10-
from mindee.parsing.v2.common_response import CommonResponse
10+
from mindee.parsing.v2.field.common_response import CommonResponse
1111

1212

1313
class LocalResponse:

mindee/mindee_http/mindee_api_v2.py

Lines changed: 24 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,10 @@
11
import os
2-
from typing import Dict, Optional
2+
from typing import Dict, Optional, Union
33

44
import requests
55

66
from mindee.error.mindee_error import MindeeApiV2Error
7-
from mindee.input import LocalInputSource
7+
from mindee.input import LocalInputSource, UrlInputSource
88
from mindee.input.inference_parameters import InferenceParameters
99
from mindee.logger import logger
1010
from mindee.mindee_http.base_settings import USER_AGENT
@@ -68,7 +68,9 @@ def set_from_env(self) -> None:
6868
logger.debug("Value was set from env: %s", name)
6969

7070
def req_post_inference_enqueue(
71-
self, input_source: LocalInputSource, params: InferenceParameters
71+
self,
72+
input_source: Union[LocalInputSource, UrlInputSource],
73+
params: InferenceParameters,
7274
) -> requests.Response:
7375
"""
7476
Make an asynchronous request to POST a document for prediction on the V2 API.
@@ -87,14 +89,25 @@ def req_post_inference_enqueue(
8789
if params.alias and len(params.alias):
8890
data["alias"] = params.alias
8991

90-
files = {"file": input_source.read_contents(params.close_file)}
91-
response = requests.post(
92-
url=url,
93-
files=files,
94-
headers=self.base_headers,
95-
data=data,
96-
timeout=self.request_timeout,
97-
)
92+
if isinstance(input_source, LocalInputSource):
93+
files = {"file": input_source.read_contents(params.close_file)}
94+
response = requests.post(
95+
url=url,
96+
files=files,
97+
headers=self.base_headers,
98+
data=data,
99+
timeout=self.request_timeout,
100+
)
101+
elif isinstance(input_source, UrlInputSource):
102+
data["url"] = input_source.url
103+
response = requests.post(
104+
url=url,
105+
headers=self.base_headers,
106+
data=data,
107+
timeout=self.request_timeout,
108+
)
109+
else:
110+
raise MindeeApiV2Error("Invalid input source.")
98111
return response
99112

100113
def req_get_job(self, job_id: str) -> requests.Response:

mindee/parsing/v2/field/__init__.py

Whitespace-only changes.

mindee/parsing/v2/base_field.py renamed to mindee/parsing/v2/field/base_field.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
from typing import List, Optional
22

3-
from mindee.parsing.v2.dynamic_field import DynamicField
3+
from mindee.parsing.v2.field.dynamic_field import DynamicField
44

55

66
class BaseField(DynamicField):
File renamed without changes.

mindee/parsing/v2/dynamic_field.py renamed to mindee/parsing/v2/field/dynamic_field.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,13 @@ def get_field_type(raw_response: StringDict, indent_level: int = 0) -> DynamicFi
3434
"""Get appropriate field types."""
3535
if isinstance(raw_response, dict):
3636
if "value" in raw_response:
37-
field_file = import_module("mindee.parsing.v2.simple_field")
37+
field_file = import_module("mindee.parsing.v2.field.simple_field")
3838
field_class = getattr(field_file, FieldType.SIMPLE.value)
3939
elif "items" in raw_response:
40-
field_file = import_module("mindee.parsing.v2.list_field")
40+
field_file = import_module("mindee.parsing.v2.field.list_field")
4141
field_class = getattr(field_file, FieldType.LIST.value)
4242
elif "fields" in raw_response:
43-
field_file = import_module("mindee.parsing.v2.object_field")
43+
field_file = import_module("mindee.parsing.v2.field.object_field")
4444
field_class = getattr(field_file, FieldType.OBJECT.value)
4545
else:
4646
raise MindeeApiV2Error(f"Unrecognized field format in {raw_response}.")

0 commit comments

Comments
 (0)