Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 0 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,32 +195,6 @@ result = mindee_client.parse(
)
```

## Further Reading
Complete details on how the library works are available in the following guides:

* [Getting started](https://developers.mindee.com/docs/python-getting-started)
* [Python Command Line Interface (CLI)](https://developers.mindee.com/docs/python-cli)
* [Python Generated](https://developers.mindee.com/docs/generated-api-python)
* [Python Custom APIs (Deprecated - API Builder)](https://developers.mindee.com/docs/python-api-builder)
* [Python Invoice OCR](https://developers.mindee.com/docs/python-invoice-ocr)
* [Python International Id OCR](https://developers.mindee.com/docs/python-international-id-ocr)
* [Python Resume OCR](https://developers.mindee.com/docs/python-resume-ocr)
* [Python Receipt OCR](https://developers.mindee.com/docs/python-receipt-ocr)
* [Python Financial Document OCR](https://developers.mindee.com/docs/python-financial-document-ocr)
* [Python Passport OCR](https://developers.mindee.com/docs/python-passport-ocr)
* [Python Proof of Address OCR](https://developers.mindee.com/docs/python-proof-of-address-ocr)
* [Python EU Driver License OCR](https://developers.mindee.com/docs/python-eu-driver-license-ocr)
* [Python FR Bank Account Detail OCR](https://developers.mindee.com/docs/python-fr-bank-account-details-ocr)
* [Python FR Carte Grise OCR](https://developers.mindee.com/docs/python-fr-carte-grise-ocr)
* [Python FR Health Card OCR](https://developers.mindee.com/docs/python-fr-health-card-ocr)
* [Python FR ID Card OCR](https://developers.mindee.com/docs/python-fr-carte-nationale-didentite-ocr)
* [Python US Bank Check OCR](https://developers.mindee.com/docs/python-us-bank-check-ocr)
* [Python US W9 OCR](https://developers.mindee.com/docs/python-us-w9-ocr)
* [Python Barcode Reader API](https://developers.mindee.com/docs/python-barcode-reader-ocr)
* [Python Cropper API](https://developers.mindee.com/docs/python-cropper-ocr)
* [Python Invoice Splitter API](https://developers.mindee.com/docs/python-invoice-splitter-api)
* [Python Multi Receipts Detector API](https://developers.mindee.com/docs/python-multi-receipts-detector-ocr)

You can view the source code on [GitHub](https://github.com/mindee/mindee-api-python).

You can also take a look at the
Expand Down
2 changes: 1 addition & 1 deletion docs/extras/code_samples/default_v2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ params = InferenceParameters(
input_source = mindee_client.source_from_path(input_path)

# Upload the file
response = mindee_client.enqueue_and_parse(
response = mindee_client.enqueue_and_get_inference(
input_source, params
)

Expand Down
2 changes: 1 addition & 1 deletion docs/extras/guide/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ custom_endpoint = mindee_client.create_endpoint(
"my-account-name",
# "my-version" # optional
)
result = mindee_client.enqueue_and_parse(product.GeneratedV1, input_doc, endpoint=custom_endpoint)
result = mindee_client.enqueue_and_get_inference(product.GeneratedV1, input_doc, endpoint=custom_endpoint)
```

This is because the `GeneratedV1` class is enough to handle the return processing, but the actual endpoint needs to be specified.
Expand Down
86 changes: 44 additions & 42 deletions mindee/client_v2.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from time import sleep
from typing import Optional, Union
from typing import Optional

from mindee.client_mixin import ClientMixin
from mindee.error.mindee_error import MindeeError
Expand All @@ -14,6 +14,7 @@
is_valid_get_response,
is_valid_post_response,
)
from mindee.parsing.v2.common_response import CommonStatus
from mindee.parsing.v2.inference_response import InferenceResponse
from mindee.parsing.v2.job_response import JobResponse

Expand All @@ -37,7 +38,7 @@ def __init__(self, api_key: Optional[str] = None) -> None:
self.api_key = api_key
self.mindee_api = MindeeApiV2(api_key)

def enqueue(
def enqueue_inference(
self, input_source: LocalInputSource, params: InferenceParameters
) -> JobResponse:
"""
Expand All @@ -49,39 +50,52 @@ def enqueue(
:param params: Parameters to set when sending a file.
:return: A valid job response.
"""
logger.debug("Enqueuing document to '%s'", params.model_id)
logger.debug("Enqueuing inference using model: %s", params.model_id)

response = self.mindee_api.predict_async_req_post(
input_source=input_source, options=params
response = self.mindee_api.req_post_inference_enqueue(
input_source=input_source, params=params
)
dict_response = response.json()

if not is_valid_post_response(response):
handle_error_v2(dict_response)

return JobResponse(dict_response)

def parse_queued(
self,
queue_id: str,
) -> Union[InferenceResponse, JobResponse]:
def get_job(self, job_id: str) -> JobResponse:
"""
Parses a queued document.
Get the status of an inference that was previously enqueued.

Can be used for polling.

:param queue_id: queue_id received from the API.
:param job_id: UUID of the job to retrieve.
:return: A job response.
"""
logger.debug("Fetching from queue '%s'.", queue_id)
logger.debug("Fetching job: %s", job_id)

response = self.mindee_api.get_inference_from_queue(queue_id)
response = self.mindee_api.req_get_job(job_id)
if not is_valid_get_response(response):
handle_error_v2(response.json())
dict_response = response.json()
return JobResponse(dict_response)

def get_inference(self, inference_id: str) -> InferenceResponse:
"""
Get the result of an inference that was previously enqueued.

The inference will only be available after it has finished processing.

:param inference_id: UUID of the inference to retrieve.
:return: An inference response.
"""
logger.debug("Fetching inference: %s", inference_id)

response = self.mindee_api.req_get_inference(inference_id)
if not is_valid_get_response(response):
handle_error_v2(response.json())
dict_response = response.json()
if "job" in dict_response:
return JobResponse(dict_response)
return InferenceResponse(dict_response)

def enqueue_and_parse(
def enqueue_and_get_inference(
self, input_source: LocalInputSource, params: InferenceParameters
) -> InferenceResponse:
"""
Expand All @@ -101,40 +115,28 @@ def enqueue_and_parse(
params.polling_options.delay_sec,
params.polling_options.max_retries,
)
queue_result = self.enqueue(input_source, params)
enqueue_response = self.enqueue_inference(input_source, params)
logger.debug(
"Successfully enqueued document with job id: %s", queue_result.job.id
"Successfully enqueued inference with job id: %s", enqueue_response.job.id
)
sleep(params.polling_options.initial_delay_sec)
retry_counter = 1
poll_results = self.parse_queued(
queue_result.job.id,
)
while retry_counter < params.polling_options.max_retries:
if not isinstance(poll_results, JobResponse):
break
if poll_results.job.status == "Failed":
if poll_results.job.error:
detail = poll_results.job.error.detail
try_counter = 0
while try_counter < params.polling_options.max_retries:
job_response = self.get_job(enqueue_response.job.id)
if job_response.job.status == CommonStatus.FAILED.value:
if job_response.job.error:
detail = job_response.job.error.detail
else:
detail = "No error detail available."
raise MindeeError(
f"Parsing failed for job {poll_results.job.id}: {detail}"
f"Parsing failed for job {job_response.job.id}: {detail}"
)
logger.debug(
"Polling server for parsing result with job id: %s",
queue_result.job.id,
)
retry_counter += 1
if job_response.job.status == CommonStatus.PROCESSED.value:
return self.get_inference(job_response.job.id)
try_counter += 1
sleep(params.polling_options.delay_sec)
poll_results = self.parse_queued(queue_result.job.id)

if not isinstance(poll_results, InferenceResponse):
raise MindeeError(
f"Couldn't retrieve document after {retry_counter} tries."
)

return poll_results
raise MindeeError(f"Couldn't retrieve document after {try_counter + 1} tries.")

@staticmethod
def load_inference(local_response: LocalResponse) -> InferenceResponse:
Expand Down
2 changes: 1 addition & 1 deletion mindee/input/inference_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class InferenceParameters:
rag: bool = False
"""If set to `True`, will enable Retrieval-Augmented Generation."""
alias: Optional[str] = None
"""Optional alias for the file."""
"""Use an alias to link the file to your own DB. If empty, no alias will be used."""
webhook_ids: Optional[List[str]] = None
"""IDs of webhooks to propagate the API response to."""
polling_options: Optional[PollingOptions] = None
Expand Down
41 changes: 27 additions & 14 deletions mindee/mindee_http/mindee_api_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,45 +67,58 @@ def set_from_env(self) -> None:
func(env_val)
logger.debug("Value was set from env: %s", name)

def predict_async_req_post(
self, input_source: LocalInputSource, options: InferenceParameters
def req_post_inference_enqueue(
self, input_source: LocalInputSource, params: InferenceParameters
) -> requests.Response:
"""
Make an asynchronous request to POST a document for prediction on the V2 API.

:param input_source: Input object.
:param options: Options for the enqueueing of the document.
:param params: Options for the enqueueing of the document.
:return: requests response.
"""
data = {"model_id": options.model_id}
data = {"model_id": params.model_id}
url = f"{self.url_root}/inferences/enqueue"

if options.rag:
if params.rag:
data["rag"] = "true"
if options.webhook_ids and len(options.webhook_ids) > 0:
data["webhook_ids"] = ",".join(options.webhook_ids)
if options.alias and len(options.alias):
data["alias"] = options.alias
if params.webhook_ids and len(params.webhook_ids) > 0:
data["webhook_ids"] = ",".join(params.webhook_ids)
if params.alias and len(params.alias):
data["alias"] = params.alias

files = {"file": input_source.read_contents(options.close_file)}
files = {"file": input_source.read_contents(params.close_file)}
response = requests.post(
url=url,
files=files,
headers=self.base_headers,
data=data,
timeout=self.request_timeout,
)

return response

def get_inference_from_queue(self, queue_id: str) -> requests.Response:
def req_get_job(self, job_id: str) -> requests.Response:
"""
Sends a request to fetch the job matching a given job_id.

:param job_id: Job ID, returned by the enqueue request.
"""
return requests.get(
f"{self.url_root}/jobs/{job_id}",
headers=self.base_headers,
timeout=self.request_timeout,
allow_redirects=False,
)

def req_get_inference(self, inference_id: str) -> requests.Response:
"""
Sends a request to fetch the inference matching a given inference_id.

:param queue_id: queue_id received from the API
:param inference_id: Inference ID, returned by the job request.
"""
return requests.get(
f"{self.url_root}/jobs/{queue_id}",
f"{self.url_root}/inferences/{inference_id}",
headers=self.base_headers,
timeout=self.request_timeout,
allow_redirects=False,
)
9 changes: 9 additions & 0 deletions mindee/parsing/v2/common_response.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
import json
from enum import Enum

from mindee.logger import logger
from mindee.parsing.common.string_dict import StringDict


class CommonStatus(str, Enum):
"""Response status."""

PROCESSING = "Processing"
FAILED = "Failed"
PROCESSED = "Processed"


class CommonResponse:
"""Base class for V1 & V2 responses."""

Expand Down
Loading