10 changes: 5 additions & 5 deletions docs/extras/code_samples/default_v2.txt
@@ -1,4 +1,4 @@
from mindee import ClientV2, InferencePredictOptions
from mindee import ClientV2, InferenceParameters

input_path = "/path/to/the/file.ext"
api_key = "MY_API_KEY"
@@ -7,20 +7,20 @@ model_id = "MY_MODEL_ID"
# Init a new client
mindee_client = ClientV2(api_key)

# Set inference options
options = InferencePredictOptions(
# Set inference parameters
params = InferenceParameters(
# ID of the model, required.
model_id=model_id,
# If set to `True`, will enable Retrieval-Augmented Generation.
rag=False,
)

# Load a file from disk
input_doc = mindee_client.source_from_path(input_path)
input_source = mindee_client.source_from_path(input_path)

# Upload the file
response = mindee_client.enqueue_and_parse(
input_doc, options
input_source, params
)

# Print a brief summary of the parsed data
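For reference, a hedged sketch of driving the queue by hand with the renamed class, using only `ClientV2.enqueue` and `ClientV2.parse_queued` as they appear in the `client_v2.py` changes below; the API key, path, model ID, and 2-second delay are placeholders.

```python
from time import sleep

from mindee import ClientV2, InferenceParameters, JobResponse

mindee_client = ClientV2("MY_API_KEY")
input_source = mindee_client.source_from_path("/path/to/the/file.ext")
params = InferenceParameters(model_id="MY_MODEL_ID")

# Enqueue only, keeping the job ID so polling can happen on our own schedule.
job_response = mindee_client.enqueue(input_source, params)

# parse_queued returns a JobResponse while the job is still processing and an
# InferenceResponse once it is done (mirroring enqueue_and_parse below).
result = mindee_client.parse_queued(job_response.job.id)
while isinstance(result, JobResponse):
    if result.job.status == "Failed":
        raise RuntimeError("Inference job failed")
    sleep(2)  # placeholder delay between polls
    result = mindee_client.parse_queued(job_response.job.id)

print(result)
```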
22 changes: 21 additions & 1 deletion mindee/__init__.py
@@ -1,12 +1,32 @@
from mindee import product
from mindee.client import Client
from mindee.client_v2 import ClientV2
from mindee.input.inference_predict_options import InferencePredictOptions
from mindee.input.inference_parameters import InferenceParameters
from mindee.input.local_response import LocalResponse
from mindee.input.page_options import PageOptions
from mindee.input.polling_options import PollingOptions
from mindee.parsing.common.api_response import ApiResponse
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
from mindee.parsing.common.feedback_response import FeedbackResponse
from mindee.parsing.common.job import Job
from mindee.parsing.common.predict_response import PredictResponse
from mindee.parsing.common.workflow_response import WorkflowResponse
from mindee.parsing.v2.inference_response import InferenceResponse
from mindee.parsing.v2.job_response import JobResponse

__all__ = [
"Client",
"ClientV2",
"InferenceParameters",
"LocalResponse",
"PageOptions",
"PollingOptions",
"ApiResponse",
"AsyncPredictResponse",
"FeedbackResponse",
"PredictResponse",
"WorkflowResponse",
"JobResponse",
"InferenceResponse",
"product",
]
39 changes: 16 additions & 23 deletions mindee/client_v2.py
@@ -4,7 +4,7 @@
from mindee.client_mixin import ClientMixin
from mindee.error.mindee_error import MindeeError
from mindee.error.mindee_http_error_v2 import handle_error_v2
from mindee.input.inference_predict_options import InferencePredictOptions
from mindee.input.inference_parameters import InferenceParameters
from mindee.input.local_response import LocalResponse
from mindee.input.polling_options import PollingOptions
from mindee.input.sources.local_input_source import LocalInputSource
@@ -38,28 +38,21 @@ def __init__(self, api_key: Optional[str] = None) -> None:
self.mindee_api = MindeeApiV2(api_key)

def enqueue(
self, input_source: LocalInputSource, options: InferencePredictOptions
self, input_source: LocalInputSource, params: InferenceParameters
) -> JobResponse:
"""
Enqueues a document to a given model.

:param input_source: The document/source file to use.
Has to be created beforehand.

:param options: Options for the prediction.
:param params: Parameters to set when sending a file.
:return: A valid inference response.
"""
logger.debug("Enqueuing document to '%s'", options.model_id)

if options.page_options and input_source.is_pdf():
input_source.process_pdf(
options.page_options.operation,
options.page_options.on_min_pages,
options.page_options.page_indexes,
)
logger.debug("Enqueuing document to '%s'", params.model_id)

response = self.mindee_api.predict_async_req_post(
input_source=input_source, options=options
input_source=input_source, options=params
)
dict_response = response.json()

@@ -89,35 +82,35 @@ def parse_queued(
return InferenceResponse(dict_response)

def enqueue_and_parse(
self, input_source: LocalInputSource, options: InferencePredictOptions
self, input_source: LocalInputSource, params: InferenceParameters
) -> InferenceResponse:
"""
Enqueues to an asynchronous endpoint and automatically polls for a response.

:param input_source: The document/source file to use.
Has to be created beforehand.

:param options: Options for the prediction.
:param params: Parameters to set when sending a file.

:return: A valid inference response.
"""
if not options.polling_options:
options.polling_options = PollingOptions()
if not params.polling_options:
params.polling_options = PollingOptions()
self._validate_async_params(
options.polling_options.initial_delay_sec,
options.polling_options.delay_sec,
options.polling_options.max_retries,
params.polling_options.initial_delay_sec,
params.polling_options.delay_sec,
params.polling_options.max_retries,
)
queue_result = self.enqueue(input_source, options)
queue_result = self.enqueue(input_source, params)
logger.debug(
"Successfully enqueued document with job id: %s", queue_result.job.id
)
sleep(options.polling_options.initial_delay_sec)
sleep(params.polling_options.initial_delay_sec)
retry_counter = 1
poll_results = self.parse_queued(
queue_result.job.id,
)
while retry_counter < options.polling_options.max_retries:
while retry_counter < params.polling_options.max_retries:
if not isinstance(poll_results, JobResponse):
break
if poll_results.job.status == "Failed":
@@ -133,7 +126,7 @@ def enqueue_and_parse(
queue_result.job.id,
)
retry_counter += 1
sleep(options.polling_options.delay_sec)
sleep(params.polling_options.delay_sec)
poll_results = self.parse_queued(queue_result.job.id)

if not isinstance(poll_results, InferenceResponse):
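Since `enqueue` no longer trims PDFs itself, callers that relied on `page_options` now apply the cut on the input source before enqueuing. A minimal sketch of the new flow, assuming placeholder paths and model ID, and assuming `PageOptions` defaults to the keep-only operation as the test changes below suggest (the `apply_page_options` helper is added in `local_input_source.py` further down):

```python
from mindee import ClientV2, InferenceParameters, PageOptions

mindee_client = ClientV2("MY_API_KEY")
input_source = mindee_client.source_from_path("/path/to/multipage.pdf")

# Page manipulation now lives on the input source, not on the parameters.
if input_source.is_pdf():
    # Keep only the first page when the document has at least 2 pages.
    input_source.apply_page_options(PageOptions(on_min_pages=2, page_indexes=[0]))

params = InferenceParameters(model_id="MY_MODEL_ID")
response = mindee_client.enqueue_and_parse(input_source, params)
```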
1 change: 0 additions & 1 deletion mindee/input/__init__.py
@@ -1,4 +1,3 @@
from mindee.input.inference_predict_options import InferencePredictOptions
from mindee.input.local_response import LocalResponse
from mindee.input.page_options import PageOptions
from mindee.input.polling_options import PollingOptions
mindee/input/inference_predict_options.py → mindee/input/inference_parameters.py
@@ -1,13 +1,12 @@
from dataclasses import dataclass
from typing import List, Optional

from mindee.input.page_options import PageOptions
from mindee.input.polling_options import PollingOptions


@dataclass
class InferencePredictOptions:
"""Inference prediction options."""
class InferenceParameters:
"""Inference parameters to set when sending a file."""

model_id: str
"""ID of the model, required."""
@@ -17,9 +16,7 @@ class InferencePredictOptions:
"""Optional alias for the file."""
webhook_ids: Optional[List[str]] = None
"""IDs of webhooks to propagate the API response to."""
page_options: Optional[PageOptions] = None
"""Options for page-level inference."""
polling_options: Optional[PollingOptions] = None
"""Options for polling."""
"""Options for polling. Set only if having timeout issues."""
close_file: bool = True
"""Whether to close the file after parsing."""
4 changes: 2 additions & 2 deletions mindee/input/polling_options.py
@@ -4,9 +4,9 @@ class PollingOptions:
initial_delay_sec: float
"""Initial delay before the first polling attempt."""
delay_sec: float
"""Delay between each polling attempts."""
"""Delay between each polling attempt."""
max_retries: int
"""Total amount of polling attempts."""
"""Total number of polling attempts."""

def __init__(
self,
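Polling tweaks plug into `InferenceParameters.polling_options`, and `enqueue_and_parse` fills in defaults when the field is left unset. A minimal sketch, assuming the constructor keywords mirror the attributes documented above (the `__init__` body is collapsed in this diff) and using placeholder timings:

```python
from mindee import InferenceParameters, PollingOptions

params = InferenceParameters(
    model_id="MY_MODEL_ID",
    # Only set this if the defaults cause timeout issues.
    polling_options=PollingOptions(
        initial_delay_sec=4.0,  # wait before the first status check
        delay_sec=2.0,          # wait between subsequent checks
        max_retries=60,         # total number of polling attempts
    ),
)
```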
12 changes: 11 additions & 1 deletion mindee/input/sources/local_input_source.py
@@ -8,7 +8,7 @@
from mindee.error.mimetype_error import MimeTypeError
from mindee.error.mindee_error import MindeeError, MindeeSourceError
from mindee.image_operations.image_compressor import compress_image
from mindee.input.page_options import KEEP_ONLY, REMOVE
from mindee.input.page_options import KEEP_ONLY, REMOVE, PageOptions
from mindee.input.sources.input_type import InputType
from mindee.logger import logger
from mindee.pdf.pdf_compressor import compress_pdf
@@ -112,6 +112,16 @@ def count_doc_pages(self) -> int:
return len(pdf)
return 1

def apply_page_options(self, page_options: PageOptions) -> None:
"""Apply cut and merge options on multipage documents."""
if not self.is_pdf():
raise MindeeSourceError(f"File is not a PDF: {self.filename}")
self.process_pdf(
page_options.operation,
page_options.on_min_pages,
page_options.page_indexes,
)

def process_pdf(
self,
behavior: str,
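Unlike the old in-client handling, which only ran for PDFs, `apply_page_options` raises when the source is not a PDF. A quick sketch of the guard, with a placeholder JPEG path:

```python
from mindee.error.mindee_error import MindeeSourceError
from mindee.input import PathInput
from mindee.input.page_options import PageOptions

jpeg_source = PathInput("/path/to/receipt.jpg")
try:
    jpeg_source.apply_page_options(PageOptions(on_min_pages=2, page_indexes=[0]))
except MindeeSourceError as err:
    print(err)  # e.g. "File is not a PDF: receipt.jpg"
```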
4 changes: 2 additions & 2 deletions mindee/mindee_http/mindee_api_v2.py
@@ -5,7 +5,7 @@

from mindee.error.mindee_error import MindeeApiV2Error
from mindee.input import LocalInputSource
from mindee.input.inference_predict_options import InferencePredictOptions
from mindee.input.inference_parameters import InferenceParameters
from mindee.logger import logger
from mindee.mindee_http.base_settings import USER_AGENT
from mindee.mindee_http.settings_mixin import SettingsMixin
@@ -68,7 +68,7 @@ def set_from_env(self) -> None:
logger.debug("Value was set from env: %s", name)

def predict_async_req_post(
self, input_source: LocalInputSource, options: InferencePredictOptions
self, input_source: LocalInputSource, options: InferenceParameters
) -> requests.Response:
"""
Make an asynchronous request to POST a document for prediction on the V2 API.
10 changes: 4 additions & 6 deletions tests/test_client_v2.py
@@ -2,7 +2,7 @@

import pytest

from mindee import ClientV2, InferencePredictOptions, LocalResponse
from mindee import ClientV2, InferenceParameters, LocalResponse
from mindee.error.mindee_error import MindeeApiV2Error
from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2
from mindee.input import LocalInputSource, PathInput
@@ -96,9 +96,7 @@ def test_enqueue_path_with_env_token(custom_base_url_client):
f"{FILE_TYPES_DIR}/receipt.jpg"
)
with pytest.raises(MindeeHTTPErrorV2):
custom_base_url_client.enqueue(
input_doc, InferencePredictOptions("dummy-model")
)
custom_base_url_client.enqueue(input_doc, InferenceParameters("dummy-model"))


@pytest.mark.v2
@@ -108,7 +106,7 @@ def test_enqueue_and_parse_path_with_env_token(custom_base_url_client):
)
with pytest.raises(MindeeHTTPErrorV2):
custom_base_url_client.enqueue_and_parse(
input_doc, InferencePredictOptions("dummy-model")
input_doc, InferenceParameters("dummy-model")
)


@@ -128,7 +126,7 @@ def test_error_handling(custom_base_url_client):
PathInput(
V2_DATA_DIR / "products" / "financial_document" / "default_sample.jpg"
),
InferencePredictOptions("dummy-model"),
InferenceParameters("dummy-model"),
)
assert e.status_code == -1
assert e.detail == "forced failure from test"
8 changes: 4 additions & 4 deletions tests/test_client_v2_integration.py
@@ -5,7 +5,7 @@

import pytest

from mindee import ClientV2, InferencePredictOptions
from mindee import ClientV2, InferenceParameters
from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2
from mindee.parsing.v2.inference_response import InferenceResponse
from tests.test_inputs import FILE_TYPES_DIR, PRODUCT_DATA_DIR
@@ -40,7 +40,7 @@ def test_parse_file_empty_multiple_pages_must_succeed(
assert input_path.exists(), f"sample file missing: {input_path}"

input_doc = v2_client.source_from_path(input_path)
options = InferencePredictOptions(findoc_model_id)
options = InferenceParameters(findoc_model_id)

response: InferenceResponse = v2_client.enqueue_and_parse(input_doc, options)

@@ -66,7 +66,7 @@ def test_parse_file_filled_single_page_must_succeed(
assert input_path.exists(), f"sample file missing: {input_path}"

input_doc = v2_client.source_from_path(input_path)
options = InferencePredictOptions(findoc_model_id)
options = InferenceParameters(findoc_model_id)

response: InferenceResponse = v2_client.enqueue_and_parse(input_doc, options)

@@ -95,7 +95,7 @@ def test_invalid_uuid_must_throw_error_422(v2_client: ClientV2) -> None:
assert input_path.exists()

input_doc = v2_client.source_from_path(input_path)
options = InferencePredictOptions("INVALID MODEL ID")
options = InferenceParameters("INVALID MODEL ID")

with pytest.raises(MindeeHTTPErrorV2) as exc_info:
v2_client.enqueue(input_doc, options)
31 changes: 22 additions & 9 deletions tests/test_inputs.py
@@ -6,7 +6,7 @@

from mindee.error.mimetype_error import MimeTypeError
from mindee.error.mindee_error import MindeeError, MindeeSourceError
from mindee.input.page_options import KEEP_ONLY, REMOVE
from mindee.input.page_options import KEEP_ONLY, REMOVE, PageOptions
from mindee.input.sources.base_64_input import Base64Input
from mindee.input.sources.bytes_input import BytesInput
from mindee.input.sources.file_input import FileInput
@@ -45,15 +45,8 @@ def test_pdf_reconstruct_no_cut():
assert isinstance(input_file.file_object, io.BufferedReader)


@pytest.mark.parametrize("numb_pages", [1, 2, 3])
def test_pdf_cut_n_pages(numb_pages: int):
input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf")
def _assert_pdf_options(input_obj, numb_pages):
assert input_obj.is_pdf() is True
input_obj.process_pdf(
behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, -2, -1][:numb_pages]
)
assert input_obj.count_doc_pages() == numb_pages

# Currently the least verbose way of comparing pages with pypdfium2
# I.e. each page is read & rendered as a rasterized image. These images are then compared as raw byte sequences.
cut_pdf = pdfium.PdfDocument(input_obj.file_object)
@@ -69,6 +62,26 @@ def test_pdf_cut_n_pages(numb_pages: int):
pdf.close()


@pytest.mark.parametrize("numb_pages", [1, 2, 3])
def test_process_pdf_cut_n_pages(numb_pages: int):
input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf")
input_obj.process_pdf(
behavior=KEEP_ONLY, on_min_pages=2, page_indexes=[0, -2, -1][:numb_pages]
)
assert input_obj.count_doc_pages() == numb_pages
_assert_pdf_options(input_obj, numb_pages)


@pytest.mark.parametrize("numb_pages", [1, 2, 3])
def test_apply_pages_pdf_cut_n_pages(numb_pages: int):
input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf")
input_obj.apply_page_options(
PageOptions(on_min_pages=2, page_indexes=[0, -2, -1][:numb_pages])
)
assert input_obj.count_doc_pages() == numb_pages
_assert_pdf_options(input_obj, numb_pages)


def test_pdf_keep_5_first_pages():
input_obj = PathInput(FILE_TYPES_DIR / "pdf" / "multipage.pdf")
assert input_obj.is_pdf() is True