Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/unstract/llmwhisperer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.4.1"
__version__ = "2.4.2"

from .client_v2 import LLMWhispererClientV2 # noqa: F401

Expand Down
20 changes: 16 additions & 4 deletions src/unstract/llmwhisperer/client_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,13 +351,23 @@ def generate() -> Generator[bytes, None, None]:
s = requests.Session()
response = s.send(prepared, timeout=wait_timeout, stream=should_stream)
response.encoding = encoding
if response.status_code != 200 and response.status_code != 202:
message = json.loads(response.text)
if response.status_code not in (200, 202):
try:
message = json.loads(response.text)
if not isinstance(message, dict):
message = {"message": str(message)}
except (json.JSONDecodeError, ValueError):
message = {"message": response.text}
message["status_code"] = response.status_code
message["extraction"] = {}
raise LLMWhispererClientException(message)
if response.status_code == 202:
message = json.loads(response.text)
try:
message = json.loads(response.text)
if not isinstance(message, dict):
message = {"message": str(message)}
except (json.JSONDecodeError, ValueError):
message = {"message": response.text}
message["status_code"] = response.status_code
message["extraction"] = {}
if not wait_for_completion:
Expand Down Expand Up @@ -455,7 +465,9 @@ def whisper_status(self, whisper_hash: str) -> Any:
# Truncate response text if too long to avoid log pollution
response_preview = response.text[:500] + "..." if len(response.text) > 500 else response.text
self.logger.error(f"API error - JSON decode failed: {e}; Response preview: {response_preview!r}")
raise LLMWhispererClientException(f"API error: non-JSON response - {response_preview}", response.status_code) from e
raise LLMWhispererClientException(
f"API error: non-JSON response - {response_preview}", response.status_code
) from e
raise LLMWhispererClientException(err, response.status_code)
message = json.loads(response.text)
message["status_code"] = response.status_code
Expand Down
75 changes: 74 additions & 1 deletion tests/unit/client_v2_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from typing import Any
from unittest.mock import MagicMock

from unstract.llmwhisperer.client_v2 import LLMWhispererClientV2
import pytest
from unstract.llmwhisperer.client_v2 import LLMWhispererClientException, LLMWhispererClientV2

WEBHOOK_URL = "http://test-webhook.com/callback"
AUTH_TOKEN = "dummy-auth-token"
Expand Down Expand Up @@ -34,3 +35,75 @@ def test_get_webhook_details(mocker: Any, client_v2: LLMWhispererClientV2) -> No

assert response["status"] == "success"
assert response["webhook_details"]["url"] == WEBHOOK_URL


def test_whisper_json_string_response_error(mocker: Any, client_v2: LLMWhispererClientV2) -> None:
    """Check the exception payload when a 400 body is a bare JSON string.

    The mocked HTTP response carries a JSON-encoded string instead of an
    object; the exception raised by ``whisper`` is expected to carry a dict
    holding that string under ``message`` plus ``status_code`` and an empty
    ``extraction``.
    """
    # Body is valid JSON, but decodes to a str rather than a dict.
    fake_response = MagicMock(
        status_code=400,
        text='"Error message as JSON string"',
        encoding="utf-8",
    )
    mocker.patch("requests.Session.send", return_value=fake_response)

    with pytest.raises(LLMWhispererClientException) as raised:
        client_v2.whisper(url="https://example.com/test.pdf")

    payload = raised.value.args[0]
    assert payload["message"] == "Error message as JSON string"
    assert payload["status_code"] == 400
    assert payload["extraction"] == {}


def test_whisper_json_string_response_202(mocker: Any, client_v2: LLMWhispererClientV2) -> None:
    """Check the returned dict when a 202 body is a bare JSON string.

    With ``wait_for_completion=False`` the call is expected to return a dict
    holding the decoded string under ``message`` plus ``status_code`` and an
    empty ``extraction``.
    """
    # Body is valid JSON, but decodes to a str rather than a dict.
    fake_response = MagicMock(
        status_code=202,
        text='"Processing in progress"',
        encoding="utf-8",
    )
    mocker.patch("requests.Session.send", return_value=fake_response)

    result = client_v2.whisper(url="https://example.com/test.pdf", wait_for_completion=False)

    assert result["message"] == "Processing in progress"
    assert result["status_code"] == 202
    assert result["extraction"] == {}


def test_whisper_invalid_json_response_error(mocker: Any, client_v2: LLMWhispererClientV2) -> None:
    """Check the exception payload when a 500 body is not JSON at all.

    The raised exception is expected to carry a dict holding the raw response
    text under ``message`` plus ``status_code`` and an empty ``extraction``.
    """
    # Plain text body: not parseable as JSON.
    fake_response = MagicMock(
        status_code=500,
        text="Invalid JSON response",
        encoding="utf-8",
    )
    mocker.patch("requests.Session.send", return_value=fake_response)

    with pytest.raises(LLMWhispererClientException) as raised:
        client_v2.whisper(url="https://example.com/test.pdf")

    payload = raised.value.args[0]
    assert payload["message"] == "Invalid JSON response"
    assert payload["status_code"] == 500
    assert payload["extraction"] == {}


def test_whisper_invalid_json_response_202(mocker: Any, client_v2: LLMWhispererClientV2) -> None:
    """Check the returned dict when a 202 body is not JSON at all.

    With ``wait_for_completion=False`` the call is expected to return a dict
    holding the raw response text under ``message`` plus ``status_code`` and
    an empty ``extraction``.
    """
    # Plain text body: not parseable as JSON.
    fake_response = MagicMock(
        status_code=202,
        text="Invalid JSON response",
        encoding="utf-8",
    )
    mocker.patch("requests.Session.send", return_value=fake_response)

    result = client_v2.whisper(url="https://example.com/test.pdf", wait_for_completion=False)

    assert result["message"] == "Invalid JSON response"
    assert result["status_code"] == 202
    assert result["extraction"] == {}