From ae923965bbd629633f849e7609ef2bba571c5639 Mon Sep 17 00:00:00 2001 From: johnyrahul Date: Mon, 21 Jul 2025 12:15:20 +0530 Subject: [PATCH] Fix TypeError in whisper method when API returns JSON string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add robust error handling for JSON string responses in whisper method - Ensure message is always a dict before assigning status_code - Handle both JSON decode errors and non-dict JSON responses - Add comprehensive unit tests for all error scenarios - Bump version to 2.4.2 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- src/unstract/llmwhisperer/__init__.py | 2 +- src/unstract/llmwhisperer/client_v2.py | 20 +++++-- tests/unit/client_v2_test.py | 75 +++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 6 deletions(-) diff --git a/src/unstract/llmwhisperer/__init__.py b/src/unstract/llmwhisperer/__init__.py index 9ad11ab..c491098 100644 --- a/src/unstract/llmwhisperer/__init__.py +++ b/src/unstract/llmwhisperer/__init__.py @@ -1,4 +1,4 @@ -__version__ = "2.4.1" +__version__ = "2.4.2" from .client_v2 import LLMWhispererClientV2 # noqa: F401 diff --git a/src/unstract/llmwhisperer/client_v2.py b/src/unstract/llmwhisperer/client_v2.py index 68cc7bb..1a54d25 100644 --- a/src/unstract/llmwhisperer/client_v2.py +++ b/src/unstract/llmwhisperer/client_v2.py @@ -351,13 +351,23 @@ def generate() -> Generator[bytes, None, None]: s = requests.Session() response = s.send(prepared, timeout=wait_timeout, stream=should_stream) response.encoding = encoding - if response.status_code != 200 and response.status_code != 202: - message = json.loads(response.text) + if response.status_code not in (200, 202): + try: + message = json.loads(response.text) + if not isinstance(message, dict): + message = {"message": str(message)} + except (json.JSONDecodeError, ValueError): + message = {"message": response.text} message["status_code"] = response.status_code message["extraction"] = {} raise LLMWhispererClientException(message) if response.status_code == 202: - message = json.loads(response.text) + try: + message = json.loads(response.text) + if not isinstance(message, dict): + message = {"message": str(message)} + except (json.JSONDecodeError, ValueError): + message = {"message": response.text} message["status_code"] = response.status_code message["extraction"] = {} if not wait_for_completion: @@ -455,7 +465,9 @@ def whisper_status(self, whisper_hash: str) -> Any: # Truncate response text if too long to avoid log pollution response_preview = response.text[:500] + "..." if len(response.text) > 500 else response.text self.logger.error(f"API error - JSON decode failed: {e}; Response preview: {response_preview!r}") - raise LLMWhispererClientException(f"API error: non-JSON response - {response_preview}", response.status_code) from e + raise LLMWhispererClientException( + f"API error: non-JSON response - {response_preview}", response.status_code + ) from e raise LLMWhispererClientException(err, response.status_code) message = json.loads(response.text) message["status_code"] = response.status_code diff --git a/tests/unit/client_v2_test.py b/tests/unit/client_v2_test.py index 91d6059..707249b 100644 --- a/tests/unit/client_v2_test.py +++ b/tests/unit/client_v2_test.py @@ -1,7 +1,8 @@ from typing import Any from unittest.mock import MagicMock -from unstract.llmwhisperer.client_v2 import LLMWhispererClientV2 +import pytest +from unstract.llmwhisperer.client_v2 import LLMWhispererClientException, LLMWhispererClientV2 WEBHOOK_URL = "http://test-webhook.com/callback" AUTH_TOKEN = "dummy-auth-token" @@ -34,3 +35,75 @@ def test_get_webhook_details(mocker: Any, client_v2: LLMWhispererClientV2) -> No assert response["status"] == "success" assert response["webhook_details"]["url"] == WEBHOOK_URL + + +def test_whisper_json_string_response_error(mocker: Any, client_v2: LLMWhispererClientV2) -> None: + """Test whisper method handles JSON string responses correctly for error + cases.""" + mock_send = mocker.patch("requests.Session.send") + mock_response = MagicMock() + mock_response.status_code = 400 + mock_response.text = '"Error message as JSON string"' + mock_response.encoding = "utf-8" + mock_send.return_value = mock_response + + with pytest.raises(LLMWhispererClientException) as exc_info: + client_v2.whisper(url="https://example.com/test.pdf") + + error = exc_info.value.args[0] + assert error["message"] == "Error message as JSON string" + assert error["status_code"] == 400 + assert error["extraction"] == {} + + +def test_whisper_json_string_response_202(mocker: Any, client_v2: LLMWhispererClientV2) -> None: + """Test whisper method handles JSON string responses correctly for 202 + status.""" + mock_send = mocker.patch("requests.Session.send") + mock_response = MagicMock() + mock_response.status_code = 202 + mock_response.text = '"Processing in progress"' + mock_response.encoding = "utf-8" + mock_send.return_value = mock_response + + response = client_v2.whisper(url="https://example.com/test.pdf", wait_for_completion=False) + + assert response["message"] == "Processing in progress" + assert response["status_code"] == 202 + assert response["extraction"] == {} + + +def test_whisper_invalid_json_response_error(mocker: Any, client_v2: LLMWhispererClientV2) -> None: + """Test whisper method handles invalid JSON responses correctly for error + cases.""" + mock_send = mocker.patch("requests.Session.send") + mock_response = MagicMock() + mock_response.status_code = 500 + mock_response.text = "Invalid JSON response" + mock_response.encoding = "utf-8" + mock_send.return_value = mock_response + + with pytest.raises(LLMWhispererClientException) as exc_info: + client_v2.whisper(url="https://example.com/test.pdf") + + error = exc_info.value.args[0] + assert error["message"] == "Invalid JSON response" + assert error["status_code"] == 500 + assert error["extraction"] == {} + + +def test_whisper_invalid_json_response_202(mocker: Any, client_v2: LLMWhispererClientV2) -> None: + """Test whisper method handles invalid JSON responses correctly for 202 + status.""" + mock_send = mocker.patch("requests.Session.send") + mock_response = MagicMock() + mock_response.status_code = 202 + mock_response.text = "Invalid JSON response" + mock_response.encoding = "utf-8" + mock_send.return_value = mock_response + + response = client_v2.whisper(url="https://example.com/test.pdf", wait_for_completion=False) + + assert response["message"] == "Invalid JSON response" + assert response["status_code"] == 202 + assert response["extraction"] == {}