Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions libs/vertexai/langchain_google_vertexai/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
cast,
)

import httpx
import vertexai
from google.api_core.client_options import ClientOptions
from google.cloud.aiplatform import initializer
Expand Down Expand Up @@ -293,6 +294,12 @@ class _VertexAICommon(_VertexAIBase):
)
"""Enable timestamp understanding of audio-only files."""

timeout: float | httpx.Timeout | None = Field(
default=None,
description="Timeout for API requests.",
)
"""The timeout for requests to the Vertex AI API, in seconds."""

@property
def _llm_type(self) -> str:
return "vertexai"
Expand Down
4 changes: 4 additions & 0 deletions libs/vertexai/langchain_google_vertexai/chat_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2283,6 +2283,7 @@ def _generate_gemini(
wait_exponential_kwargs=self.wait_exponential_kwargs,
request=request,
metadata=self.default_metadata,
timeout=self.timeout,
**kwargs,
)
return self._gemini_response_to_chat_result(response)
Expand All @@ -2303,6 +2304,7 @@ async def _agenerate_gemini(
messages=messages, stop=stop, **kwargs
),
metadata=self.default_metadata,
timeout=self.timeout,
**kwargs,
)
return self._gemini_response_to_chat_result(response)
Expand Down Expand Up @@ -2413,6 +2415,7 @@ def _stream_gemini(
wait_exponential_kwargs=self.wait_exponential_kwargs,
request=request,
metadata=self.default_metadata,
timeout=self.timeout,
**kwargs,
)
total_lc_usage = None
Expand Down Expand Up @@ -2441,6 +2444,7 @@ async def _astream(
wait_exponential_kwargs=self.wait_exponential_kwargs,
request=request,
metadata=self.default_metadata,
timeout=self.timeout,
**kwargs,
)
total_lc_usage = None
Expand Down
1 change: 1 addition & 0 deletions libs/vertexai/langchain_google_vertexai/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def validate_environment(self) -> Self:
seed=self.seed,
response_schema=self.response_schema,
response_mime_type=self.response_mime_type,
timeout=self.timeout,
)
return self

Expand Down
6 changes: 0 additions & 6 deletions libs/vertexai/langchain_google_vertexai/model_garden.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
Literal,
)

import httpx
from google.auth.credentials import Credentials
from langchain_core.callbacks.manager import (
AsyncCallbackManagerForLLMRun,
Expand Down Expand Up @@ -180,11 +179,6 @@ class ChatAnthropicVertex(_VertexAICommon, BaseChatModel):
- `exp_base`: Exponent base to use (Default: `2.0`)
"""

timeout: float | httpx.Timeout | None = Field(
default=None,
description="Timeout for API requests.",
)

http_client: Any = Field(default=None, exclude=True)

async_http_client: Any = Field(default=None, exclude=True)
Expand Down
29 changes: 29 additions & 0 deletions libs/vertexai/tests/integration_tests/test_chat_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import re
from typing import Any, Literal, cast

from google.api_core.exceptions import DeadlineExceeded

try:
from langgraph.graph.state import CompiledStateGraph
except ImportError:
Expand Down Expand Up @@ -1701,3 +1703,30 @@ def test_code_execution_builtin(output_version: str) -> None:
}
response = llm.invoke([input_message, full, next_message])
_check_code_execution_output(response, output_version)


@pytest.mark.release
def test_chat_vertexai_timeout_non_streaming() -> None:
    """A sub-millisecond client timeout must surface as DeadlineExceeded on invoke."""
    vertexai.init(api_transport="grpc")
    llm = ChatVertexAI(
        model_name=_DEFAULT_MODEL_NAME,
        timeout=0.001,
        rate_limiter=RATE_LIMITER,
    )
    # Any real network round-trip exceeds 1 ms, so the call must time out.
    message = HumanMessage(content="Hello")
    with pytest.raises(DeadlineExceeded):
        llm.invoke([message])


@pytest.mark.release
def test_chat_vertexai_timeout_streaming() -> None:
    """The timeout must also be honored when the model runs in streaming mode."""
    vertexai.init(api_transport="grpc")
    llm = ChatVertexAI(
        model_name=_DEFAULT_MODEL_NAME,
        timeout=0.001,
        streaming=True,
        rate_limiter=RATE_LIMITER,
    )
    # Even with streaming enabled, a 1 ms deadline cannot be met.
    message = HumanMessage(content="Hello")
    with pytest.raises(DeadlineExceeded):
        llm.invoke([message])
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
'max_retries': 2,
'model_kwargs': dict({
'api_key': 'test',
'timeout': 60,
}),
'model_name': 'gemini-2.5-pro',
'n': 1,
Expand All @@ -25,6 +24,7 @@
'stop': list([
]),
'temperature': 0.0,
'timeout': 60.0,
}),
'lc': 1,
'name': 'ChatVertexAI',
Expand Down
4 changes: 2 additions & 2 deletions libs/vertexai/tests/unit_tests/test_anthropic_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import base64
from unittest.mock import patch

import pytest
from anthropic.types import (
RawContentBlockDeltaEvent,
Expand All @@ -19,14 +20,13 @@

from langchain_google_vertexai._anthropic_utils import (
_documents_in_params,
_format_image,
_format_message_anthropic,
_format_messages_anthropic,
_make_message_chunk_from_anthropic_event,
_thinking_in_params,
)

from langchain_google_vertexai._anthropic_utils import _format_image


def test_format_message_anthropic_with_cache_control_in_kwargs() -> None:
"""Test formatting a message with cache control in additional_kwargs."""
Expand Down
7 changes: 7 additions & 0 deletions libs/vertexai/tests/unit_tests/test_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,10 @@ def test_tracing_params() -> None:
"ls_temperature": 0.1,
"ls_max_tokens": 10,
}


def test_timeout_parameter() -> None:
    """The ``timeout`` kwarg is stored on the model and forwarded to its client."""
    expected = 30.0
    model = VertexAI(
        model_name=_DEFAULT_MODEL_NAME,
        project="test-project",
        timeout=expected,
    )
    # Both the pydantic field and the underlying client must carry the value.
    assert model.timeout == expected
    assert model.client.timeout == expected