11import logging
22import ssl
33import urllib .parse
4+ import urllib .request
45from contextlib import contextmanager
56from typing import Dict , Any , Optional , Generator
67
1415from databricks .sql .common .http import HttpMethod
1516from databricks .sql .common .http_utils import (
1617 detect_and_parse_proxy ,
17- create_connection_pool ,
18+ create_basic_proxy_auth_headers ,
1819)
1920
2021logger = logging .getLogger (__name__ )
@@ -27,6 +28,10 @@ class UnifiedHttpClient:
2728 This client uses urllib3 for robust HTTP communication with retry policies,
2829 connection pooling, SSL support, and proxy support. It replaces the various
2930 singleton HTTP clients and direct requests usage throughout the codebase.
31+
32+ The client supports per-request proxy decisions, automatically routing requests
33+ through proxy or direct connections based on system proxy bypass rules and
34+ the target hostname of each request.
3035 """
3136
3237 def __init__ (self , client_context ):
@@ -37,12 +42,17 @@ def __init__(self, client_context):
3742 client_context: ClientContext instance containing HTTP configuration
3843 """
3944 self .config = client_context
40- self ._pool_manager = None
45+ # Since the unified http client is used for all requests, we need to have proxy and direct pool managers
46+ # for per-request proxy decisions.
47+ self ._direct_pool_manager = None
48+ self ._proxy_pool_manager = None
4149 self ._retry_policy = None
42- self ._setup_pool_manager ()
50+ self ._proxy_uri = None
51+ self ._proxy_auth = None
52+ self ._setup_pool_managers ()
4353
44- def _setup_pool_manager (self ):
45- """Set up the urllib3 PoolManager with configuration from ClientContext ."""
54+ def _setup_pool_managers (self ):
55+ """Set up both direct and proxy pool managers for per-request proxy decisions ."""
4656
4757 # SSL context setup
4858 ssl_context = None
@@ -89,11 +99,6 @@ def _setup_pool_manager(self):
8999 self ._retry_policy ._command_type = None
90100 self ._retry_policy ._retry_start_time = None
91101
92- parsed_url = urllib .parse .urlparse (self .config .hostname )
93- self .scheme = parsed_url .scheme
94- self .host = parsed_url .hostname
95- self .port = parsed_url .port
96-
97102 # Common pool manager kwargs
98103 pool_kwargs = {
99104 "num_pools" : self .config .pool_connections ,
@@ -107,31 +112,83 @@ def _setup_pool_manager(self):
107112 "ssl_context" : ssl_context ,
108113 }
109114
110- # Detect proxy using shared utility
111- proxy_uri , proxy_auth = detect_and_parse_proxy (
112- self .scheme , self .config .hostname
113- )
115+ # Always create a direct pool manager
116+ self ._direct_pool_manager = PoolManager (** pool_kwargs )
114117
115- if proxy_uri :
116- parsed_proxy = urllib .parse .urlparse (proxy_uri )
117- # realhost and realport are the host and port of the actual request
118- self .realhost = self .host
119- self .realport = self .port
120- # this is passed to ProxyManager
121- self .proxy_uri : str = proxy_uri
122- self .host = parsed_proxy .hostname
123- self .port = parsed_proxy .port
124- self .proxy_auth = proxy_auth
125- else :
126- self .realhost = self .realport = self .proxy_auth = self .proxy_uri = None
118+ # Detect system proxy configuration
119+ # We use 'https' as default scheme since most requests will be HTTPS
120+ parsed_url = urllib .parse .urlparse (self .config .hostname )
121+ self .scheme = parsed_url .scheme or "https"
127122
128- # Create proxy or regular pool manager
129- if self .using_proxy ():
130- self ._pool_manager = ProxyManager (
131- self .config .http_proxy , proxy_headers = proxy_auth , ** pool_kwargs
123+ # Check if system has proxy configured for our scheme
124+ try :
125+ # Use shared proxy detection logic, skipping bypass since we handle that per-request
126+ proxy_url , proxy_auth = detect_and_parse_proxy (
127+ self .scheme , skip_bypass = True
132128 )
129+
130+ if proxy_url :
131+ # Store proxy configuration for per-request decisions
132+ self ._proxy_uri = proxy_url
133+ self ._proxy_auth = proxy_auth
134+
135+ # Create proxy pool manager
136+ self ._proxy_pool_manager = ProxyManager (
137+ proxy_url , proxy_headers = proxy_auth , ** pool_kwargs
138+ )
139+ logger .debug ("Initialized with proxy support: %s" , proxy_url )
140+ else :
141+ self ._proxy_pool_manager = None
142+ logger .debug ("No system proxy detected, using direct connections only" )
143+
144+ except Exception as e :
145+ # If proxy detection fails, fall back to direct connections only
146+ logger .debug ("Error detecting system proxy configuration: %s" , e )
147+ self ._proxy_pool_manager = None
148+
149+ def _should_use_proxy (self , target_host : str ) -> bool :
150+ """
151+ Determine if a request to the target host should use proxy.
152+
153+ Args:
154+ target_host: The hostname of the target URL
155+
156+ Returns:
157+ True if proxy should be used, False for direct connection
158+ """
159+ # If no proxy is configured, always use direct connection
160+ if not self ._proxy_pool_manager or not self ._proxy_uri :
161+ return False
162+
163+ # Check system proxy bypass rules for this specific host
164+ try :
165+ # proxy_bypass returns True if the host should BYPASS the proxy
166+ # We want the opposite - True if we should USE the proxy
167+ return not urllib .request .proxy_bypass (target_host )
168+ except Exception as e :
169+ # If proxy_bypass fails, default to using proxy (safer choice)
170+ logger .debug ("Error checking proxy bypass for host %s: %s" , target_host , e )
171+ return True
172+
173+ def _get_pool_manager_for_url (self , url : str ) -> urllib3 .PoolManager :
174+ """
175+ Get the appropriate pool manager for the given URL.
176+
177+ Args:
178+ url: The target URL
179+
180+ Returns:
181+ PoolManager instance (either direct or proxy)
182+ """
183+ parsed_url = urllib .parse .urlparse (url )
184+ target_host = parsed_url .hostname
185+
186+ if target_host and self ._should_use_proxy (target_host ):
187+ logger .debug ("Using proxy for request to %s" , target_host )
188+ return self ._proxy_pool_manager
133189 else :
134- self ._pool_manager = PoolManager (** pool_kwargs )
190+ logger .debug ("Using direct connection for request to %s" , target_host )
191+ return self ._direct_pool_manager
135192
136193 def _prepare_headers (
137194 self , headers : Optional [Dict [str , str ]] = None
@@ -184,10 +241,13 @@ def request_context(
184241 # Prepare retry policy for this request
185242 self ._prepare_retry_policy ()
186243
244+ # Select appropriate pool manager based on target URL
245+ pool_manager = self ._get_pool_manager_for_url (url )
246+
187247 response = None
188248
189249 try :
190- response = self . _pool_manager .request (
250+ response = pool_manager .request (
191251 method = method .value , url = url , headers = request_headers , ** kwargs
192252 )
193253 yield response
@@ -227,14 +287,17 @@ def request(
227287 return response
228288
229289 def using_proxy (self ) -> bool :
230- """Check if proxy is being used."""
231- return self .realhost is not None
290+ """Check if proxy support is available (not whether it's being used for a specific request) ."""
291+ return self ._proxy_pool_manager is not None
232292
233293 def close (self ):
234294 """Close the underlying connection pools."""
235- if self ._pool_manager :
236- self ._pool_manager .clear ()
237- self ._pool_manager = None
295+ if self ._direct_pool_manager :
296+ self ._direct_pool_manager .clear ()
297+ self ._direct_pool_manager = None
298+ if self ._proxy_pool_manager :
299+ self ._proxy_pool_manager .clear ()
300+ self ._proxy_pool_manager = None
238301
239302 def __enter__ (self ):
240303 return self
0 commit comments