
Commit 0868fe3

formatting + minor type fixes
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
Parent: a736bb4

File tree

5 files changed: +86, -53 lines

- src/databricks/sql/backend/sea/queue.py
- src/databricks/sql/cloudfetch/download_manager.py
- src/databricks/sql/cloudfetch/downloader.py
- src/databricks/sql/utils.py
- tests/unit/test_downloader.py

src/databricks/sql/backend/sea/queue.py

Lines changed: 58 additions & 39 deletions
```diff
@@ -145,55 +145,60 @@ def __init__(
     def _add_links_to_manager(self, links: List["ExternalLink"], notify: bool = True):
         """
         Add external links to both chunk mapping and download manager.
-
+
         Args:
             links: List of external links to add
             notify: Whether to notify waiting threads (default True)
         """
         for link in links:
             self.chunk_index_to_link[link.chunk_index] = link
             self.download_manager.add_link(self._convert_to_thrift_link(link))
-
+
         if notify:
             self._link_data_update.notify_all()

     def _clear_chunks_from_index(self, start_chunk_index: int):
         """
         Clear all chunks >= start_chunk_index from the chunk mapping.
-
+
         Args:
             start_chunk_index: The chunk index to start clearing from (inclusive)
         """
         chunks_to_remove = [
-            chunk_idx for chunk_idx in self.chunk_index_to_link.keys()
+            chunk_idx
+            for chunk_idx in self.chunk_index_to_link.keys()
             if chunk_idx >= start_chunk_index
         ]
-
-        logger.debug(f"LinkFetcher: Clearing chunks {chunks_to_remove} from index {start_chunk_index}")
+
+        logger.debug(
+            f"LinkFetcher: Clearing chunks {chunks_to_remove} from index {start_chunk_index}"
+        )
         for chunk_idx in chunks_to_remove:
             del self.chunk_index_to_link[chunk_idx]

     def _fetch_and_add_links(self, chunk_index: int) -> List["ExternalLink"]:
         """
         Fetch links from backend and add them to manager.
-
+
         Args:
             chunk_index: The chunk index to fetch
-
+
         Returns:
             List of fetched external links
-
+
         Raises:
             Exception: If fetching fails
         """
         logger.debug(f"LinkFetcher: Fetching links for chunk {chunk_index}")
-
+
         try:
             links = self.backend.get_chunk_links(self._statement_id, chunk_index)
             self._add_links_to_manager(links, notify=True)
-            logger.debug(f"LinkFetcher: Added {len(links)} links starting from chunk {chunk_index}")
+            logger.debug(
+                f"LinkFetcher: Added {len(links)} links starting from chunk {chunk_index}"
+            )
             return links
-
+
         except Exception as e:
             logger.error(f"LinkFetcher: Failed to fetch chunk {chunk_index}: {e}")
             self._error = e
@@ -236,38 +241,38 @@ def get_chunk_link(self, chunk_index: int) -> Optional["ExternalLink"]:
     def restart_from_chunk(self, chunk_index: int):
         """
         Restart the LinkFetcher from a specific chunk index.
-
+
         This method handles both cases:
         1. LinkFetcher is done/closed but we need to restart it
         2. LinkFetcher is active but we need it to start from the expired chunk
-
+
         The key insight: we need to clear all chunks >= restart_chunk_index
         so that _get_next_chunk_index() returns the correct next chunk.
-
+
         Args:
             chunk_index: The chunk index to restart from
         """
         logger.debug(f"LinkFetcher: Restarting from chunk {chunk_index}")
-
+
         # Stop the current worker if running
         self.stop()
-
+
         with self._link_data_update:
             # Clear error state
             self._error = None
-
+
             # 🔥 CRITICAL: Clear all chunks >= restart_chunk_index
             # This ensures _get_next_chunk_index() works correctly
            self._clear_chunks_from_index(chunk_index)
-
+
            # Now fetch the restart chunk (and potentially its batch)
            # This becomes our new "max chunk" and starting point
            try:
                self._fetch_and_add_links(chunk_index)
            except Exception as e:
                # Error already logged and set by _fetch_and_add_links
                raise e
-
+
        # Start the worker again - now _get_next_chunk_index() will work correctly
        self.start()
        logger.debug(f"LinkFetcher: Successfully restarted from chunk {chunk_index}")
@@ -294,7 +299,7 @@ def _worker_loop(self):
    def start(self):
        if self._worker_thread and self._worker_thread.is_alive():
            return  # Already running
-
+
        self._shutdown_event.clear()
        self._worker_thread = threading.Thread(target=self._worker_loop)
        self._worker_thread.start()
```
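The restart logic above follows a strict stop, clear, refetch, start order, with the worker deriving its next chunk from the mapping it maintains. Below is a minimal, self-contained sketch of that pattern; `Fetcher`, `fetch_batch`, and the dict-backed store are illustrative stand-ins, not the driver's actual classes, and `stop()` is simplified (the real one also signals a shutdown event):

```python
import threading

class Fetcher:
    """Toy restartable link fetcher; mirrors the stop/clear/refetch/start order."""

    def __init__(self, fetch_batch):
        self.fetch_batch = fetch_batch   # callable: start index -> list of (index, link)
        self.links = {}                  # chunk index -> link
        self.cond = threading.Condition()
        self.worker = None

    def start(self):
        if self.worker and self.worker.is_alive():
            return  # already running
        self.worker = threading.Thread(target=self._loop, daemon=True)
        self.worker.start()

    def stop(self):
        # Simplified: the real stop() also sets a shutdown event and notifies waiters.
        if self.worker:
            self.worker.join()

    def _loop(self):
        while True:
            # Next chunk is derived from the mapping, which is why stale
            # entries >= the restart index must be cleared before restarting.
            next_index = max(self.links, default=-1) + 1
            batch = self.fetch_batch(next_index)
            if not batch:
                break
            with self.cond:
                self.links.update(batch)
                self.cond.notify_all()

    def restart_from(self, index):
        self.stop()                                         # 1. stop the worker
        with self.cond:
            self.links = {i: l for i, l in self.links.items() if i < index}  # 2. clear >= index
            self.links.update(self.fetch_batch(index))      # 3. refetch the restart chunk
        self.start()                                        # 4. resume the worker

data = {i: f"link-{i}" for i in range(6)}

def fetch_batch(start, size=2):
    return [(i, data[i]) for i in range(start, min(start + size, len(data)))]

f = Fetcher(fetch_batch)
f.start()
f.stop()                   # toy worker drains all six chunks, then exits
f.restart_from(3)          # simulate expiry: drop chunks >= 3 and refetch
f.stop()
print(sorted(f.links))     # [0, 1, 2, 3, 4, 5]
```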
```diff
@@ -376,22 +381,24 @@ def __init__(
         # Initialize table and position
         self.table = self._create_next_table()

-    def _handle_expired_link(self, expired_link: TSparkArrowResultLink) -> TSparkArrowResultLink:
+    def _handle_expired_link(
+        self, expired_link: TSparkArrowResultLink
+    ) -> TSparkArrowResultLink:
         """
         Handle expired link for SEA backend.
-
+
         For SEA backend, we can handle expired links robustly by:
         1. Cancelling all pending downloads
         2. Finding the chunk index for the expired link
         3. Restarting the LinkFetcher from that chunk
         4. Returning the requested link
-
+
         Args:
             expired_link: The expired link
-
+
         Returns:
             A new link with the same row offset
-
+
         Raises:
             Error: If unable to fetch new link
         """
@@ -400,14 +407,19 @@ def _handle_expired_link(self, expired_link: TSparkArrowResultLink) -> TSparkArr
                 expired_link.startRowOffset, expired_link.rowCount
             )
         )
-
+
+        if not self.download_manager:
+            raise ValueError("Download manager not initialized")
+
         try:
             # Step 1: Cancel all pending downloads
             self.download_manager.cancel_all_downloads()
             logger.debug("SeaCloudFetchQueue: Cancelled all pending downloads")
-
+
             # Step 2: Find which chunk contains the expired link
-            target_chunk_index = self._find_chunk_index_for_row_offset(expired_link.startRowOffset)
+            target_chunk_index = self._find_chunk_index_for_row_offset(
+                expired_link.startRowOffset
+            )
             if target_chunk_index is None:
                 # If we can't find the chunk, we may need to search more broadly
                 # For now, let's assume it's a reasonable chunk based on the row offset
@@ -419,31 +431,38 @@ def _handle_expired_link(self, expired_link: TSparkArrowResultLink) -> TSparkArr
                 )
                 # Try to estimate chunk index - this is a heuristic
                 target_chunk_index = 0  # Start from beginning as fallback
-
+
             # Step 3: Restart LinkFetcher from the target chunk
             # This handles both stopped and active LinkFetcher cases
             self.link_fetcher.restart_from_chunk(target_chunk_index)
-
+
             # Step 4: Find and return the link that matches the expired link's row offset
             # After restart, the chunk should be available
-            for chunk_index, external_link in self.link_fetcher.chunk_index_to_link.items():
+            for (
+                chunk_index,
+                external_link,
+            ) in self.link_fetcher.chunk_index_to_link.items():
                 if external_link.row_offset == expired_link.startRowOffset:
-                    new_thrift_link = self.link_fetcher._convert_to_thrift_link(external_link)
+                    new_thrift_link = self.link_fetcher._convert_to_thrift_link(
+                        external_link
+                    )
                     logger.debug(
                         "SeaCloudFetchQueue: Found replacement link for offset {}, row count {}".format(
                             new_thrift_link.startRowOffset, new_thrift_link.rowCount
                         )
                     )
                     return new_thrift_link
-
+
             # If we still can't find it, raise an error
             logger.error(
                 "SeaCloudFetchQueue: Could not find replacement link for row offset {} after restart".format(
                     expired_link.startRowOffset
                 )
             )
-            raise Error(f"CloudFetch link has expired and could not be renewed for offset {expired_link.startRowOffset}")
-
+            raise Error(
+                f"CloudFetch link has expired and could not be renewed for offset {expired_link.startRowOffset}"
+            )
+
         except Exception as e:
             logger.error(
                 "SeaCloudFetchQueue: Error handling expired link: {}".format(str(e))
@@ -456,18 +475,18 @@ def _handle_expired_link(self, expired_link: TSparkArrowResultLink) -> TSparkArr
     def _find_chunk_index_for_row_offset(self, row_offset: int) -> Optional[int]:
         """
         Find the chunk index that contains the given row offset.
-
+
         Args:
             row_offset: The row offset to find
-
+
         Returns:
             The chunk index, or None if not found
         """
         # Search through our known chunks to find the one containing this row offset
         for chunk_index, external_link in self.link_fetcher.chunk_index_to_link.items():
             if external_link.row_offset == row_offset:
                 return chunk_index
-
+
         # If not found in known chunks, return None and let the caller handle it
         return None
```
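Step 4's replacement lookup is a linear scan of the chunk mapping keyed on row offset. A sketch of just that lookup, under assumed shapes (a dict of chunk index to objects exposing `row_offset`, mirroring the fields used above; the `ExternalLink` dataclass here is a stand-in, not the driver's class):

```python
from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class ExternalLink:
    """Stand-in carrying only the field the scan needs."""
    row_offset: int
    url: str

def find_replacement(
    chunk_index_to_link: Dict[int, ExternalLink], expired_offset: int
) -> Optional[ExternalLink]:
    """Return the refreshed link whose row_offset matches, or None."""
    for link in chunk_index_to_link.values():
        if link.row_offset == expired_offset:
            return link
    return None

links = {0: ExternalLink(0, "https://example/0"), 1: ExternalLink(100, "https://example/1")}
assert find_replacement(links, 100).url == "https://example/1"
assert find_replacement(links, 999) is None  # the caller raises Error in this case
```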

src/databricks/sql/cloudfetch/download_manager.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -41,7 +41,7 @@ def __init__(

         self._downloadable_result_settings = DownloadableResultSettings(
             is_lz4_compressed=lz4_compressed,
-            expired_link_callback=expired_link_callback
+            expired_link_callback=expired_link_callback,
         )
         self._ssl_options = ssl_options

@@ -126,22 +126,22 @@ def add_link(self, link: TSparkArrowResultLink):
     def cancel_all_downloads(self):
         """
         Cancel all pending downloads and clear the download queue.
-
+
         This method is typically called when links have expired and we need to
         cancel all pending downloads before fetching new links.
         """
         logger.debug("ResultFileDownloadManager: cancelling all downloads")
-
+
         # Cancel all pending download tasks
         cancelled_count = 0
         for task in self._download_tasks:
             if task.cancel():
                 cancelled_count += 1
-
+
         logger.debug(
             f"ResultFileDownloadManager: cancelled {cancelled_count} out of {len(self._download_tasks)} downloads"
         )
-
+
         # Clear the download tasks and pending links
         self._download_tasks.clear()
         self._pending_links.clear()
```
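The cancelled-vs-total logging above reflects `concurrent.futures.Future.cancel()` semantics, assuming the download tasks are futures submitted to a thread pool: `cancel()` succeeds only for tasks still waiting in the queue, never for ones already running. A quick, standalone demonstration of that behavior:

```python
import time
from concurrent.futures import ThreadPoolExecutor

def slow_download(i: int) -> int:
    time.sleep(0.5)  # stand-in for an HTTP fetch
    return i

pool = ThreadPoolExecutor(max_workers=1)
tasks = [pool.submit(slow_download, i) for i in range(4)]

time.sleep(0.1)  # let the first task start running
cancelled = sum(1 for t in tasks if t.cancel())
# Typically 3 of 4: the task already executing cannot be cancelled.
print(f"cancelled {cancelled} out of {len(tasks)} downloads")
pool.shutdown(wait=True)
```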

src/databricks/sql/cloudfetch/downloader.py

Lines changed: 11 additions & 3 deletions
```diff
@@ -56,11 +56,11 @@ class DownloadableResultSettings:
         expired_link_callback (Callable): Callback function to handle expired links. Must return a new link.
     """

+    expired_link_callback: Callable[[TSparkArrowResultLink], TSparkArrowResultLink]
     is_lz4_compressed: bool
     link_expiry_buffer_secs: int = 0
     download_timeout: int = 60
     max_consecutive_file_download_retries: int = 0
-    expired_link_callback: Callable[[TSparkArrowResultLink], TSparkArrowResultLink] = None


 class ResultSetDownloadHandler:
```
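This field move is the "minor type fix" from the commit message: `@dataclass` raises at class-creation time if a field without a default follows fields that have defaults, and the old `= None` workaround contradicted the non-Optional `Callable` annotation. A minimal reproduction with toy field types, not the real settings class:

```python
from dataclasses import dataclass
from typing import Callable

try:
    @dataclass
    class Bad:
        is_lz4_compressed: bool
        link_expiry_buffer_secs: int = 0
        expired_link_callback: Callable[[str], str]  # no default after a default
except TypeError as e:
    print(e)  # non-default argument 'expired_link_callback' follows default argument

@dataclass
class Good:
    expired_link_callback: Callable[[str], str]  # required fields come first
    is_lz4_compressed: bool
    link_expiry_buffer_secs: int = 0
```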
```diff
@@ -90,7 +90,10 @@ def run(self) -> DownloadedFile:

         # Check if link is already expired or is expiring
         ResultSetDownloadHandler._validate_link(
-            self.link, self.settings.link_expiry_buffer_secs, self.settings.expired_link_callback, self
+            self.link,
+            self.settings.link_expiry_buffer_secs,
+            self.settings.expired_link_callback,
+            self,
         )

         session = requests.Session()
@@ -158,7 +161,12 @@ def _is_link_expired(link: TSparkArrowResultLink, expiry_buffer_secs: int) -> bo
         )

     @staticmethod
-    def _validate_link(link: TSparkArrowResultLink, expiry_buffer_secs: int, expired_link_callback: Callable, handler_instance):
+    def _validate_link(
+        link: TSparkArrowResultLink,
+        expiry_buffer_secs: int,
+        expired_link_callback: Callable,
+        handler_instance,
+    ):
         """
         Check if a link has expired or will expire, and handle expired links via callback.
```

src/databricks/sql/utils.py

Lines changed: 6 additions & 4 deletions
```diff
@@ -387,16 +387,18 @@ def __init__(
         # Initialize table and position
         self.table = self._create_next_table()

-    def _handle_expired_link(self, expired_link: TSparkArrowResultLink) -> TSparkArrowResultLink:
+    def _handle_expired_link(
+        self, expired_link: TSparkArrowResultLink
+    ) -> TSparkArrowResultLink:
         """
         Handle expired link for Thrift backend.
-
+
         For Thrift backend, we cannot fetch new links, so we raise an error.
         This maintains the existing behavior for Thrift.
-
+
         Args:
             expired_link: The expired link
-
+
         Raises:
             Error: Always raises an error indicating the link has expired
         """
```

tests/unit/test_downloader.py

Lines changed: 6 additions & 2 deletions
```diff
@@ -24,7 +24,9 @@ class DownloaderTests(unittest.TestCase):
     def test_run_link_expired(self, mock_time):
         settings = Mock()
         settings.link_expiry_buffer_secs = 0
-        settings.expired_link_callback = Mock(side_effect=Error("CloudFetch link has expired"))
+        settings.expired_link_callback = Mock(
+            side_effect=Error("CloudFetch link has expired")
+        )
         result_link = Mock()
         # Already expired
         result_link.expiryTime = 999
@@ -41,7 +43,9 @@ def test_run_link_expired(self, mock_time):
     @patch("time.time", return_value=1000)
     def test_run_link_past_expiry_buffer(self, mock_time):
         settings = Mock(link_expiry_buffer_secs=5)
-        settings.expired_link_callback = Mock(side_effect=Error("CloudFetch link has expired"))
+        settings.expired_link_callback = Mock(
+            side_effect=Error("CloudFetch link has expired")
+        )
         result_link = Mock()
         # Within the expiry buffer time
         result_link.expiryTime = 1004
```
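The `side_effect=Error(...)` pattern above makes the mocked callback raise as soon as the downloader invokes it, which is how these tests pin down the no-renewal path. A condensed, standalone version of the same idiom (the `Error` class is a stand-in):

```python
from unittest.mock import Mock

class Error(Exception):
    """Stand-in for databricks.sql.exc.Error."""

callback = Mock(side_effect=Error("CloudFetch link has expired"))

try:
    callback("some-expired-link")   # calling the mock raises the configured error
except Error as e:
    print(e)                        # CloudFetch link has expired

callback.assert_called_once_with("some-expired-link")
```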
