Skip to content

Commit 2cd802e

Browse files
reduce repeated init
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
1 parent 47bb758 commit 2cd802e

File tree

3 files changed

+15
-42
lines changed

3 files changed

+15
-42
lines changed

src/databricks/sql/backend/sea/queue.py

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -280,22 +280,7 @@ def _worker_loop(self):
280280
self._link_data_update.notify_all()
281281

282282
def _restart_from_expired_link(self, link: TSparkArrowResultLink):
283-
self.stop()
284-
285-
with self._link_data_update:
286-
self.download_manager.cancel_tasks_from_offset(link.startRowOffset)
287-
288-
chunks_to_restart = []
289-
for chunk_index, l in self.chunk_index_to_link.items():
290-
if l.row_offset < link.startRowOffset:
291-
continue
292-
chunks_to_restart.append(chunk_index)
293-
for chunk_index in chunks_to_restart:
294-
self.chunk_index_to_link.pop(chunk_index)
295-
296-
self.start()
297-
298-
def _restart_from_expired_link(self, link: TSparkArrowResultLink):
283+
"""Restart the link fetcher from the expired link."""
299284
self.stop()
300285

301286
with self._link_data_update:
@@ -363,6 +348,7 @@ def __init__(
363348
schema_bytes=None,
364349
lz4_compressed=lz4_compressed,
365350
description=description,
351+
expiry_callback=self._expiry_callback,
366352
)
367353

368354
logger.debug(
@@ -376,14 +362,6 @@ def __init__(
376362
# Track the current chunk we're processing
377363
self._current_chunk_index = 0
378364

379-
self.download_manager = ResultFileDownloadManager(
380-
links=[],
381-
max_download_threads=max_download_threads,
382-
lz4_compressed=lz4_compressed,
383-
ssl_options=ssl_options,
384-
expiry_callback=self._expiry_callback,
385-
)
386-
387365
self.link_fetcher = None
388366
if total_chunk_count > 0:
389367
self.link_fetcher = LinkFetcher(
@@ -402,6 +380,8 @@ def _expiry_callback(self, link: TSparkArrowResultLink):
402380
logger.info(
403381
f"SeaCloudFetchQueue: Link expired, restarting from offset {link.startRowOffset}"
404382
)
383+
if not self.link_fetcher:
384+
return
405385
self.link_fetcher._restart_from_expired_link(link)
406386

407387
def _create_next_table(self) -> Union["pyarrow.Table", None]:

src/databricks/sql/cloudfetch/download_manager.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,29 +14,28 @@
1414

1515
logger = logging.getLogger(__name__)
1616

17-
T = TypeVar('T')
17+
T = TypeVar("T")
1818

1919

2020
class TaskWithMetadata(Generic[T]):
2121
"""
2222
Wrapper around Future that stores additional metadata (the link).
2323
Provides type-safe access to both the Future result and the associated link.
2424
"""
25-
25+
2626
def __init__(self, future: Future[T], link: TSparkArrowResultLink):
2727
self.future = future
2828
self.link = link
29-
29+
3030
def result(self, timeout: Optional[float] = None) -> T:
3131
"""Get the result of the Future, blocking if necessary."""
3232
return self.future.result(timeout)
33-
33+
3434
def cancel(self) -> bool:
3535
"""Cancel the Future if possible."""
3636
return self.future.cancel()
3737

3838

39-
4039
class ResultFileDownloadManager:
4140
def __init__(
4241
self,

src/databricks/sql/utils.py

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from __future__ import annotations
2-
from typing import Dict, List, Optional, Union
2+
from typing import Callable, Dict, List, Optional, Union
33

44
from dateutil import parser
55
import datetime
@@ -219,6 +219,7 @@ def __init__(
219219
schema_bytes: Optional[bytes] = None,
220220
lz4_compressed: bool = True,
221221
description: List[Tuple] = [],
222+
expiry_callback: Callable[[TSparkArrowResultLink], None] = lambda _: None,
222223
):
223224
"""
224225
Initialize the base CloudFetchQueue.
@@ -247,6 +248,7 @@ def __init__(
247248
max_download_threads=max_download_threads,
248249
lz4_compressed=lz4_compressed,
249250
ssl_options=ssl_options,
251+
expiry_callback=expiry_callback,
250252
)
251253

252254
def next_n_rows(self, num_rows: int) -> "pyarrow.Table":
@@ -373,6 +375,7 @@ def __init__(
373375
schema_bytes=schema_bytes,
374376
lz4_compressed=lz4_compressed,
375377
description=description,
378+
expiry_callback=self._expiry_callback,
376379
)
377380

378381
self.start_row_index = start_row_offset
@@ -392,21 +395,12 @@ def __init__(
392395
)
393396
self.download_manager.add_link(result_link)
394397

395-
def expiry_callback(link: TSparkArrowResultLink):
396-
raise Error("Cloudfetch link has expired")
397-
398-
# Initialize download manager
399-
self.download_manager = ResultFileDownloadManager(
400-
links=self.result_links,
401-
max_download_threads=self.max_download_threads,
402-
lz4_compressed=self.lz4_compressed,
403-
ssl_options=self._ssl_options,
404-
expiry_callback=expiry_callback,
405-
)
406-
407398
# Initialize table and position
408399
self.table = self._create_next_table()
409400

401+
def _expiry_callback(self, link: TSparkArrowResultLink):
402+
raise Error("Cloudfetch link has expired")
403+
410404
def _create_next_table(self) -> Union["pyarrow.Table", None]:
411405
logger.debug(
412406
"ThriftCloudFetchQueue: Trying to get downloaded file for row {}".format(

0 commit comments

Comments (0)