hybrid disposition

varun-edachali-dbx · varun-edachali-dbx · commit e74ccd13c90a · 2025-07-08T19:33:24.000+05:30
Signed-off-by: varun-edachali-dbx <varun.edachali@databricks.com>
diff --git a/src/databricks/sql/backend/sea/backend.py b/src/databricks/sql/backend/sea/backend.py
@@ -125,7 +125,8 @@ def __init__(
 
         super().__init__(ssl_options=ssl_options, **kwargs)
 
-        self.use_hybrid_disposition = kwargs.get("use_hybrid_disposition", False)
+        self.use_hybrid_disposition = kwargs.get("use_hybrid_disposition", True)
+        logger.info(f"use_hybrid_disposition: {self.use_hybrid_disposition}")
 
         # Extract warehouse ID from http_path
         self.warehouse_id = self._extract_warehouse_id(http_path)
diff --git a/src/databricks/sql/backend/sea/models/responses.py b/src/databricks/sql/backend/sea/models/responses.py
@@ -4,6 +4,7 @@
 These models define the structures used in SEA API responses.
 """
 
+import base64
 from typing import Dict, Any, List
 from dataclasses import dataclass
 
@@ -91,6 +92,12 @@ def _parse_result(data: Dict[str, Any]) -> ResultData:
                 )
             )
 
+    # Handle attachment field - decode from base64 if present
+    attachment = result_data.get("attachment")
+    if attachment is not None and isinstance(attachment, str):
+        # Decode base64 string to bytes
+        attachment = base64.b64decode(attachment)
+
     return ResultData(
         data=result_data.get("data_array"),
         external_links=external_links,
@@ -100,7 +107,7 @@ def _parse_result(data: Dict[str, Any]) -> ResultData:
         next_chunk_internal_link=result_data.get("next_chunk_internal_link"),
         row_count=result_data.get("row_count"),
         row_offset=result_data.get("row_offset"),
-        attachment=result_data.get("attachment"),
+        attachment=attachment,
     )
 
 
diff --git a/src/databricks/sql/backend/sea/queue.py b/src/databricks/sql/backend/sea/queue.py
@@ -5,6 +5,8 @@
 
 from databricks.sql.cloudfetch.download_manager import ResultFileDownloadManager
 
+import lz4.frame
+
 try:
     import pyarrow
 except ImportError:
@@ -22,7 +24,7 @@
 from databricks.sql.exc import ProgrammingError
 from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
 from databricks.sql.types import SSLOptions
-from databricks.sql.utils import CloudFetchQueue, ResultSetQueue
+from databricks.sql.utils import ArrowQueue, CloudFetchQueue, ResultSetQueue, create_arrow_table_from_arrow_file
 
 import logging
 
@@ -61,6 +63,15 @@ def build_queue(
             # INLINE disposition with JSON_ARRAY format
             return JsonQueue(result_data.data)
         elif manifest.format == ResultFormat.ARROW_STREAM.value:
+            if result_data.attachment is not None: 
+                arrow_file = (
+                    lz4.frame.decompress(result_data.attachment)
+                    if lz4_compressed
+                    else result_data.attachment
+                )
+                arrow_table = create_arrow_table_from_arrow_file(arrow_file, description)
+                return ArrowQueue(arrow_table, manifest.total_row_count)
+
             # EXTERNAL_LINKS disposition
             return SeaCloudFetchQueue(
                 initial_links=result_data.external_links or [],
@@ -144,7 +155,9 @@ def __init__(
             )
         )
 
-        initial_link = next((l for l in initial_links if l.chunk_index == 0), None)
+        self._chunk_index_to_link = {link.chunk_index: link for link in initial_links}
+
+        initial_link = self._chunk_index_to_link.get(0, None)
         if not initial_link:
             return
 
@@ -174,6 +187,12 @@ def _convert_to_thrift_link(self, link: "ExternalLink") -> TSparkArrowResultLink
             httpHeaders=link.http_headers or {},
         )
 
+    def _get_chunk_link(self, chunk_index: int) -> Optional["ExternalLink"]:
+        if chunk_index not in self._chunk_index_to_link:
+            links = self._sea_client.get_chunk_links(self._statement_id, chunk_index)
+            self._chunk_index_to_link.update({link.chunk_index: link for link in links})
+        return self._chunk_index_to_link.get(chunk_index, None)
+
     def _progress_chunk_link(self):
         """Progress to the next chunk link."""
         if not self._current_chunk_link:
@@ -185,17 +204,11 @@ def _progress_chunk_link(self):
             self._current_chunk_link = None
             return None
 
-        try:
-            self._current_chunk_link = self._sea_client.get_chunk_link(
-                self._statement_id, next_chunk_index
-            )
-        except Exception as e:
+        self._current_chunk_link = self._get_chunk_link(next_chunk_index)
+        if not self._current_chunk_link:
             logger.error(
-                "SeaCloudFetchQueue: Error fetching link for chunk {}: {}".format(
-                    next_chunk_index, e
-                )
+                "SeaCloudFetchQueue: unable to retrieve link for chunk {}".format(next_chunk_index)
             )
-            self._current_chunk_link = None
             return None
 
         logger.debug(