55
66from databricks .sql .cloudfetch .download_manager import ResultFileDownloadManager
77
8+ import lz4 .frame
9+
810try :
911 import pyarrow
1012except ImportError :
2224from databricks .sql .exc import ProgrammingError
2325from databricks .sql .thrift_api .TCLIService .ttypes import TSparkArrowResultLink
2426from databricks .sql .types import SSLOptions
25- from databricks .sql .utils import CloudFetchQueue , ResultSetQueue
27+ from databricks .sql .utils import ArrowQueue , CloudFetchQueue , ResultSetQueue , create_arrow_table_from_arrow_file
2628
2729import logging
2830
@@ -61,6 +63,15 @@ def build_queue(
6163 # INLINE disposition with JSON_ARRAY format
6264 return JsonQueue (result_data .data )
6365 elif manifest .format == ResultFormat .ARROW_STREAM .value :
66+ if result_data .attachment is not None :
67+ arrow_file = (
68+ lz4 .frame .decompress (result_data .attachment )
69+ if lz4_compressed
70+ else result_data .attachment
71+ )
72+ arrow_table = create_arrow_table_from_arrow_file (arrow_file , description )
73+ return ArrowQueue (arrow_table , manifest .total_row_count )
74+
6475 # EXTERNAL_LINKS disposition
6576 return SeaCloudFetchQueue (
6677 initial_links = result_data .external_links or [],
@@ -144,7 +155,9 @@ def __init__(
144155 )
145156 )
146157
147- initial_link = next ((l for l in initial_links if l .chunk_index == 0 ), None )
158+ self ._chunk_index_to_link = {link .chunk_index : link for link in initial_links }
159+
160+ initial_link = self ._chunk_index_to_link .get (0 , None )
148161 if not initial_link :
149162 return
150163
@@ -174,6 +187,12 @@ def _convert_to_thrift_link(self, link: "ExternalLink") -> TSparkArrowResultLink
174187 httpHeaders = link .http_headers or {},
175188 )
176189
190+ def _get_chunk_link (self , chunk_index : int ) -> Optional ["ExternalLink" ]:
191+ if chunk_index not in self ._chunk_index_to_link :
192+ links = self ._sea_client .get_chunk_links (self ._statement_id , chunk_index )
193+ self ._chunk_index_to_link .update ({link .chunk_index : link for link in links })
194+ return self ._chunk_index_to_link .get (chunk_index , None )
195+
177196 def _progress_chunk_link (self ):
178197 """Progress to the next chunk link."""
179198 if not self ._current_chunk_link :
@@ -185,17 +204,11 @@ def _progress_chunk_link(self):
185204 self ._current_chunk_link = None
186205 return None
187206
188- try :
189- self ._current_chunk_link = self ._sea_client .get_chunk_link (
190- self ._statement_id , next_chunk_index
191- )
192- except Exception as e :
207+ self ._current_chunk_link = self ._get_chunk_link (next_chunk_index )
208+ if not self ._current_chunk_link :
193209 logger .error (
194- "SeaCloudFetchQueue: Error fetching link for chunk {}: {}" .format (
195- next_chunk_index , e
196- )
210+ "SeaCloudFetchQueue: unable to retrieve link for chunk {}" .format (next_chunk_index )
197211 )
198- self ._current_chunk_link = None
199212 return None
200213
201214 logger .debug (
0 commit comments