@@ -6,7 +6,7 @@
 
 from databricks.sql.cloudfetch.download_manager import ResultFileDownloadManager
 
-import lz4.frame
+from databricks.sql.cloudfetch.downloader import ResultSetDownloadHandler
 
 try:
     import pyarrow
@@ -37,25 +37,6 @@
 logger = logging.getLogger(__name__)
 
 
-def decompress_multi_frame_lz4(attachment: bytes) -> bytes:
-    try:
-        decompressor = lz4.frame.LZ4FrameDecompressor()
-        arrow_file = decompressor.decompress(attachment)
-
-        # the attachment may be a concatenation of multiple LZ4 frames
-        while decompressor.unused_data:
-            remaining_data = decompressor.unused_data
-            arrow_file += decompressor.decompress(remaining_data)
-
-        logger.debug(f"LZ4 decompressed {len(arrow_file)} bytes from attachment")
-
-    except Exception as e:
-        logger.error(f"LZ4 decompression failed: {e}")
-        raise e
-
-    return arrow_file
-
-
 class SeaResultSetQueueFactory(ABC):
     @staticmethod
     def build_queue(
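The deleted `decompress_multi_frame_lz4` is replaced by the connector's shared `ResultSetDownloadHandler._decompress_data`, so inline attachments and downloaded cloud-fetch files go through one decompression path. For illustration, a self-contained sketch of the multi-frame LZ4 pattern the deleted helper handled, written conservatively with a fresh decompressor per frame; the function and variable names here are illustrative, not connector APIs:

```python
import lz4.frame

def decompress_multi_frame(attachment: bytes) -> bytes:
    """Decompress a payload that may concatenate several LZ4 frames."""
    out = b""
    remaining = attachment
    while remaining:
        # unused_data holds whatever bytes follow the frame that was just decoded.
        decompressor = lz4.frame.LZ4FrameDecompressor()
        out += decompressor.decompress(remaining)
        remaining = decompressor.unused_data or b""
    return out

# Two concatenated frames round-trip back to the original bytes.
a, b = b"first arrow batch " * 64, b"second arrow batch " * 64
attachment = lz4.frame.compress(a) + lz4.frame.compress(b)
assert decompress_multi_frame(attachment) == a + b
```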
@@ -90,7 +71,7 @@ def build_queue(
         elif manifest.format == ResultFormat.ARROW_STREAM.value:
             if result_data.attachment is not None:
                 arrow_file = (
-                    decompress_multi_frame_lz4(result_data.attachment)
+                    ResultSetDownloadHandler._decompress_data(result_data.attachment)
                     if lz4_compressed
                     else result_data.attachment
                 )
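The factory now routes inline Arrow attachments through the same decompression helper as downloaded cloud-fetch files. How the resulting `arrow_file` bytes become a table is outside this hunk; the sketch below shows one plausible way to materialize them with pyarrow, with the IPC-stream assumption called out in a comment:

```python
import io
import pyarrow
import pyarrow.ipc
from databricks.sql.cloudfetch.downloader import ResultSetDownloadHandler

def attachment_to_table(attachment: bytes, lz4_compressed: bool) -> pyarrow.Table:
    # Same conditional as in build_queue above.
    arrow_file = (
        ResultSetDownloadHandler._decompress_data(attachment)
        if lz4_compressed
        else attachment
    )
    # Assumption: the bytes are an Arrow IPC stream; swap in
    # pyarrow.ipc.open_file if they are in the Arrow file format instead.
    return pyarrow.ipc.open_stream(io.BytesIO(arrow_file)).read_all()
```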
@@ -300,10 +281,57 @@ def __init__(
         self.link_fetcher.start()
 
         # Initialize table and position
-        self.table = self._create_next_table()
+        self.table = self._create_table_from_link(self._current_chunk_link)
+
+    def _convert_to_thrift_link(self, link: "ExternalLink") -> TSparkArrowResultLink:
+        """Convert SEA external links to Thrift format for compatibility with existing download manager."""
+        # Parse the ISO format expiration time
+        expiry_time = int(dateutil.parser.parse(link.expiration).timestamp())
+        return TSparkArrowResultLink(
+            fileLink=link.external_link,
+            expiryTime=expiry_time,
+            rowCount=link.row_count,
+            bytesNum=link.byte_count,
+            startRowOffset=link.row_offset,
+            httpHeaders=link.http_headers or {},
+        )
+
+    def _get_chunk_link(self, chunk_index: int) -> Optional["ExternalLink"]:
+        if chunk_index not in self._chunk_index_to_link:
+            links = self._sea_client.get_chunk_links(self._statement_id, chunk_index)
+            self._chunk_index_to_link.update({link.chunk_index: link for link in links})
+        return self._chunk_index_to_link.get(chunk_index, None)
+
+    def _progress_chunk_link(self):
+        """Progress to the next chunk link."""
+        if not self._current_chunk_link:
+            return None
+
+        next_chunk_index = self._current_chunk_link.next_chunk_index
+
+        if next_chunk_index is None:
+            self._current_chunk_link = None
+            return None
+
+        self._current_chunk_link = self._get_chunk_link(next_chunk_index)
+        if not self._current_chunk_link:
+            logger.error(
+                "SeaCloudFetchQueue: unable to retrieve link for chunk {}".format(
+                    next_chunk_index
+                )
+            )
+            return None
+
+        logger.debug(
+            f"SeaCloudFetchQueue: Progressed to link for chunk {next_chunk_index}: {self._current_chunk_link}"
+        )
 
     def _create_next_table(self) -> Union["pyarrow.Table", None]:
         """Create next table by retrieving the logical next downloaded file."""
+        if not self._current_chunk_link:
+            logger.debug("SeaCloudFetchQueue: No current chunk link, returning")
+            return None
+
         if not self.download_manager:
             logger.debug("SeaCloudFetchQueue: No download manager, returning")
             return None
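`_convert_to_thrift_link` bridges SEA's `ExternalLink` to the `TSparkArrowResultLink` that the existing download manager consumes; the only non-trivial field is the expiration, which goes from an ISO-8601 string to epoch seconds. A minimal sketch of just that conversion, with an invented sample timestamp:

```python
import dateutil.parser

# Invented ISO-8601 expiration string of the kind a SEA external link carries.
expiration = "2025-07-01T12:34:56.000Z"

# dateutil copes with the trailing 'Z' (UTC) and fractional seconds; the aware
# datetime's timestamp() gives seconds since the epoch, truncated to int here
# just as the diff does for expiryTime.
expiry_time = int(dateutil.parser.parse(expiration).timestamp())
```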
@@ -317,4 +345,8 @@ def _create_next_table(self) -> Union["pyarrow.Table", None]:
 
         self.current_chunk_index += 1
 
-        return arrow_table
+        if not self._current_chunk_link:
+            logger.debug("SeaCloudFetchQueue: No current chunk link, returning")
+            return None
+
+        return self._create_table_from_link(self._current_chunk_link)
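Taken together, `_get_chunk_link` and `_progress_chunk_link` treat the chunk links as a singly linked list keyed by `chunk_index` and threaded through `next_chunk_index`, and `_create_next_table` stops once no link remains. A toy model of that traversal, with an illustrative stand-in for `ExternalLink`:

```python
from dataclasses import dataclass
from typing import Dict, Optional

@dataclass
class Link:
    # Illustrative stand-in for ExternalLink: only the traversal fields.
    chunk_index: int
    next_chunk_index: Optional[int]

# Three chunks threaded 0 -> 1 -> 2, terminated by next_chunk_index=None.
links: Dict[int, Link] = {0: Link(0, 1), 1: Link(1, 2), 2: Link(2, None)}

current: Optional[Link] = links[0]
visited = []
while current is not None:
    visited.append(current.chunk_index)
    nxt = current.next_chunk_index
    current = links[nxt] if nxt is not None else None

assert visited == [0, 1, 2]
```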