1+ from __future__ import annotations
2+
13from abc import ABC , abstractmethod
24from typing import List , Optional , Any , TYPE_CHECKING
35
@@ -33,8 +35,8 @@ class ResultSet(ABC):
3335
3436 def __init__ (
3537 self ,
36- connection : " Connection" ,
37- backend : " DatabricksClient" ,
38+ connection : Connection ,
39+ backend : DatabricksClient ,
3840 arraysize : int ,
3941 buffer_size_bytes : int ,
4042 command_id : CommandId ,
@@ -51,8 +53,8 @@ def __init__(
5153 A ResultSet manages the results of a single command.
5254
5355 Parameters:
54- :param connection: The parent connection
55- :param backend: The backend client
56+ :param connection: The parent connection that was used to execute this command
57+ :param backend: The specialised backend client to be invoked in the fetch phase
5658 :param arraysize: The max number of rows to fetch at a time (PEP-249)
5759 :param buffer_size_bytes: The size (in bytes) of the internal buffer + max fetch
5860 :param command_id: The command ID
@@ -156,9 +158,9 @@ class ThriftResultSet(ResultSet):
156158
157159 def __init__ (
158160 self ,
159- connection : " Connection" ,
160- execute_response : " ExecuteResponse" ,
161- thrift_client : " ThriftDatabricksClient" ,
161+ connection : Connection ,
162+ execute_response : ExecuteResponse ,
163+ thrift_client : ThriftDatabricksClient ,
162164 buffer_size_bytes : int = 104857600 ,
163165 arraysize : int = 10000 ,
164166 use_cloud_fetch : bool = True ,
@@ -314,6 +316,7 @@ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
314316 if size < 0 :
315317 raise ValueError ("size argument for fetchmany is %s but must be >= 0" , size )
316318 results = self .results .next_n_rows (size )
319+ partial_result_chunks = [results ]
317320 n_remaining_rows = size - results .num_rows
318321 self ._next_row_index += results .num_rows
319322
@@ -324,11 +327,11 @@ def fetchmany_arrow(self, size: int) -> "pyarrow.Table":
324327 ):
325328 self ._fill_results_buffer ()
326329 partial_results = self .results .next_n_rows (n_remaining_rows )
327- results = pyarrow . concat_tables ([ results , partial_results ] )
330+ partial_result_chunks . append ( partial_results )
328331 n_remaining_rows -= partial_results .num_rows
329332 self ._next_row_index += partial_results .num_rows
330333
331- return results
334+ return pyarrow . concat_tables ( partial_result_chunks , use_threads = True )
332335
333336 def fetchmany_columnar (self , size : int ):
334337 """
@@ -359,7 +362,7 @@ def fetchall_arrow(self) -> "pyarrow.Table":
359362 """Fetch all (remaining) rows of a query result, returning them as a PyArrow table."""
360363 results = self .results .remaining_rows ()
361364 self ._next_row_index += results .num_rows
362-
365+ partial_result_chunks = [ results ]
363366 while not self .has_been_closed_server_side and self .is_direct_results :
364367 self ._fill_results_buffer ()
365368 partial_results = self .results .remaining_rows ()
@@ -368,7 +371,7 @@ def fetchall_arrow(self) -> "pyarrow.Table":
368371 ):
369372 results = self .merge_columnar (results , partial_results )
370373 else :
371- results = pyarrow . concat_tables ([ results , partial_results ] )
374+ partial_result_chunks . append ( partial_results )
372375 self ._next_row_index += partial_results .num_rows
373376
374377 # If PyArrow is installed and we have a ColumnTable result, convert it to PyArrow Table
@@ -379,7 +382,7 @@ def fetchall_arrow(self) -> "pyarrow.Table":
379382 for name , col in zip (results .column_names , results .column_table )
380383 }
381384 return pyarrow .Table .from_pydict (data )
382- return results
385+ return pyarrow . concat_tables ( partial_result_chunks , use_threads = True )
383386
384387 def fetchall_columnar (self ):
385388 """Fetch all (remaining) rows of a query result, returning them as a Columnar table."""
@@ -452,9 +455,9 @@ class SeaResultSet(ResultSet):
452455
453456 def __init__ (
454457 self ,
455- connection : " Connection" ,
456- execute_response : " ExecuteResponse" ,
457- sea_client : " SeaDatabricksClient" ,
458+ connection : Connection ,
459+ execute_response : ExecuteResponse ,
460+ sea_client : SeaDatabricksClient ,
458461 buffer_size_bytes : int = 104857600 ,
459462 arraysize : int = 10000 ,
460463 result_data = None ,
0 commit comments