@@ -884,47 +884,31 @@ def transform_paramstyle(
def create_arrow_table_from_arrow_file(
    file_bytes: bytes, description
) -> "pyarrow.Table":
    """Build an Arrow table from Arrow file bytes and post-process its decimals.

    Args:
        file_bytes: The bytes of the Arrow file.
        description: The column descriptions, forwarded unchanged to
            ``convert_decimals_in_arrow_table``.

    Returns:
        The table returned by ``convert_decimals_in_arrow_table``.
    """
    # Parse the raw bytes first, then hand the table to the decimal pass.
    return convert_decimals_in_arrow_table(
        convert_arrow_based_file_to_arrow_table(file_bytes), description
    )
899889
900890
def convert_arrow_based_file_to_arrow_table(file_bytes: bytes):
    """Read the bytes of an Arrow file into a single Arrow table.

    Args:
        file_bytes: The bytes of the Arrow file; passed directly to
            ``pyarrow.ipc.open_stream``.

    Returns:
        The result of ``read_all()`` on the opened stream.

    Raises:
        RuntimeError: If opening or reading the stream raises any
            ``Exception``. The original exception is available both as
            the second element of ``args`` and as ``__cause__``.
    """
    try:
        return pyarrow.ipc.open_stream(file_bytes).read_all()
    except Exception as e:
        # Keep the original (message, exception) args for existing callers,
        # and chain explicitly so the traceback shows the root cause as a
        # direct cause rather than an incidental error during handling.
        raise RuntimeError(
            "Failure to convert arrow based file to arrow table", e
        ) from e
915896
def convert_arrow_based_set_to_arrow_table(arrow_batches, lz4_compressed, schema_bytes):
    """Concatenate a schema and a set of Arrow batches into one Arrow table.

    Args:
        arrow_batches: Iterable of objects exposing ``rowCount`` and
            ``batch`` attributes.
        lz4_compressed: When truthy, each ``batch`` payload is passed
            through ``lz4.frame.decompress`` before being appended.
        schema_bytes: The schema bytes, placed at the start of the stream.

    Returns:
        A ``(table, n_rows)`` tuple: the table read from the assembled
        stream and the sum of ``rowCount`` over all batches.
    """
    # The stream is the schema followed by every batch payload in order.
    stream = bytearray()
    stream += schema_bytes

    total_rows = 0
    for chunk in arrow_batches:
        total_rows += chunk.rowCount
        payload = chunk.batch
        if lz4_compressed:
            payload = lz4.frame.decompress(payload)
        stream += payload

    return pyarrow.ipc.open_stream(stream).read_all(), total_rows
916910
917911def convert_decimals_in_arrow_table (table , description ) -> "pyarrow.Table" :
918- """
919- Convert decimal columns in an Arrow table to the correct precision and scale.
920-
921- Args:
922- table: The Arrow table
923- description: The column descriptions
924-
925- Returns:
926- pyarrow.Table: The Arrow table with correct decimal types
927- """
928912 new_columns = []
929913 new_fields = []
930914
@@ -951,35 +935,7 @@ def convert_decimals_in_arrow_table(table, description) -> "pyarrow.Table":
951935
952936 return pyarrow .Table .from_arrays (new_columns , schema = new_schema )
953937
954-
955- def convert_arrow_based_set_to_arrow_table (arrow_batches , lz4_compressed , schema_bytes ):
956- """
957- Convert a set of Arrow batches to an Arrow table.
958-
959- Args:
960- arrow_batches: The Arrow batches
961- lz4_compressed: Whether the batches are LZ4 compressed
962- schema_bytes: The schema bytes
963-
964- Returns:
965- Tuple[pyarrow.Table, int]: The Arrow table and the number of rows
966- """
967- ba = bytearray ()
968- ba += schema_bytes
969- n_rows = 0
970- for arrow_batch in arrow_batches :
971- n_rows += arrow_batch .rowCount
972- ba += (
973- lz4 .frame .decompress (arrow_batch .batch )
974- if lz4_compressed
975- else arrow_batch .batch
976- )
977- arrow_table = pyarrow .ipc .open_stream (ba ).read_all ()
978- return arrow_table , n_rows
979-
980-
981938def convert_to_assigned_datatypes_in_column_table (column_table , description ):
982-
983939 converted_column_table = []
984940 for i , col in enumerate (column_table ):
985941 if description [i ][1 ] == "decimal" :
0 commit comments