@@ -1290,3 +1290,64 @@ def test_to_parquet_decimal(session, bucket, database):
1290
1290
assert df2 [df2 .id == 2 ].iloc [0 ].decimal_5 is None
1291
1291
assert df2 [df2 .id == 3 ].iloc [0 ].decimal_2 == Decimal ((0 , (1 , 9 , 0 ), - 2 ))
1292
1292
assert df2 [df2 .id == 3 ].iloc [0 ].decimal_5 == Decimal ((0 , (1 , 9 , 0 , 0 , 0 , 0 ), - 5 ))
1293
+
1294
+
1295
def test_read_parquet_dataset(session, bucket):
    """Round-trip a partitioned Parquet dataset through S3.

    Builds a three-row DataFrame with decimal, float, list and partition
    columns, writes it with ``partition_cols=["partition"]`` under a prefix
    dedicated to this test, then reads the whole prefix back through
    ``session.pandas.read_parquet``. Only the number of columns and the
    number of rows are compared with the source frame.

    Args:
        session: Fixture exposing ``pandas.to_parquet`` and
            ``pandas.read_parquet``.
        bucket: Bucket name interpolated into the ``s3://`` target path.
    """
    df = pd.DataFrame({
        "id": [1, 2, 3],
        "decimal_2": [
            Decimal((0, (1, 9, 9), -2)),
            Decimal((0, (1, 9, 9), -2)),
            Decimal((0, (1, 9, 0), -2)),
        ],
        "decimal_5": [
            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
            Decimal((0, (1, 9, 0, 0, 0, 0), -5)),
        ],
        "float": [1.1, 2.2, 3.3],
        "list_int": [[1, 2], [1], [3, 4, 5]],
        "list_float": [[1.0, 2.0, 3.0], [9.9], [4.0, 5.0]],
        "list_string": [["foo"], ["xxx"], ["boo", "bar"]],
        "list_timestamp": [
            [datetime(2019, 1, 1), datetime(2019, 1, 2)],
            [datetime(2019, 1, 3)],
            [datetime(2019, 1, 3)],
        ],
        "partition": [0, 0, 1],
    })
    # No whitespace inside the URI, and a prefix of its own so this test's
    # mode="overwrite" write does not share a location with
    # test_read_parquet_file.
    path = f"s3://{bucket}/test_read_parquet_dataset/"
    session.pandas.to_parquet(
        dataframe=df,
        path=path,
        mode="overwrite",
        preserve_index=False,
        procs_cpu_bound=4,
        partition_cols=["partition"],
    )
    df2 = session.pandas.read_parquet(path=path)
    # Shape-only comparison: same column count and same row count.
    assert len(df.columns) == len(df2.columns)
    assert len(df.index) == len(df2.index)
1324
+
1325
+
1326
def test_read_parquet_file(session, bucket):
    """Round-trip a single Parquet file through S3.

    Builds a three-row DataFrame with decimal, float, list and integer
    columns, writes it without partitioning under a prefix dedicated to this
    test, then reads back the first path returned by
    ``session.pandas.to_parquet``. Only the number of columns and the number
    of rows are compared with the source frame.

    Args:
        session: Fixture exposing ``pandas.to_parquet`` and
            ``pandas.read_parquet``.
        bucket: Bucket name interpolated into the ``s3://`` target path.
    """
    df = pd.DataFrame({
        "id": [1, 2, 3],
        "decimal_2": [
            Decimal((0, (1, 9, 9), -2)),
            Decimal((0, (1, 9, 9), -2)),
            Decimal((0, (1, 9, 0), -2)),
        ],
        "decimal_5": [
            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
            Decimal((0, (1, 9, 9, 9, 9, 9), -5)),
            Decimal((0, (1, 9, 0, 0, 0, 0), -5)),
        ],
        "float": [1.1, 2.2, 3.3],
        "list_int": [[1, 2], [1], [3, 4, 5]],
        "list_float": [[1.0, 2.0, 3.0], [9.9], [4.0, 5.0]],
        "list_string": [["foo"], ["xxx"], ["boo", "bar"]],
        "list_timestamp": [
            [datetime(2019, 1, 1), datetime(2019, 1, 2)],
            [datetime(2019, 1, 3)],
            [datetime(2019, 1, 3)],
        ],
        # Written as an ordinary column here: no partition_cols is passed.
        "partition": [0, 0, 1],
    })
    # No whitespace inside the URI, and a prefix of its own so this test's
    # mode="overwrite" write does not share a location with
    # test_read_parquet_dataset.
    path = f"s3://{bucket}/test_read_parquet_file/"
    written_paths = session.pandas.to_parquet(
        dataframe=df,
        path=path,
        mode="overwrite",
        preserve_index=False,
        procs_cpu_bound=1,
    )
    # Read back one object rather than the whole prefix.
    df2 = session.pandas.read_parquet(path=written_paths[0])
    # Shape-only comparison: same column count and same row count.
    assert len(df.columns) == len(df2.columns)
    assert len(df.index) == len(df2.index)
0 commit comments