@@ -217,6 +217,9 @@ pub struct RowGroupMetaData {
217217 num_rows : i64 ,
218218 total_byte_size : i64 ,
219219 schema_descr : SchemaDescPtr ,
220+ /// Cube: `file_offset` is round-tripped when it is read from thrift, but metadata that we create
221+ /// ourselves always sets it to None. It was not present in the older Parquet RowGroupMetaData definition.
222+ file_offset : Option < i64 > ,
220223 /// Ordinal position of this row group in file
221224 ordinal : Option < i16 > ,
222225}
@@ -280,37 +283,25 @@ impl RowGroupMetaData {
280283 let cc = ColumnChunkMetaData :: from_thrift ( d. clone ( ) , c) ?;
281284 columns. push ( cc) ;
282285 }
283- // Notably, the function to_thrift, below, doesn't write these fields, and RowGroupMetadata doesn't have them.
284- if rg. file_offset . is_some ( ) {
285- return Err ( ParquetError :: NYI (
286- "Parsing RowGroup file_offset fields is not yet implemented" . to_string ( ) ,
287- ) ) ;
288- }
289- if rg. total_compressed_size . is_some ( ) {
290- return Err ( ParquetError :: NYI (
291- "Parsing RowGroup total_compressed_size fields is not yet implemented"
292- . to_string ( ) ,
293- ) ) ;
294- }
295286 Ok ( RowGroupMetaData {
296287 columns,
297288 num_rows,
298289 total_byte_size,
290+ file_offset : rg. file_offset ,
299291 schema_descr,
300292 ordinal : rg. ordinal ,
301293 } )
302294 }
303295
304296 /// Method to convert to Thrift.
305297 pub fn to_thrift ( & self ) -> RowGroup {
306- // TODO: Understand file_offset and total_compressed_size fields.
307298 RowGroup {
308299 columns : self . columns ( ) . iter ( ) . map ( |v| v. to_thrift ( ) ) . collect ( ) ,
309300 total_byte_size : self . total_byte_size ,
310301 num_rows : self . num_rows ,
311302 sorting_columns : None ,
312- file_offset : None ,
313- total_compressed_size : None ,
303+ file_offset : self . file_offset ,
304+ total_compressed_size : Some ( self . compressed_size ( ) ) ,
314305 ordinal : self . ordinal ,
315306 }
316307 }
@@ -375,6 +366,8 @@ impl RowGroupMetaDataBuilder {
375366 columns : self . columns ,
376367 num_rows : self . num_rows ,
377368 total_byte_size : self . total_byte_size ,
369+ // Cube: This is where the optional file_offset field originates as None; the builder never supplies a value.
370+ file_offset : None ,
378371 schema_descr : self . schema_descr ,
379372 ordinal : self . ordinal ,
380373 } )
0 commit comments