Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 8 additions & 15 deletions parquet/src/file/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,9 @@ pub struct RowGroupMetaData {
num_rows: i64,
total_byte_size: i64,
schema_descr: SchemaDescPtr,
/// Cube: We'll roundtrip file_offset from thrift, but we always originate it as None. It was
/// not present in the older Parquet RowGroupMetaData definition.
file_offset: Option<i64>,
/// Ordinal position of this row group in file
ordinal: Option<i16>,
}
Expand Down Expand Up @@ -280,37 +283,25 @@ impl RowGroupMetaData {
let cc = ColumnChunkMetaData::from_thrift(d.clone(), c)?;
columns.push(cc);
}
// Notably, the function to_thrift, below, doesn't write these fields, and RowGroupMetadata doesn't have them.
if rg.file_offset.is_some() {
return Err(ParquetError::NYI(
"Parsing RowGroup file_offset fields is not yet implemented".to_string(),
));
}
if rg.total_compressed_size.is_some() {
return Err(ParquetError::NYI(
"Parsing RowGroup total_compressed_size fields is not yet implemented"
.to_string(),
));
}
Ok(RowGroupMetaData {
columns,
num_rows,
total_byte_size,
file_offset: rg.file_offset,
schema_descr,
ordinal: rg.ordinal,
})
}

/// Method to convert to Thrift.
///
/// Builds a Thrift `RowGroup` from this metadata:
/// - each column chunk is converted with its own `to_thrift`;
/// - `total_byte_size`, `num_rows`, `file_offset` and `ordinal` are passed
///   through exactly as stored on `self`;
/// - `total_compressed_size` is filled in from `compressed_size()`;
/// - `sorting_columns` is always written as `None`.
pub fn to_thrift(&self) -> RowGroup {
    RowGroup {
        columns: self.columns().iter().map(|v| v.to_thrift()).collect(),
        total_byte_size: self.total_byte_size,
        num_rows: self.num_rows,
        sorting_columns: None,
        // Each field must be initialised exactly once: repeating `file_offset` or
        // `total_compressed_size` with a stale `None` is a compile error.
        file_offset: self.file_offset,
        total_compressed_size: Some(self.compressed_size()),
        ordinal: self.ordinal,
    }
}
Expand Down Expand Up @@ -375,6 +366,8 @@ impl RowGroupMetaDataBuilder {
columns: self.columns,
num_rows: self.num_rows,
total_byte_size: self.total_byte_size,
// Cube: This is where we originate the None value for the optional file_offset field.
file_offset: None,
schema_descr: self.schema_descr,
ordinal: self.ordinal,
})
Expand Down
Loading