Skip to content

Commit 59384df

Browse files
authored
feat(catalog): report total table files size in dataset profile (#14312)
1 parent 38cd4d0 commit 59384df

File tree

2 files changed

+72
-0
lines changed

2 files changed

+72
-0
lines changed

metadata-service/iceberg-catalog/src/main/java/io/datahubproject/iceberg/catalog/TableOrViewOpsDelegate.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,10 @@ protected DatasetProfile getDataSetProfile(TableMetadata metadata) {
352352
if (totalRecordsStr != null) {
353353
dataSetProfile.setRowCount(Long.parseLong(totalRecordsStr));
354354
}
355+
String totalFileSizeStr = currentSnapshot.summary().get(SnapshotSummary.TOTAL_FILE_SIZE_PROP);
356+
if (totalFileSizeStr != null) {
357+
dataSetProfile.setSizeInBytes(Long.parseLong(totalFileSizeStr));
358+
}
355359
}
356360

357361
return dataSetProfile;

metadata-service/iceberg-catalog/src/test/java/io/datahubproject/iceberg/catalog/TableOpsDelegateTest.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,13 @@
3232
import io.datahubproject.metadata.context.OperationContext;
3333
import io.datahubproject.schematron.converters.avro.AvroSchemaConverter;
3434
import java.time.Instant;
35+
import java.util.HashMap;
36+
import java.util.Map;
3537
import java.util.Optional;
3638
import java.util.Set;
3739
import org.apache.iceberg.Schema;
40+
import org.apache.iceberg.Snapshot;
41+
import org.apache.iceberg.SnapshotSummary;
3842
import org.apache.iceberg.TableMetadata;
3943
import org.apache.iceberg.TableMetadataParser;
4044
import org.apache.iceberg.avro.AvroSchemaUtil;
@@ -426,4 +430,68 @@ public void testRefreshNotFound() {
426430
when(mockWarehouse.getIcebergMetadata(identifier)).thenReturn(Optional.empty());
427431
assertNull(tableDelegate.refresh());
428432
}
433+
434+
@Test
435+
public void testGetDataSetProfileWithTotalFileSize() {
436+
// Create a real TableOpsDelegate instance for testing the actual getDataSetProfile method
437+
TableOpsDelegate realTableDelegate =
438+
new TableOpsDelegate(
439+
mockWarehouse, identifier, mockEntityService, mockOperationContext, mockFileIOFactory);
440+
441+
// Mock TableMetadata with snapshot and summary
442+
TableMetadata mockMetadata = mock(TableMetadata.class);
443+
Schema schema =
444+
new Schema(
445+
Types.NestedField.required(1, "id", Types.LongType.get()),
446+
Types.NestedField.optional(2, "data", Types.StringType.get()));
447+
when(mockMetadata.schema()).thenReturn(schema);
448+
449+
// Mock Snapshot with summary containing total file size
450+
Snapshot mockSnapshot = mock(Snapshot.class);
451+
Map<String, String> mockSummary = new HashMap<>();
452+
mockSummary.put(SnapshotSummary.TOTAL_RECORDS_PROP, "1000");
453+
mockSummary.put(SnapshotSummary.TOTAL_FILE_SIZE_PROP, "5242880"); // 5MB in bytes
454+
when(mockSnapshot.summary()).thenReturn(mockSummary);
455+
when(mockMetadata.currentSnapshot()).thenReturn(mockSnapshot);
456+
457+
// Call the actual getDataSetProfile method
458+
DatasetProfile result = realTableDelegate.getDataSetProfile(mockMetadata);
459+
460+
// Verify the results
461+
assertEquals(result.getColumnCount().longValue(), 2L);
462+
assertEquals(result.getRowCount().longValue(), 1000L);
463+
assertEquals(result.getSizeInBytes().longValue(), 5242880L);
464+
}
465+
466+
@Test
467+
public void testGetDataSetProfileWithoutTotalFileSize() {
468+
// Create a real TableOpsDelegate instance for testing the actual getDataSetProfile method
469+
TableOpsDelegate realTableDelegate =
470+
new TableOpsDelegate(
471+
mockWarehouse, identifier, mockEntityService, mockOperationContext, mockFileIOFactory);
472+
473+
// Mock TableMetadata with snapshot but no file size in summary
474+
TableMetadata mockMetadata = mock(TableMetadata.class);
475+
Schema schema =
476+
new Schema(
477+
Types.NestedField.required(1, "id", Types.LongType.get()),
478+
Types.NestedField.optional(2, "data", Types.StringType.get()));
479+
when(mockMetadata.schema()).thenReturn(schema);
480+
481+
// Mock Snapshot with summary containing only row count, no file size
482+
Snapshot mockSnapshot = mock(Snapshot.class);
483+
Map<String, String> mockSummary = new HashMap<>();
484+
mockSummary.put(SnapshotSummary.TOTAL_RECORDS_PROP, "500");
485+
// No TOTAL_FILE_SIZE_PROP in the map
486+
when(mockSnapshot.summary()).thenReturn(mockSummary);
487+
when(mockMetadata.currentSnapshot()).thenReturn(mockSnapshot);
488+
489+
// Call the actual getDataSetProfile method
490+
DatasetProfile result = realTableDelegate.getDataSetProfile(mockMetadata);
491+
492+
// Verify the results
493+
assertEquals(result.getColumnCount().longValue(), 2L);
494+
assertEquals(result.getRowCount().longValue(), 500L);
495+
assertNull(result.getSizeInBytes()); // Should be null when no file size info
496+
}
429497
}

0 commit comments

Comments
 (0)