Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion tree/ntupleutil/v7/src/RNTupleInspector.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ void ROOT::Experimental::RNTupleInspector::CollectColumnInfo()
// to report the size _in memory_ of column elements.
std::uint32_t elemSize = RColumnElementBase::Generate(colDesc.GetType())->GetSize();
std::uint64_t nElems = 0;
std::unordered_set<std::uint64_t> seenPages{};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can move the set down into the first level of the for loop because we don't deduplicate pages across clusters. This will also limit the memory used for the set.

std::vector<std::uint64_t> compressedPageSizes{};

for (const auto &clusterDescriptor : fDescriptor.GetClusterIterable()) {
Expand Down Expand Up @@ -88,8 +89,16 @@ void ROOT::Experimental::RNTupleInspector::CollectColumnInfo()

const auto &pageRange = clusterDescriptor.GetPageRange(colId);

std::uint64_t locatorOffset;
for (const auto &page : pageRange.GetPageInfos()) {
compressedPageSizes.emplace_back(page.GetLocator().GetNBytesOnStorage());
locatorOffset = page.GetLocator().GetType() == ROOT::RNTupleLocator::ELocatorType::kTypeDAOS
? page.GetLocator().GetPosition<RNTupleLocatorObject64>().GetLocation()
: page.GetLocator().GetPosition<std::uint64_t>();
auto [_, pageAdded] = seenPages.emplace(locatorOffset);
if (pageAdded) {
compressedPageSizes.emplace_back(page.GetLocator().GetNBytesOnStorage());
}
// For the moment, we actually load and decompress aliased pages multiple times
fUncompressedSize += page.GetNElements() * elemSize;
}
}
Expand Down
24 changes: 24 additions & 0 deletions tree/ntupleutil/v7/test/ntuple_inspector.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,30 @@ TEST(RNTupleInspector, SizeProjectedFields)
EXPECT_EQ(inspector->GetFieldTreeInspector("muonPt").GetCompressedSize(), inspector->GetCompressedSize());
}

// Checks the sizes reported by the inspector when every page of a column has
// identical content: the uncompressed size is expected to count each page,
// while the compressed (on-storage) size is expected to count the shared page once.
TEST(RNTupleInspector, SizeSamePageMerging)
{
   FileRaii fileGuard("test_ntuple_inspector_size_same_page_merging.root");
   {
      auto model = RNTupleModel::Create();
      auto intValue = model->MakeField<std::int32_t>("int");

      // Compression is switched off and the page size is pinned to 16 bytes,
      // i.e. four std::int32_t values per page.
      auto options = RNTupleWriteOptions();
      options.SetCompression(0);
      options.SetInitialUnzippedPageSize(16);
      options.SetMaxUnzippedPageSize(16);
      auto writer = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath(), options);

      // Write 64 entries that all carry the value zero, so every page has the same content.
      int nFilled = 0;
      while (nFilled < 64) {
         *intValue = 0;
         writer->Fill();
         ++nFilled;
      }
   }

   auto inspector = RNTupleInspector::Create("ntuple", fileGuard.GetPath());
   // 64 entries * 4 bytes each
   EXPECT_EQ(inspector->GetUncompressedSize(), 256);
   // a single 16-byte page on storage
   EXPECT_EQ(inspector->GetCompressedSize(), 16);
}

TEST(RNTupleInspector, ColumnInfoCompressed)
{
FileRaii fileGuard("test_ntuple_inspector_column_info_compressed.root");
Expand Down