38 changes: 26 additions & 12 deletions tree/ntuple/src/RNTupleDescriptorFmt.cxx
@@ -93,6 +93,7 @@ void ROOT::RNTupleDescriptor::PrintInfo(std::ostream &output) const
std::uint64_t nBytesOnStorage = 0;
std::uint64_t nBytesInMemory = 0;
std::uint64_t nPages = 0;
std::unordered_set<std::uint64_t> seenPages{};
int compression = -1;
for (const auto &column : fColumnDescriptors) {
// Alias columns (columns of projected fields) don't contribute to the storage consumption. Count them
@@ -124,15 +125,22 @@ void ROOT::RNTupleDescriptor::PrintInfo(std::ostream &output) const
}
const auto &pageRange = cluster.second.GetPageRange(column.second.GetPhysicalId());
auto idx = cluster2Idx[cluster.first];
std::uint64_t locatorOffset;
for (const auto &page : pageRange.GetPageInfos()) {
nBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
nBytesInMemory += page.GetNElements() * elementSize;
clusters[idx].fNBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
clusters[idx].fNBytesInMemory += page.GetNElements() * elementSize;
++clusters[idx].fNPages;
info.fNBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
++info.fNPages;
++nPages;
locatorOffset = page.GetLocator().GetType() == ROOT::RNTupleLocator::ELocatorType::kTypeDAOS
? page.GetLocator().GetPosition<RNTupleLocatorObject64>().GetLocation()
: page.GetLocator().GetPosition<std::uint64_t>();
auto [_, pageAdded] = seenPages.emplace(locatorOffset);
if (pageAdded) {
nBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
nBytesInMemory += page.GetNElements() * elementSize;
Contributor: I think for the in-memory summary, we should not take into account the page deduplication (because we don't deduplicate in memory).
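A minimal sketch of that suggestion, reusing the identifiers from the diff above (illustrative, not code from this PR): the in-memory total is accumulated for every page reference, and only the on-storage total goes through the deduplication check.

```cpp
// Always count the uncompressed (in-memory) size; pages are not
// deduplicated in memory, only on storage.
nBytesInMemory += page.GetNElements() * elementSize;
auto [_, pageAdded] = seenPages.emplace(locatorOffset);
if (pageAdded) {
   nBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
}
```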

clusters[idx].fNBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
clusters[idx].fNBytesInMemory += page.GetNElements() * elementSize;
Contributor: Same here, I think I'd always sum up the in-memory size.

++clusters[idx].fNPages;
info.fNBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
Contributor: Here it gets tricky. As is, deduplicated pages count for none of the columns. I'd rather err the other way and account for all of the pages (aliased or not) in the per-column statistics. If we want to do it correctly, I think we need to keep a seenPages map per column and then count deduplicated pages only once. The meaning of this would be: "if I only stored this column and nothing else on disk, this is what it takes."
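A rough sketch of that per-column bookkeeping (illustrative only; `seenPagesPerColumn` is an invented name, and the enclosing loops and includes are elided):

```cpp
// One set of seen locator offsets per physical column: a page stored once
// but referenced by several columns then counts toward each of those
// columns, while repeated references within one column count only once.
std::unordered_map<std::uint64_t, std::unordered_set<std::uint64_t>> seenPagesPerColumn;

// ... inside the per-page loop, for the column currently being summarized:
auto &seenForColumn = seenPagesPerColumn[column.second.GetPhysicalId()];
if (seenForColumn.emplace(locatorOffset).second) {
   info.fNBytesOnStorage += page.GetLocator().GetNBytesOnStorage();
   ++info.fNPages;
}
```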

++info.fNPages;
++nPages;
}
}
}
columns.emplace_back(info);
@@ -168,8 +176,11 @@ void ROOT::RNTupleDescriptor::PrintInfo(std::ostream &output) const
<< clusters[i].fFirstEntry + clusters[i].fNEntries - 1 << "] -- " << clusters[i].fNEntries << "\n";
output << " " << " # Pages: " << clusters[i].fNPages << "\n";
output << " " << " Size on storage: " << clusters[i].fNBytesOnStorage << " B\n";
output << " " << " Compression: " << std::fixed << std::setprecision(2)
<< float(clusters[i].fNBytesInMemory) / float(float(clusters[i].fNBytesOnStorage)) << std::endl;
output << " " << " Compression: " << std::fixed << std::setprecision(2);
if (clusters[i].fNPages > 0)
output << float(clusters[i].fNBytesInMemory) / float(float(clusters[i].fNBytesOnStorage)) << std::endl;
else
output << "N/A" << std::endl;
}

output << "------------------------------------------------------------\n";
@@ -198,8 +209,11 @@ void ROOT::RNTupleDescriptor::PrintInfo(std::ostream &output) const
output << " Avg elements / page: " << avgElementsPerPage << "\n";
output << " Avg page size: " << avgPageSize << " B\n";
output << " Size on storage: " << col.fNBytesOnStorage << " B\n";
output << " Compression: " << std::fixed << std::setprecision(2)
<< float(col.fElementSize * col.fNElements) / float(col.fNBytesOnStorage) << "\n";
output << " Compression: " << std::fixed << std::setprecision(2);
if (col.fNPages > 0)
output << float(col.fElementSize * col.fNElements) / float(col.fNBytesOnStorage) << std::endl;
else
output << "N/A" << std::endl;
output << "............................................................" << std::endl;
}
}
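For context, a self-contained example of the counting semantics suggested in the review: stored pages are identified by their locator offset, the on-storage bytes and page counts are deduplicated, and the in-memory bytes are not. All names and numbers below are invented for illustration.

```cpp
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

struct PageRef {
   std::uint64_t fLocatorOffset;   // where the stored page lives
   std::uint64_t fBytesOnStorage;  // compressed size of the stored page
   std::uint64_t fBytesInMemory;   // uncompressed size when loaded
};

int main()
{
   // Two references to the same stored page (offset 0) plus one distinct
   // page: this mimics a page shared after deduplication on storage.
   std::vector<PageRef> refs{{0, 4, 4}, {0, 4, 4}, {8, 4, 4}};

   std::unordered_set<std::uint64_t> seenPages;
   std::uint64_t onStorage = 0, inMemory = 0, nPages = 0;
   for (const auto &ref : refs) {
      inMemory += ref.fBytesInMemory; // never deduplicated
      if (seenPages.emplace(ref.fLocatorOffset).second) {
         onStorage += ref.fBytesOnStorage; // counted once per stored page
         ++nPages;
      }
   }
   std::cout << nPages << " pages, " << onStorage << " B on storage, "
             << inMemory << " B in memory\n";
   // Prints: 2 pages, 8 B on storage, 12 B in memory
}
```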
18 changes: 9 additions & 9 deletions tree/ntuple/test/ntuple_print.cxx
@@ -40,9 +40,9 @@ TEST(RNtuplePrint, FullString)
" # Fields: 4\n"
" # Columns: 2\n"
" # Alias Columns: 1\n"
" # Pages: 2\n"
" # Pages: 1\n"
" # Clusters: 1\n"
" Size on storage: 8 B\n"
" Size on storage: 4 B\n"
" Compression rate: 1.00\n"
" Header size: .* B\n"
" Footer size: .* B\n"
@@ -51,19 +51,19 @@
"CLUSTER DETAILS\n"
"------------------------------------------------------------\n"
" # 0 Entry range: .0..0. -- 1\n"
" # Pages: 2\n"
" Size on storage: 8 B\n"
" # Pages: 1\n"
" Size on storage: 4 B\n"
" Compression: 1.00\n"
"------------------------------------------------------------\n"
"COLUMN DETAILS\n"
"------------------------------------------------------------\n"
" px .#0. -- Real32 .id:0.\n"
" # Elements: 1\n"
" # Pages: 1\n"
" Avg elements / page: 1\n"
" Avg page size: 4 B\n"
" Size on storage: 4 B\n"
" Compression: 1.00\n"
" # Pages: 0\n"
" Avg elements / page: 0\n"
" Avg page size: 0 B\n"
" Size on storage: 0 B\n"
" Compression: N/A\n"
"............................................................\n"
" py .#0. -- Real32 .id:1.\n"
" # Elements: 1\n"