Skip to content

Commit 5590ded

Browse files
committed
Remove serialization non C41 constructors from stats
With this change in, PR #4685 and others to follow should: a) remove Subarray::set_stats and respective call from serialization/query.cc b) where Subarray is constructed in serialization/query.cc, change constructor call with the following code snippet: auto &stats_data = stats_from_capnp(reader.getStats()); Subarray subarray(array, layout, query_stats, stats_data, dummy_logger, true); c) The constructor calls parent_stats->create_child(prefix, stats_data); d) When all migrations are done, make Stats::populate_with_data private
1 parent 47b3eb8 commit 5590ded

File tree

12 files changed

+192
-59
lines changed

12 files changed

+192
-59
lines changed

tiledb/sm/query/query.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1663,6 +1663,10 @@ stats::Stats* Query::stats() const {
16631663
return stats_;
16641664
}
16651665

1666+
void Query::set_stats(const stats::StatsData& data) {
1667+
stats_->populate_with_data(data);
1668+
}
1669+
16661670
shared_ptr<Buffer> Query::rest_scratch() const {
16671671
return rest_scratch_;
16681672
}

tiledb/sm/query/query.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,11 @@ class Query {
655655
/** Returns the internal stats object. */
656656
stats::Stats* stats() const;
657657

658+
/** Populate the owned stats instance with data.
659+
* To be removed when the class will get a C41 constructor.
660+
*/
661+
void set_stats(const stats::StatsData& data);
662+
658663
/** Returns the scratch space used for REST requests. */
659664
shared_ptr<Buffer> rest_scratch() const;
660665

tiledb/sm/query/strategy_base.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ stats::Stats* StrategyBase::stats() const {
6565
return stats_;
6666
}
6767

68+
void StrategyBase::set_stats(const stats::StatsData& data) {
69+
stats_->populate_with_data(data);
70+
}
71+
6872
/* ****************************** */
6973
/* PROTECTED METHODS */
7074
/* ****************************** */

tiledb/sm/query/strategy_base.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,11 @@ class StrategyBase {
208208
/** Returns `stats_`. */
209209
stats::Stats* stats() const;
210210

211+
/** Populate the owned stats instance with data.
212+
* To be removed when the class will get a C41 constructor.
213+
*/
214+
void set_stats(const stats::StatsData& data);
215+
211216
/** Returns the configured offsets format mode. */
212217
std::string offsets_mode() const;
213218

tiledb/sm/serialization/query.cc

Lines changed: 25 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -118,27 +118,27 @@ Status stats_to_capnp(Stats& stats, capnp::Stats::Builder* stats_builder) {
118118
return Status::Ok();
119119
}
120120

121-
Status stats_from_capnp(
122-
const capnp::Stats::Reader& stats_reader, Stats* stats) {
121+
StatsData stats_from_capnp(const capnp::Stats::Reader& stats_reader) {
122+
std::unordered_map<std::string, uint64_t> counters;
123+
std::unordered_map<std::string, double> timers;
124+
123125
if (stats_reader.hasCounters()) {
124-
auto counters = stats->counters();
125126
auto counters_reader = stats_reader.getCounters();
126127
for (const auto entry : counters_reader.getEntries()) {
127128
auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()};
128-
(*counters)[std::string{key}] = entry.getValue();
129+
counters[std::string(key)] = entry.getValue();
129130
}
130131
}
131132

132133
if (stats_reader.hasTimers()) {
133-
auto timers = stats->timers();
134134
auto timers_reader = stats_reader.getTimers();
135135
for (const auto entry : timers_reader.getEntries()) {
136136
auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()};
137-
(*timers)[std::string{key}] = entry.getValue();
137+
timers[std::string(key)] = entry.getValue();
138138
}
139139
}
140140

141-
return Status::Ok();
141+
return stats::StatsData(counters, timers);
142142
}
143143

144144
void range_buffers_to_capnp(
@@ -328,11 +328,8 @@ Status subarray_from_capnp(
328328

329329
// If cap'n proto object has stats set it on c++ object
330330
if (reader.hasStats()) {
331-
stats::Stats* stats = subarray->stats();
332-
// We should always have a stats here
333-
if (stats != nullptr) {
334-
RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats));
335-
}
331+
auto stats_data = stats_from_capnp(reader.getStats());
332+
subarray->set_stats(stats_data);
336333
}
337334

338335
if (reader.hasRelevantFragments()) {
@@ -566,11 +563,8 @@ Status subarray_partitioner_from_capnp(
566563

567564
// If cap'n proto object has stats set it on c++ object
568565
if (reader.hasStats()) {
569-
auto stats = partitioner->stats();
570-
// We should always have stats
571-
if (stats != nullptr) {
572-
RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats));
573-
}
566+
auto stats_data = stats_from_capnp(reader.getStats());
567+
partitioner->set_stats(stats_data);
574568
}
575569

576570
return Status::Ok();
@@ -1146,11 +1140,8 @@ Status reader_from_capnp(
11461140

11471141
// If cap'n proto object has stats set it on c++ object
11481142
if (reader_reader.hasStats()) {
1149-
stats::Stats* stats = reader->stats();
1150-
// We should always have a stats here
1151-
if (stats != nullptr) {
1152-
RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats));
1153-
}
1143+
auto stats_data = stats_from_capnp(reader_reader.getStats());
1144+
reader->set_stats(stats_data);
11541145
}
11551146

11561147
return Status::Ok();
@@ -1187,11 +1178,8 @@ Status index_reader_from_capnp(
11871178

11881179
// If cap'n proto object has stats set it on c++ object
11891180
if (reader_reader.hasStats()) {
1190-
stats::Stats* stats = reader->stats();
1191-
// We should always have a stats here
1192-
if (stats != nullptr) {
1193-
RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats));
1194-
}
1181+
auto stats_data = stats_from_capnp(reader_reader.getStats());
1182+
reader->set_stats(stats_data);
11951183
}
11961184

11971185
return Status::Ok();
@@ -1229,11 +1217,8 @@ Status dense_reader_from_capnp(
12291217

12301218
// If cap'n proto object has stats set it on c++ object
12311219
if (reader_reader.hasStats()) {
1232-
stats::Stats* stats = reader->stats();
1233-
// We should always have a stats here
1234-
if (stats != nullptr) {
1235-
RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats));
1236-
}
1220+
auto stats_data = stats_from_capnp(reader_reader.getStats());
1221+
reader->set_stats(stats_data);
12371222
}
12381223

12391224
return Status::Ok();
@@ -1252,11 +1237,8 @@ Status delete_from_capnp(
12521237

12531238
// If cap'n proto object has stats set it on c++ object
12541239
if (delete_reader.hasStats()) {
1255-
stats::Stats* stats = delete_strategy->stats();
1256-
// We should always have a stats here
1257-
if (stats != nullptr) {
1258-
RETURN_NOT_OK(stats_from_capnp(delete_reader.getStats(), stats));
1259-
}
1240+
auto stats_data = stats_from_capnp(delete_reader.getStats());
1241+
delete_strategy->set_stats(stats_data);
12601242
}
12611243

12621244
return Status::Ok();
@@ -1341,11 +1323,8 @@ Status writer_from_capnp(
13411323

13421324
// If cap'n proto object has stats set it on c++ object
13431325
if (writer_reader.hasStats()) {
1344-
stats::Stats* stats = writer->stats();
1345-
// We should always have a stats here
1346-
if (stats != nullptr) {
1347-
RETURN_NOT_OK(stats_from_capnp(writer_reader.getStats(), stats));
1348-
}
1326+
auto stats_data = stats_from_capnp(writer_reader.getStats());
1327+
writer->set_stats(stats_data);
13491328
}
13501329

13511330
if (query.layout() == Layout::GLOBAL_ORDER &&
@@ -2270,11 +2249,8 @@ Status query_from_capnp(
22702249

22712250
// If cap'n proto object has stats set it on c++ object
22722251
if (query_reader.hasStats()) {
2273-
stats::Stats* stats = query->stats();
2274-
// We should always have a stats here
2275-
if (stats != nullptr) {
2276-
RETURN_NOT_OK(stats_from_capnp(query_reader.getStats(), stats));
2277-
}
2252+
auto stats_data = stats_from_capnp(query_reader.getStats());
2253+
query->set_stats(stats_data);
22782254
}
22792255

22802256
if (query_reader.hasWrittenFragmentInfo()) {
@@ -3212,11 +3188,8 @@ void ordered_dim_label_reader_from_capnp(
32123188

32133189
// If cap'n proto object has stats set it on c++ object
32143190
if (reader_reader.hasStats()) {
3215-
stats::Stats* stats = reader->stats();
3216-
// We should always have a stats here
3217-
if (stats != nullptr) {
3218-
throw_if_not_ok(stats_from_capnp(reader_reader.getStats(), stats));
3219-
}
3191+
auto stats_data = stats_from_capnp(reader_reader.getStats());
3192+
reader->set_stats(stats_data);
32203193
}
32213194
}
32223195

tiledb/sm/stats/global_stats.h

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,71 @@
4949
#include "tiledb/common/heap_memory.h"
5050
#include "tiledb/sm/stats/stats.h"
5151

52+
/**
53+
* Documenting the current stats behavior and architecture as close as
54+
* possible to the code so it's helpful next time some tries to refactor.
55+
*
56+
* Statistics collection is done at the top level via the GlobalStats class
57+
* defined in this file.
58+
* We maintain a global object called `all_stats` which is used to register
59+
* Stats objects, enable/disable collection, reset or dumping the collected
60+
* stats.
61+
*
62+
* The TileDB C API uses the `all_stats` object directly to execute the
63+
* actions iterated above.
64+
*
65+
* The GlobalStats class owns a list called `registered_stats` that has one
66+
* Stats object registered for each Context used. In consequence,
67+
* ContextResources register a Stats object for each Context created, this
68+
* object serves as the root for the tree of all children Stats used in a
69+
* Context.
70+
*
71+
* As mentioned above, the Stats objects used under a Context form a tree.
72+
* Each Stats object mentains a list of children Stats and a pointer to the
73+
* parent Stats object.
74+
* The Stats object created by ContextResources(named "Context.StorageManager")
75+
* is the only Stats constructed in a standalone fashion using the Stats
76+
* constructor, all the other objects under this root Stats are created via
77+
* the Stats::create_child API.
78+
*
79+
* The (current, please update if not accurate anymore) exhaustive list of
80+
* Stats we maintain under a Context is:
81+
* ---------------------------
82+
* ContextResources
83+
* - Query
84+
* - Reader
85+
* - Writer
86+
* - DenseTiler
87+
* - Subarray
88+
* - Deletes
89+
* - Subarray
90+
* - subSubarray
91+
* - SubarrayPartitioner
92+
* - VFS
93+
* - S3
94+
* - ArrayDirectory
95+
* - RestClient
96+
* - Consolidator
97+
* ---------------------------
98+
* Please visualize this as a tree, it was much easier to write
99+
* like this, the tree is too wide.
100+
*
101+
*
102+
* Observed issues:
103+
* - Stats objects currently are created via Stats::create_child API from a
104+
* parent stats object. Child objects such as e.g. Subarray only hold a
105+
* pointer to the Stats object, this means that the Stats objects outlive
106+
* the objects they represent and are kept alive by the tree like structure
107+
* defined by the Stats class.
108+
* In theory, a Context running for a long time would OOM the machine with
109+
* Stats objects.
110+
*
111+
* - Stats::populate_flattened_stats aggregate the collected statistic via
112+
* summing. But we also collect ".min", ".max" statistics as well,
113+
* sum-aggregating those is incorrect. Currently the dump function just
114+
* doesn't print those statistics.
115+
*/
116+
52117
namespace tiledb {
53118
namespace sm {
54119
namespace stats {
@@ -158,8 +223,7 @@ class GlobalStats {
158223
/* ********************************* */
159224

160225
/**
161-
* The singleton instance holding all global stats counters. The report will
162-
* be automatically made when this object is destroyed (at program termination).
226+
* The singleton instance holding all global stats counters and timers.
163227
*/
164228
extern GlobalStats all_stats;
165229

tiledb/sm/stats/stats.cc

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,14 @@ namespace stats {
4848
/* ****************************** */
4949

5050
Stats::Stats(const std::string& prefix)
51+
: Stats(prefix, StatsData{}) {
52+
}
53+
54+
Stats::Stats(const std::string& prefix, const StatsData& data)
5155
: enabled_(true)
5256
, prefix_(prefix + ".")
5357
, parent_(nullptr) {
58+
this->populate_with_data(data);
5459
}
5560

5661
/* ****************************** */
@@ -246,8 +251,12 @@ Stats* Stats::parent() {
246251
}
247252

248253
Stats* Stats::create_child(const std::string& prefix) {
254+
return create_child(prefix, StatsData{});
255+
}
256+
257+
Stats* Stats::create_child(const std::string& prefix, const StatsData& data) {
249258
std::unique_lock<std::mutex> lck(mtx_);
250-
children_.emplace_back(prefix_ + prefix);
259+
children_.emplace_back(prefix_ + prefix, data);
251260
Stats* const child = &children_.back();
252261
child->parent_ = this;
253262
return child;
@@ -272,15 +281,26 @@ void Stats::populate_flattened_stats(
272281
}
273282
}
274283

275-
std::unordered_map<std::string, double>* Stats::timers() {
284+
const std::unordered_map<std::string, double>* Stats::timers() const {
276285
return &timers_;
277286
}
278287

279288
/** Return pointer to conters map, used for serialization only. */
280-
std::unordered_map<std::string, uint64_t>* Stats::counters() {
289+
const std::unordered_map<std::string, uint64_t>* Stats::counters() const {
281290
return &counters_;
282291
}
283292

293+
void Stats::populate_with_data(const StatsData& data) {
294+
auto timers = data.timers();
295+
for (const auto& timer : timers) {
296+
timers_[timer.first] = timer.second;
297+
}
298+
auto counters = data.counters();
299+
for (const auto& counter : counters) {
300+
counters_[counter.first] = counter.second;
301+
}
302+
}
303+
284304
} // namespace stats
285305
} // namespace sm
286306
} // namespace tiledb

0 commit comments

Comments
 (0)