Skip to content

Commit 9ca374c

Browse files
New solomon data source metrics listing strategy (#27518)
1 parent 568786d commit 9ca374c

File tree

13 files changed

+387
-158
lines changed

13 files changed

+387
-158
lines changed

ydb/library/yql/providers/solomon/actors/dq_solomon_metrics_queue.cpp

Lines changed: 158 additions & 99 deletions
Large diffs are not rendered by default.

ydb/library/yql/providers/solomon/common/util.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,13 @@ struct TTimeseries {
3030
std::vector<double> Values;
3131
};
3232

33+
// Plain aggregate describing a single label and the list of values seen for it.
// NOTE(review): presumably populated from the "labels" array of the Solomon
// "list metrics labels" response (name / absent / truncated / values) — confirm
// against the response parser.
struct TLabelValues {
34+
// Label name.
TString Name;
35+
// NOTE(review): looks like this mirrors the "absent" flag of the API response — confirm exact semantics.
bool Absent;
36+
// NOTE(review): looks like this mirrors the "truncated" flag of the API response
// (value list possibly cut short by the server) — confirm.
bool Truncated;
37+
// Values reported for this label.
std::vector<TString> Values;
38+
};
39+
3340
struct TMetricTimeRange {
3441
TSelectors Selectors;
3542
TString Program;

ydb/library/yql/providers/solomon/provider/yql_solomon_config.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ TSolomonConfiguration::TSolomonConfiguration()
99
REGISTER_SETTING(*this, _EnableReading);
1010
REGISTER_SETTING(*this, _EnableRuntimeListing);
1111
REGISTER_SETTING(*this, _TruePointsFindRange);
12-
REGISTER_SETTING(*this, MetricsQueuePageSize);
13-
REGISTER_SETTING(*this, MetricsQueuePrefetchSize);
12+
REGISTER_SETTING(*this, _MaxListingPageSize);
1413
REGISTER_SETTING(*this, MetricsQueueBatchCountLimit);
1514
REGISTER_SETTING(*this, SolomonClientDefaultReplica);
1615
REGISTER_SETTING(*this, ComputeActorBatchSize);

ydb/library/yql/providers/solomon/provider/yql_solomon_config.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ struct TSolomonSettings {
1313
NCommon::TConfSetting<bool, false> _EnableReading;
1414
NCommon::TConfSetting<bool, false> _EnableRuntimeListing;
1515
NCommon::TConfSetting<ui64, false> _TruePointsFindRange;
16-
NCommon::TConfSetting<ui64, false> MetricsQueuePageSize;
17-
NCommon::TConfSetting<ui64, false> MetricsQueuePrefetchSize;
16+
NCommon::TConfSetting<ui64, false> _MaxListingPageSize;
1817
NCommon::TConfSetting<ui64, false> MetricsQueueBatchCountLimit;
1918
NCommon::TConfSetting<TString, false> SolomonClientDefaultReplica;
2019
NCommon::TConfSetting<ui64, false> ComputeActorBatchSize;

ydb/library/yql/providers/solomon/provider/yql_solomon_dq_integration.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -354,13 +354,7 @@ class TSolomonDqIntegration: public TDqIntegrationBase {
354354
auto& solomonConfig = State_->Configuration;
355355
auto& sourceSettings = *source.MutableSettings();
356356

357-
auto metricsQueuePageSize = solomonConfig->MetricsQueuePageSize.Get().OrElse(2000);
358-
sourceSettings.insert({"metricsQueuePageSize", ToString(metricsQueuePageSize)});
359-
360-
auto metricsQueuePrefetchSize = solomonConfig->MetricsQueuePrefetchSize.Get().OrElse(4000);
361-
sourceSettings.insert({"metricsQueuePrefetchSize", ToString(metricsQueuePrefetchSize)});
362-
363-
auto metricsQueueBatchCountLimit = solomonConfig->MetricsQueueBatchCountLimit.Get().OrElse(10);
357+
auto metricsQueueBatchCountLimit = solomonConfig->MetricsQueueBatchCountLimit.Get().OrElse(500);
364358
sourceSettings.insert({"metricsQueueBatchCountLimit", ToString(metricsQueueBatchCountLimit)});
365359

366360
auto solomonClientDefaultReplica = solomonConfig->SolomonClientDefaultReplica.Get().OrElse(defaultReplica);
@@ -372,6 +366,9 @@ class TSolomonDqIntegration: public TDqIntegrationBase {
372366
auto truePointsFindRange = solomonConfig->_TruePointsFindRange.Get().OrElse(301);
373367
sourceSettings.insert({"truePointsFindRange", ToString(truePointsFindRange)});
374368

369+
auto maxListingPageSize = solomonConfig->_MaxListingPageSize.Get().OrElse(20000);
370+
sourceSettings.insert({"maxListingPageSize", ToString(maxListingPageSize)});
371+
375372
auto maxApiInflight = solomonConfig->MaxApiInflight.Get().OrElse(40);
376373
sourceSettings.insert({"maxApiInflight", ToString(maxApiInflight)});
377374

ydb/library/yql/providers/solomon/provider/yql_solomon_load_meta.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ namespace {
1616
struct TLoadSolomonMetaRequest {
1717
NSo::ISolomonAccessorClient::TPtr SolomonClient;
1818
NThreading::TFuture<NSo::TGetLabelsResponse> LabelNamesRequest;
19-
NThreading::TFuture<NSo::TListMetricsResponse> ListMetricsRequest;
19+
NThreading::TFuture<NSo::TListMetricsLabelsResponse> LabelValuesRequest;
2020
};
2121

2222
TMaybe<TString> ExtractSetting(const TExprNode& settings, const TString& settingName) {
@@ -102,16 +102,16 @@ class TSolomonLoadTableMetadataTransformer : public TGraphTransformerBase {
102102

103103
auto solomonClient = NSo::ISolomonAccessorClient::Make(std::move(source), credentialsProvider);
104104
auto labelNamesFuture = solomonClient->GetLabelNames(selectors, from, to);
105-
auto listMetricsFuture = solomonClient->ListMetrics(selectors, from, to, 30, 0);
105+
auto listMetricsLabelsFuture = solomonClient->ListMetricsLabels(selectors, from, to);
106106

107107
LabelNamesRequests_[soReadObject.Raw()] = {
108108
.SolomonClient = solomonClient,
109109
.LabelNamesRequest = labelNamesFuture,
110-
.ListMetricsRequest = listMetricsFuture
110+
.LabelValuesRequest = listMetricsLabelsFuture
111111
};
112112

113113
futures.push_back(labelNamesFuture.IgnoreResult());
114-
futures.push_back(listMetricsFuture.IgnoreResult());
114+
futures.push_back(listMetricsLabelsFuture.IgnoreResult());
115115
}
116116
}
117117

@@ -142,10 +142,10 @@ class TSolomonLoadTableMetadataTransformer : public TGraphTransformerBase {
142142
return TStatus::Error;
143143
}
144144

145-
auto listMetricsValue = request.ListMetricsRequest.GetValue();
146-
if (listMetricsValue.Status != NSo::EStatus::STATUS_OK) {
145+
auto listMetricLabelsValue = request.LabelValuesRequest.GetValue();
146+
if (listMetricLabelsValue.Status != NSo::EStatus::STATUS_OK) {
147147
ctx.AddError(TIssue(ctx.GetPosition(node->Pos()),
148-
TStringBuilder() << "Failed to get total metrics count, details: " << listMetricsValue.Error));
148+
TStringBuilder() << "Failed to get total metrics count, details: " << listMetricLabelsValue.Error));
149149
return TStatus::Error;
150150
}
151151

@@ -161,7 +161,7 @@ class TSolomonLoadTableMetadataTransformer : public TGraphTransformerBase {
161161
.RequiredLabelNames()
162162
.Add(labelNames)
163163
.Build()
164-
.TotalMetricsCount<TCoAtom>().Build(ToString(listMetricsValue.Result.TotalCount))
164+
.TotalMetricsCount<TCoAtom>().Build(ToString(listMetricLabelsValue.Result.TotalCount))
165165
.Done().Ptr());
166166
}
167167

ydb/library/yql/providers/solomon/solomon_accessor/client/solomon_accessor_client.cpp

Lines changed: 116 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,62 @@ TListMetricsResponse ProcessListMetricsResponse(NYql::IHTTPGateway::TResult&& re
159159
return TListMetricsResponse(std::move(result), response.Content.size() + response.Content.Headers.size());
160160
}
161161

162+
// Turns the raw HTTP gateway result of a "list metrics labels" call into a
// TListMetricsLabelsResponse.
//
// Returns a response built from an error message when the transfer failed, the
// HTTP status is outside 2xx, the body is not valid JSON, or the JSON does not
// have the expected shape. Otherwise returns a response built from the parsed
// result (total count + per-label values) and the sum of body and header sizes.
TListMetricsLabelsResponse ProcessListMetricsLabelsResponse(NYql::IHTTPGateway::TResult&& response) {
163+
TListMetricsLabelsResult result;
164+
165+
// Transport-level failure reported by curl: surface the gateway issues and the curl code.
if (response.CurlResponseCode != CURLE_OK) {
166+
return TListMetricsLabelsResponse(TStringBuilder{} << "Monitoring api list metrics labels response: " << response.Issues.ToOneLineString() <<
167+
", internal code: " << static_cast<int>(response.CurlResponseCode));
168+
}
169+
170+
// Any non-2xx HTTP status is an error; the response body is included as the error details.
if (response.Content.HttpResponseCode < 200 || response.Content.HttpResponseCode >= 300) {
171+
return TListMetricsLabelsResponse(TStringBuilder{} << "Monitoring api list metrics labels response: " << response.Content.data() <<
172+
", internal code: " << response.Content.HttpResponseCode);
173+
}
174+
175+
NJson::TJsonValue json;
176+
try {
177+
// throwOnError=true: malformed JSON is reported via an exception handled just below.
NJson::ReadJsonTree(response.Content.data(), &json, /*throwOnError*/ true);
178+
} catch (const std::exception& e) {
179+
return TListMetricsLabelsResponse(TStringBuilder{} << "Monitoring api list metrics labels response is not a valid json: " << e.what());
180+
}
181+
182+
// The top-level value must be an object carrying both "labels" and "totalCount".
if (!json.IsMap() || !json.Has("labels") || !json.Has("totalCount")) {
183+
return TListMetricsLabelsResponse("Monitoring api list metrics labels response doesn't contain requested info");
184+
}
185+
186+
// Type check of the two required fields before they are read.
if (!json["totalCount"].IsInteger() || !json["labels"].IsArray()) {
187+
return TListMetricsLabelsResponse("Monitoring api list metrics labels response contains invalid data");
188+
}
189+
190+
result.TotalCount = json["totalCount"].GetInteger();
191+
192+
// Each element of "labels" is expected to provide "name", "absent", "truncated" and "values".
for (const auto& label : json["labels"].GetArray()) {
193+
try {
194+
// The *Safe getters are used inside this try block; a NJson::TJsonException
// from them is converted into an error response by the catch below.
TString name = label["name"].GetStringSafe();
195+
bool absent = label["absent"].GetBooleanSafe();
196+
bool truncated = label["truncated"].GetBooleanSafe();
197+
const auto& jsonValues = label["values"].GetArraySafe();
198+
std::vector<TString> values;
199+
200+
values.reserve(jsonValues.size());
201+
for (const auto& labelValue : jsonValues) {
202+
// A single non-string value rejects the whole response.
if (!labelValue.IsString()) {
203+
return TListMetricsLabelsResponse("Monitoring api list metrics labels response contains invalid label values");
204+
}
205+
values.push_back(labelValue.GetString());
206+
}
207+
208+
result.Labels.emplace_back(name, absent, truncated, std::move(values));
209+
} catch (const NJson::TJsonException& e) {
210+
return TListMetricsLabelsResponse(TStringBuilder{} << "Monitoring api list metrics labels response contains invalid labels: " << e.what());
211+
}
212+
213+
}
214+
215+
// Success: the second argument is the body size plus the headers size of the HTTP response.
return TListMetricsLabelsResponse(std::move(result), response.Content.size() + response.Content.Headers.size());
216+
}
217+
162218
TGetPointsCountResponse ProcessGetPointsCountResponse(NYql::IHTTPGateway::TResult&& response, ui64 downsampledPointsCount) {
163219
static std::set<TString> whitelistIssues = {
164220
"Not able to apply function count on vector with size 0"
@@ -247,10 +303,12 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
247303
public:
248304
TSolomonAccessorClient(
249305
const TString& defaultReplica,
306+
ui64 maxListingPageSize,
250307
ui64 maxApiInflight,
251308
NYql::NSo::NProto::TDqSolomonSource&& settings,
252309
std::shared_ptr<NYdb::ICredentialsProvider> credentialsProvider)
253310
: DefaultReplica(defaultReplica)
311+
, MaxListingPageSize(maxListingPageSize)
254312
, Settings(std::move(settings))
255313
, CredentialsProvider(credentialsProvider) {
256314

@@ -269,7 +327,7 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
269327

270328
public:
271329
NThreading::TFuture<TGetLabelsResponse> GetLabelNames(const TSelectors& selectors, TInstant from, TInstant to) const override final {
272-
auto requestUrl = BuildGetLabelsUrl(selectors, from, to);
330+
auto url = BuildGetLabelsUrl(selectors, from, to);
273331

274332
auto resultPromise = NThreading::NewPromise<TGetLabelsResponse>();
275333

@@ -279,14 +337,14 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
279337

280338
DoHttpRequest(
281339
std::move(cb),
282-
std::move(requestUrl)
340+
std::move(url)
283341
);
284342

285343
return resultPromise.GetFuture();
286344
}
287345

288-
NThreading::TFuture<TListMetricsResponse> ListMetrics(const TSelectors& selectors, TInstant from, TInstant to, int pageSize, int page) const override final {
289-
auto requestUrl = BuildListMetricsUrl(selectors, from, to, pageSize, page);
346+
NThreading::TFuture<TListMetricsResponse> ListMetrics(const TSelectors& selectors, TInstant from, TInstant to) const override final {
347+
auto [url, body] = BuildListMetricsHttpParams(selectors, from, to);
290348

291349
auto resultPromise = NThreading::NewPromise<TListMetricsResponse>();
292350

@@ -296,7 +354,26 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
296354

297355
DoHttpRequest(
298356
std::move(cb),
299-
std::move(requestUrl)
357+
std::move(url),
358+
std::move(body)
359+
);
360+
361+
return resultPromise.GetFuture();
362+
}
363+
364+
NThreading::TFuture<TListMetricsLabelsResponse> ListMetricsLabels(const TSelectors& selectors, TInstant from, TInstant to) const override final {
365+
auto [url, body] = BuildListMetricsLabelsHttpParams(selectors, from, to);
366+
367+
auto resultPromise = NThreading::NewPromise<TListMetricsLabelsResponse>();
368+
369+
auto cb = [resultPromise](NYql::IHTTPGateway::TResult&& result) mutable {
370+
resultPromise.SetValue(ProcessListMetricsLabelsResponse(std::move(result)));
371+
};
372+
373+
DoHttpRequest(
374+
std::move(cb),
375+
std::move(url),
376+
std::move(body)
300377
);
301378

302379
return resultPromise.GetFuture();
@@ -317,17 +394,16 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
317394
auto fullSelectors = AddRequiredLabels(selectors);
318395
TString program = TStringBuilder() << "count(" << BuildSelectorsProgram(fullSelectors) << ")";
319396

320-
auto requestUrl = BuildGetPointsCountUrl();
321-
auto requestBody = BuildGetPointsCountBody(program, downsamplingTo, to);
397+
auto [url, body] = BuildGetPointsCountHttpParams(program, downsamplingTo, to);
322398

323399
auto cb = [resultPromise, downsampledPointsCount](NYql::IHTTPGateway::TResult&& response) mutable {
324400
resultPromise.SetValue(ProcessGetPointsCountResponse(std::move(response), downsampledPointsCount));
325401
};
326402

327403
DoHttpRequest(
328404
std::move(cb),
329-
std::move(requestUrl),
330-
std::move(requestBody)
405+
std::move(url),
406+
std::move(body)
331407
);
332408

333409
} else {
@@ -482,14 +558,13 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
482558

483559
builder.AddUrlParam("projectId", GetProjectId());
484560
builder.AddUrlParam("selectors", BuildSelectorsProgram(selectors));
485-
builder.AddUrlParam("forceCluster", DefaultReplica);
486561
builder.AddUrlParam("from", from.ToString());
487562
builder.AddUrlParam("to", to.ToString());
488563

489564
return builder.Build();
490565
}
491566

492-
TString BuildListMetricsUrl(const TSelectors& selectors, TInstant from, TInstant to, int pageSize, int page) const {
567+
std::tuple<TString, TString> BuildListMetricsHttpParams(const TSelectors& selectors, TInstant from, TInstant to) const {
493568
TUrlBuilder builder(GetHttpSolomonEndpoint());
494569

495570
builder.AddPathComponent("api");
@@ -500,38 +575,50 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
500575

501576
builder.AddUrlParam("projectId", GetProjectId());
502577
builder.AddUrlParam("selectors", BuildSelectorsProgram(selectors));
503-
builder.AddUrlParam("forceCluster", DefaultReplica);
504578
builder.AddUrlParam("from", from.ToString());
505579
builder.AddUrlParam("to", to.ToString());
506-
builder.AddUrlParam("pageSize", std::to_string(pageSize));
507-
builder.AddUrlParam("page", std::to_string(page));
580+
builder.AddUrlParam("pageSize", ToString(MaxListingPageSize));
508581

509-
return builder.Build();
582+
return { builder.Build(), "" };
510583
}
511584

512-
TString BuildGetPointsCountUrl() const {
585+
std::tuple<TString, TString> BuildListMetricsLabelsHttpParams(const TSelectors& selectors, TInstant from, TInstant to) const {
513586
TUrlBuilder builder(GetHttpSolomonEndpoint());
514587

515588
builder.AddPathComponent("api");
516589
builder.AddPathComponent("v2");
517590
builder.AddPathComponent("projects");
518591
builder.AddPathComponent(Settings.GetProject());
519592
builder.AddPathComponent("sensors");
520-
builder.AddPathComponent("data");
593+
builder.AddPathComponent("labels");
521594

522595
builder.AddUrlParam("projectId", GetProjectId());
596+
builder.AddUrlParam("selectors", BuildSelectorsProgram(selectors));
597+
builder.AddUrlParam("from", from.ToString());
598+
builder.AddUrlParam("to", to.ToString());
599+
builder.AddUrlParam("limit", "100000");
523600

524-
return builder.Build();
601+
return { builder.Build(), "" };
525602
}
526603

527-
TString BuildGetPointsCountBody(const TString& program, TInstant from, TInstant to) const {
604+
std::tuple<TString, TString> BuildGetPointsCountHttpParams(const TString& program, TInstant from, TInstant to) const {
605+
TUrlBuilder builder(GetHttpSolomonEndpoint());
606+
607+
builder.AddPathComponent("api");
608+
builder.AddPathComponent("v2");
609+
builder.AddPathComponent("projects");
610+
builder.AddPathComponent(Settings.GetProject());
611+
builder.AddPathComponent("sensors");
612+
builder.AddPathComponent("data");
613+
614+
builder.AddUrlParam("projectId", GetProjectId());
615+
528616
const auto& ds = Settings.GetDownsampling();
529617
NJsonWriter::TBuf w;
530618
w.BeginObject()
531619
.UnsafeWriteKey("from").WriteString(from.ToString())
532620
.UnsafeWriteKey("to").WriteString(to.ToString())
533621
.UnsafeWriteKey("program").WriteString(program)
534-
.UnsafeWriteKey("forceCluster").WriteString(DefaultReplica)
535622
.UnsafeWriteKey("downsampling")
536623
.BeginObject()
537624
.UnsafeWriteKey("disabled").WriteBool(ds.GetDisabled());
@@ -544,7 +631,7 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
544631
}
545632
w.EndObject().EndObject();
546633

547-
return w.Str();
634+
return { builder.Build(), w.Str() };
548635
}
549636

550637
ReadRequest BuildGetDataRequest(const TString& program, TInstant from, TInstant to) const {
@@ -606,7 +693,8 @@ class TSolomonAccessorClient : public ISolomonAccessorClient, public std::enable
606693

607694
private:
608695
const TString DefaultReplica;
609-
const ui64 ListSizeLimit = 1ull << 20;
696+
const ui64 MaxListingPageSize;
697+
const ui64 ListSizeLimit = 100 * 1024 * 1024 * 8;
610698
const NYql::NSo::NProto::TDqSolomonSource Settings;
611699
const std::shared_ptr<NYdb::ICredentialsProvider> CredentialsProvider;
612700

@@ -630,12 +718,17 @@ ISolomonAccessorClient::Make(
630718
defaultReplica = it->second;
631719
}
632720

721+
ui64 maxListingPageSize = 20000;
722+
if (auto it = settings.find("maxListingPageSize"); it != settings.end()) {
723+
maxListingPageSize = FromString<ui64>(it->second);
724+
}
725+
633726
ui64 maxApiInflight = 40;
634727
if (auto it = settings.find("maxApiInflight"); it != settings.end()) {
635728
maxApiInflight = FromString<ui64>(it->second);
636729
}
637730

638-
return std::make_shared<TSolomonAccessorClient>(defaultReplica, maxApiInflight, std::move(source), credentialsProvider);
731+
return std::make_shared<TSolomonAccessorClient>(defaultReplica, maxListingPageSize, maxApiInflight, std::move(source), credentialsProvider);
639732
}
640733

641734
} // namespace NYql::NSo

ydb/library/yql/providers/solomon/solomon_accessor/client/solomon_accessor_client.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ class ISolomonAccessorClient {
2020

2121
public:
2222
virtual NThreading::TFuture<TGetLabelsResponse> GetLabelNames(const TSelectors& selectors, TInstant from, TInstant to) const = 0;
23-
virtual NThreading::TFuture<TListMetricsResponse> ListMetrics(const TSelectors& selectors, TInstant from, TInstant to, int pageSize, int page) const = 0;
23+
virtual NThreading::TFuture<TListMetricsResponse> ListMetrics(const TSelectors& selectors, TInstant from, TInstant to) const = 0;
24+
virtual NThreading::TFuture<TListMetricsLabelsResponse> ListMetricsLabels(const TSelectors& selectors, TInstant from, TInstant to) const = 0;
2425
virtual NThreading::TFuture<TGetPointsCountResponse> GetPointsCount(const TSelectors& selectors, TInstant from, TInstant to) const = 0;
2526
virtual NThreading::TFuture<TGetDataResponse> GetData(const TSelectors& selectors, TInstant from, TInstant to) const = 0;
2627
virtual NThreading::TFuture<TGetDataResponse> GetData(const TString& program, TInstant from, TInstant to) const = 0;

ydb/library/yql/providers/solomon/solomon_accessor/client/solomon_client_utils.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ TSolomonClientResponse<T>::TSolomonClientResponse(T&& result, ui64 downloadedByt
2525

2626
template class TSolomonClientResponse<TGetLabelsResult>;
2727
template class TSolomonClientResponse<TListMetricsResult>;
28+
template class TSolomonClientResponse<TListMetricsLabelsResult>;
2829
template class TSolomonClientResponse<TGetPointsCountResult>;
2930
template class TSolomonClientResponse<TGetDataResult>;
3031

0 commit comments

Comments
 (0)