Skip to content

Commit 6ed2c01

Browse files
opensearch-trigger-bot[bot]github-actions[bot]yuancu
authored
[Backport 2.19-dev] Allow renaming group-by fields to existing field names (#4653)
* Allow renaming group-by fields to existing field names (#4586) * Rename fields to intended ones after aggregation Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Add a defense check Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Remove defense check Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Handle cases where there exist duplicated group keys Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> --------- Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> (cherry picked from commit a86a5a7) Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> * Downgrade language level to java 11 Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> --------- Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com> Co-authored-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent 397892a commit 6ed2c01

File tree

2 files changed

+147
-1
lines changed

2 files changed

+147
-1
lines changed

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1013,12 +1013,60 @@ private Pair<List<RexNode>, List<AggCall>> aggregateWithTrimming(
10131013
Pair<List<RexNode>, List<AggCall>> reResolved =
10141014
resolveAttributesForAggregation(groupExprList, aggExprList, context);
10151015

1016+
List<String> intendedGroupKeyAliases = getGroupKeyNamesAfterAggregation(reResolved.getLeft());
10161017
context.relBuilder.aggregate(
10171018
context.relBuilder.groupKey(reResolved.getLeft()), reResolved.getRight());
1019+
// During aggregation, Calcite projects both input dependencies and output group-by fields.
1020+
// When names conflict, Calcite adds numeric suffixes (e.g., "value0").
1021+
// Apply explicit renaming to restore the intended aliases.
1022+
context.relBuilder.rename(intendedGroupKeyAliases);
10181023

10191024
return Pair.of(reResolved.getLeft(), reResolved.getRight());
10201025
}
10211026

1027+
/**
1028+
* Imitates {@code Registrar.registerExpression} of {@link RelBuilder} to derive the output order
1029+
* of group-by keys after aggregation.
1030+
*
1031+
* <p>The projected input reference comes first, while any other computed expression follows.
1032+
*/
1033+
private List<String> getGroupKeyNamesAfterAggregation(List<RexNode> nodes) {
1034+
List<RexNode> reordered = new ArrayList<>();
1035+
List<RexNode> left = new ArrayList<>();
1036+
for (RexNode n : nodes) {
1037+
// The same group-key won't be added twice
1038+
if (reordered.contains(n) || left.contains(n)) {
1039+
continue;
1040+
}
1041+
if (isInputRef(n)) {
1042+
reordered.add(n);
1043+
} else {
1044+
left.add(n);
1045+
}
1046+
}
1047+
reordered.addAll(left);
1048+
return reordered.stream()
1049+
.map(this::extractAliasLiteral)
1050+
.flatMap(Optional::stream)
1051+
.map(RexLiteral::stringValue)
1052+
.collect(Collectors.toList());
1053+
}
1054+
1055+
/** Whether a rex node is an aliased input reference */
1056+
private boolean isInputRef(RexNode node) {
1057+
switch (node.getKind()) {
1058+
case AS:
1059+
case DESCENDING:
1060+
case NULLS_FIRST:
1061+
case NULLS_LAST: {
1062+
final List<RexNode> operands = ((RexCall) node).operands;
1063+
return isInputRef(operands.get(0));
1064+
}
1065+
default:
1066+
return node instanceof RexInputRef;
1067+
}
1068+
}
1069+
10221070
/**
10231071
* Resolve attributes for aggregation.
10241072
*
@@ -1117,7 +1165,7 @@ public RelNode visitAggregation(Aggregation node, CalcitePlanContext context) {
11171165
aggregationAttributes.getLeft().stream()
11181166
.map(this::extractAliasLiteral)
11191167
.flatMap(Optional::stream)
1120-
.map(ref -> ((RexLiteral) ref).getValueAs(String.class))
1168+
.map(ref -> ref.getValueAs(String.class))
11211169
.map(context.relBuilder::field)
11221170
.map(f -> (RexNode) f)
11231171
.collect(Collectors.toList());
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: time_test
5+
- do:
6+
query.settings:
7+
body:
8+
transient:
9+
plugins.calcite.enabled : true
10+
11+
- do:
12+
bulk:
13+
refresh: true
14+
body:
15+
- '{"index": {"_index": "time_test"}}'
16+
- '{"category":"A","value":1000,"@timestamp":"2024-01-01T00:00:00Z"}'
17+
- '{"index": {"_index": "time_test"}}'
18+
- '{"category":"B","value":2000,"@timestamp":"2024-01-01T00:05:00Z"}'
19+
- '{"index": {"_index": "time_test"}}'
20+
- '{"category":"A","value":1500,"@timestamp":"2024-01-01T00:10:00Z"}'
21+
- '{"index": {"_index": "time_test"}}'
22+
- '{"category":"C","value":3000,"@timestamp":"2024-01-01T00:20:00Z"}'
23+
24+
---
25+
teardown:
26+
- do:
27+
query.settings:
28+
body:
29+
transient:
30+
plugins.calcite.enabled : false
31+
32+
---
33+
"Test span aggregation with field name collision - basic case":
34+
- skip:
35+
features:
36+
- headers
37+
- allowed_warnings
38+
- do:
39+
headers:
40+
Content-Type: 'application/json'
41+
ppl:
42+
body:
43+
query: source=time_test | stats count() by span(value, 1000) as value
44+
45+
- match: { total: 3 }
46+
- match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "value", "type": "bigint"}] }
47+
- match: { datarows: [[2, 1000], [1, 2000], [1, 3000]] }
48+
49+
---
50+
"Test span aggregation with field name collision - multiple aggregations":
51+
- skip:
52+
features:
53+
- headers
54+
- allowed_warnings
55+
- do:
56+
headers:
57+
Content-Type: 'application/json'
58+
ppl:
59+
body:
60+
query: source=time_test | stats count(), avg(value) by span(value, 1000) as value
61+
62+
- match: { total: 3 }
63+
- match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "avg(value)", "type": "double"}, {"name": "value", "type": "bigint"}] }
64+
- match: { datarows: [[2, 1250.0, 1000], [1, 2000.0, 2000], [1, 3000.0, 3000]] }
65+
66+
---
67+
"Test span aggregation without name collision - multiple group-by":
68+
- skip:
69+
features:
70+
- headers
71+
- allowed_warnings
72+
- do:
73+
headers:
74+
Content-Type: 'application/json'
75+
ppl:
76+
body:
77+
query: source=time_test | stats count() by span(@timestamp, 10min) as @timestamp, category, value
78+
79+
- match: { total: 4 }
80+
- match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "@timestamp", "type": "timestamp"}, {"name": "category", "type": "string"}, {"name": "value", "type": "bigint"}] }
81+
- match: { datarows: [[1, "2024-01-01 00:00:00", "A", 1000], [1, "2024-01-01 00:10:00", "A", 1500], [1, "2024-01-01 00:00:00", "B", 2000], [1, "2024-01-01 00:20:00", "C", 3000]] }
82+
83+
---
84+
"Test span aggregation with duplicated group keys":
85+
- skip:
86+
features:
87+
- headers
88+
- allowed_warnings
89+
- do:
90+
headers:
91+
Content-Type: 'application/json'
92+
ppl:
93+
body:
94+
query: source=time_test | stats count() by value, value, span(@timestamp, 10min) as @timestamp
95+
96+
- match: { total: 4 }
97+
- match: { schema: [{"name": "count()", "type": "bigint"}, {"name": "@timestamp", "type": "timestamp"}, {"name": "value", "type": "bigint"}, {"name": "value0", "type": "bigint"}] }
98+
- match: { datarows: [[1, "2024-01-01 00:00:00", 1000, 1000], [1, "2024-01-01 00:10:00", 1500, 1500], [1, "2024-01-01 00:00:00", 2000, 2000], [1, "2024-01-01 00:20:00", 3000, 3000]] }

0 commit comments

Comments
 (0)