Skip to content

Commit c30d5d0

Browse files
authored
Support refering to implicit @timestamp field in span (#4138)
* Support refering to implicit @timestamp field in time-based aggregations Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Update documentation of stats to reflect that span can be used without specifying a field Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Move @timestamp reference to AST layer - Additionally refactored visitTimechartCommand to reuse spanLiteral definition Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Unit test visitSpanLiteral, vistSpanClause, and visitTimechartParamter Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Revert changes to Span will always have a field with the current implementation Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> * Throw exception for zero span Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> --------- Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent f6bb654 commit c30d5d0

File tree

9 files changed

+384
-41
lines changed

9 files changed

+384
-41
lines changed

core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@
8080
import org.opensearch.sql.ast.tree.Trendline;
8181
import org.opensearch.sql.ast.tree.UnresolvedPlan;
8282
import org.opensearch.sql.ast.tree.Values;
83+
import org.opensearch.sql.calcite.plan.OpenSearchConstants;
8384

8485
/** Class of static methods to create specific node instances. */
8586
@UtilityClass
@@ -491,15 +492,19 @@ public static Span spanFromSpanLengthLiteral(
491492
UnresolvedExpression field, Literal spanLengthLiteral) {
492493
if (spanLengthLiteral.getType() == DataType.STRING) {
493494
String spanText = spanLengthLiteral.getValue().toString();
494-
String valueStr = spanText.replaceAll("[^0-9]", "");
495-
String unitStr = spanText.replaceAll("[0-9]", "");
495+
String valueStr = spanText.replaceAll("[^0-9-]", "");
496+
String unitStr = spanText.replaceAll("[0-9-]", "");
496497

497498
if (valueStr.isEmpty()) {
498499
// No numeric value found, use the literal as-is
499500
return new Span(field, spanLengthLiteral, SpanUnit.NONE);
500501
} else {
501502
// Parse numeric value and unit
502503
Integer value = Integer.parseInt(valueStr);
504+
if (value <= 0) {
505+
throw new IllegalArgumentException(
506+
String.format("Zero or negative time interval not supported: %s", spanText));
507+
}
503508
SpanUnit unit = unitStr.isEmpty() ? SpanUnit.NONE : SpanUnit.of(unitStr);
504509
return span(field, intLiteral(value), unit);
505510
}
@@ -713,4 +718,9 @@ public static Bin bin(UnresolvedExpression field, Argument... arguments) {
713718
return DefaultBin.builder().field(field).alias(alias).build();
714719
}
715720
}
721+
722+
/** Get a reference to the implicit timestamp field {@code @timestamp} */
723+
public static Field referImplicitTimestampField() {
724+
return AstDSL.field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP);
725+
}
716726
}

docs/user/ppl/cmd/stats.rst

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ stats [bucket_nullable=bool] <aggregation>... [by-clause]
5858

5959
* span-expression: optional, at most one.
6060

61-
* Syntax: span(field_expr, interval_expr)
62-
* Description: The unit of the interval expression is the natural unit by default. **If the field is a date/time type field, the aggregation results always ignore null bucket**. And the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``.
61+
* Syntax: span([field_expr,] interval_expr)
62+
* Description: The unit of the interval expression is the natural unit by default. If ``field_expr`` is omitted, span will use the implicit ``@timestamp`` field. An error will be thrown if this field doesn't exist. **If the field is a date/time type field, the aggregation results always ignore null bucket**. And the interval is in date/time units, you will need to specify the unit in the interval expression. For example, to split the field ``age`` into buckets by 10 years, it looks like ``span(age, 10)``. And here is another example of time span, the span to split a ``timestamp`` field into hourly intervals, it looks like ``span(timestamp, 1h)``.
6363
* Available time unit:
6464

6565
+----------------------------+
@@ -580,7 +580,7 @@ Description
580580

581581
Version: 3.3.0 (Calcite engine only)
582582

583-
Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
583+
Usage: LIST(expr). Collects all values from the specified expression into an array. Values are converted to strings, nulls are filtered, and duplicates are preserved.
584584
The function returns up to 100 values with no guaranteed ordering.
585585

586586
* expr: The field expression to collect values from.
@@ -977,3 +977,18 @@ PPL query::
977977
| 1 | 2025-01-01 | 2 |
978978
+-----+------------+--------+
979979

980+
981+
Example 18: Calculate the count by the implicit @timestamp field
982+
================================================================
983+
984+
This example demonstrates that if you omit the field parameter in the span function, it will automatically use the implicit ``@timestamp`` field.
985+
986+
PPL query::
987+
988+
PPL> source=big5 | stats count() by span(1month)
989+
fetched rows / total rows = 1/1
990+
+---------+---------------------+
991+
| count() | span(1month) |
992+
|---------+---------------------|
993+
| 1 | 2023-01-01 00:00:00 |
994+
+---------+---------------------+

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import static org.opensearch.sql.util.MatcherUtils.rows;
1717
import static org.opensearch.sql.util.MatcherUtils.schema;
1818
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
19+
import static org.opensearch.sql.util.MatcherUtils.verifyErrorMessageContains;
1920
import static org.opensearch.sql.util.MatcherUtils.verifySchema;
2021
import static org.opensearch.sql.util.MatcherUtils.verifySchemaInOrder;
2122

@@ -25,6 +26,8 @@
2526
import org.json.JSONObject;
2627
import org.junit.jupiter.api.Test;
2728
import org.opensearch.client.Request;
29+
import org.opensearch.sql.common.utils.StringUtils;
30+
import org.opensearch.sql.exception.SemanticCheckException;
2831
import org.opensearch.sql.ppl.PPLIntegTestCase;
2932

3033
public class CalcitePPLAggregationIT extends PPLIntegTestCase {
@@ -41,6 +44,7 @@ public void init() throws Exception {
4144
loadIndex(Index.CALCS);
4245
loadIndex(Index.DATE_FORMATS);
4346
loadIndex(Index.DATA_TYPE_NUMERIC);
47+
loadIndex(Index.BIG5);
4448
loadIndex(Index.LOGS);
4549
loadIndex(Index.TELEMETRY);
4650
loadIndex(Index.TIME_TEST_DATA);
@@ -729,6 +733,23 @@ public void testCountBySpanForCustomFormats() throws IOException {
729733
verifyDataRows(actual, rows(1, "00:00:00"), rows(1, "12:00:00"));
730734
}
731735

736+
// Only available in v3 with Calcite
737+
@Test
738+
public void testSpanByImplicitTimestamp() throws IOException {
739+
JSONObject result = executeQuery("source=big5 | stats count() by span(1d) as span");
740+
verifySchema(result, schema("count()", "bigint"), schema("span", "timestamp"));
741+
verifyDataRows(result, rows(1, "2023-01-02 00:00:00"));
742+
743+
Throwable t =
744+
assertThrowsWithReplace(
745+
SemanticCheckException.class,
746+
() ->
747+
executeQuery(
748+
StringUtils.format(
749+
"source=%s | stats count() by span(5m)", TEST_INDEX_DATE_FORMATS)));
750+
verifyErrorMessageContains(t, "Field [@timestamp] not found");
751+
}
752+
732753
@Test
733754
public void testCountDistinct() throws IOException {
734755
JSONObject actual =
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
setup:
2+
- do:
3+
query.settings:
4+
body:
5+
transient:
6+
plugins.calcite.enabled : true
7+
- do:
8+
indices.create:
9+
index: test_timechart_span_validation
10+
body:
11+
mappings:
12+
properties:
13+
"@timestamp":
14+
type: date_nanos
15+
packets:
16+
type: long
17+
- do:
18+
bulk:
19+
index: test_timechart_span_validation
20+
refresh: true
21+
body:
22+
- '{"index": {}}'
23+
- '{"@timestamp": "2024-01-15T10:30:04.567890123Z", "packets": 100}'
24+
- '{"index": {}}'
25+
- '{"@timestamp": "2024-01-15T10:31:04.567890123Z", "packets": 150}'
26+
- '{"index": {}}'
27+
- '{"@timestamp": "2024-01-15T10:32:04.567890123Z", "packets": 120}'
28+
29+
---
30+
teardown:
31+
- do:
32+
query.settings:
33+
body:
34+
transient:
35+
plugins.calcite.enabled : false
36+
37+
---
38+
"timechart with zero span should return validation error":
39+
- skip:
40+
features:
41+
- headers
42+
- allowed_warnings
43+
- do:
44+
catch: bad_request
45+
headers:
46+
Content-Type: 'application/json'
47+
ppl:
48+
body:
49+
query: source=test_timechart_span_validation | timechart span=0m per_second(packets)
50+
- match: {"$body": "/Zero\\s+or\\s+negative\\s+time\\s+interval\\s+not\\s+supported/"}

ppl/src/main/antlr/OpenSearchPPLParser.g4

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -263,12 +263,8 @@ timechartCommand
263263
;
264264

265265
timechartParameter
266-
: (spanClause | SPAN EQUAL spanLiteral)
267-
| timechartArg
268-
;
269-
270-
timechartArg
271266
: LIMIT EQUAL integerLiteral
267+
| SPAN EQUAL spanLiteral
272268
| USEOTHER EQUAL (booleanLiteral | ident)
273269
;
274270

@@ -625,7 +621,7 @@ bySpanClause
625621
;
626622

627623
spanClause
628-
: SPAN LT_PRTHS fieldExpression COMMA value = spanLiteral RT_PRTHS
624+
: SPAN LT_PRTHS (fieldExpression COMMA)? value = spanLiteral RT_PRTHS
629625
;
630626

631627
sortbyClause

ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -621,39 +621,20 @@ public UnresolvedPlan visitReverseCommand(OpenSearchPPLParser.ReverseCommandCont
621621
@Override
622622
public UnresolvedPlan visitTimechartCommand(OpenSearchPPLParser.TimechartCommandContext ctx) {
623623
UnresolvedExpression binExpression =
624-
AstDSL.span(AstDSL.field("@timestamp"), AstDSL.intLiteral(1), SpanUnit.of("m"));
624+
AstDSL.span(AstDSL.referImplicitTimestampField(), AstDSL.intLiteral(1), SpanUnit.m);
625625
Integer limit = 10;
626626
Boolean useOther = true;
627627
// Process timechart parameters
628628
for (OpenSearchPPLParser.TimechartParameterContext paramCtx : ctx.timechartParameter()) {
629-
if (paramCtx.spanClause() != null) {
630-
binExpression = internalVisitExpression(paramCtx.spanClause());
631-
} else if (paramCtx.spanLiteral() != null) {
632-
Literal literal = (Literal) internalVisitExpression(paramCtx.spanLiteral());
633-
binExpression = AstDSL.spanFromSpanLengthLiteral(AstDSL.field("@timestamp"), literal);
634-
} else if (paramCtx.timechartArg() != null) {
635-
OpenSearchPPLParser.TimechartArgContext argCtx = paramCtx.timechartArg();
636-
if (argCtx.LIMIT() != null && argCtx.integerLiteral() != null) {
637-
limit = Integer.parseInt(argCtx.integerLiteral().getText());
638-
if (limit < 0) {
639-
throw new IllegalArgumentException("Limit must be a non-negative number");
640-
}
641-
} else if (argCtx.USEOTHER() != null) {
642-
if (argCtx.booleanLiteral() != null) {
643-
useOther = Boolean.parseBoolean(argCtx.booleanLiteral().getText());
644-
} else if (argCtx.ident() != null) {
645-
String useOtherValue = argCtx.ident().getText().toLowerCase();
646-
if ("true".equals(useOtherValue) || "t".equals(useOtherValue)) {
647-
useOther = true;
648-
} else if ("false".equals(useOtherValue) || "f".equals(useOtherValue)) {
649-
useOther = false;
650-
} else {
651-
throw new IllegalArgumentException(
652-
"Invalid useOther value: "
653-
+ argCtx.ident().getText()
654-
+ ". Expected true/false or t/f");
655-
}
656-
}
629+
UnresolvedExpression param = internalVisitExpression(paramCtx);
630+
if (param instanceof Span) {
631+
binExpression = param;
632+
} else if (param instanceof Literal literal) {
633+
if (DataType.BOOLEAN.equals(literal.getType())) {
634+
useOther = (Boolean) literal.getValue();
635+
} else if (DataType.INTEGER.equals(literal.getType())
636+
|| DataType.LONG.equals(literal.getType())) {
637+
limit = (Integer) literal.getValue();
657638
}
658639
}
659640
}

ppl/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,7 @@ public UnresolvedExpression visitSpanClause(SpanClauseContext ctx) {
636636
if (ctx.fieldExpression() != null) {
637637
fieldExpression = visit(ctx.fieldExpression());
638638
} else {
639-
fieldExpression = AstDSL.field("@timestamp");
639+
fieldExpression = AstDSL.referImplicitTimestampField();
640640
}
641641
Literal literal = (Literal) visit(ctx.value);
642642
return AstDSL.spanFromSpanLengthLiteral(fieldExpression, literal);
@@ -934,6 +934,47 @@ public UnresolvedExpression visitTimeModifierValue(
934934
return AstDSL.stringLiteral(osDateMathExpression);
935935
}
936936

937+
@Override
938+
public UnresolvedExpression visitTimechartParameter(
939+
OpenSearchPPLParser.TimechartParameterContext ctx) {
940+
UnresolvedExpression timechartParameter;
941+
if (ctx.SPAN() != null) {
942+
// Convert span=1h to span(@timestamp, 1h)
943+
Literal spanLiteral = (Literal) visit(ctx.spanLiteral());
944+
timechartParameter =
945+
AstDSL.spanFromSpanLengthLiteral(AstDSL.referImplicitTimestampField(), spanLiteral);
946+
} else if (ctx.LIMIT() != null) {
947+
Literal limit = (Literal) visit(ctx.integerLiteral());
948+
if ((Integer) limit.getValue() < 0) {
949+
throw new IllegalArgumentException("Limit must be a non-negative number");
950+
}
951+
timechartParameter = limit;
952+
} else if (ctx.USEOTHER() != null) {
953+
UnresolvedExpression useOther;
954+
if (ctx.booleanLiteral() != null) {
955+
useOther = visit(ctx.booleanLiteral());
956+
} else if (ctx.ident() != null) {
957+
QualifiedName ident = visitIdentifiers(List.of(ctx.ident()));
958+
String useOtherValue = ident.toString();
959+
if ("true".equalsIgnoreCase(useOtherValue) || "t".equalsIgnoreCase(useOtherValue)) {
960+
useOther = AstDSL.booleanLiteral(true);
961+
} else if ("false".equalsIgnoreCase(useOtherValue) || "f".equalsIgnoreCase(useOtherValue)) {
962+
useOther = AstDSL.booleanLiteral(false);
963+
} else {
964+
throw new IllegalArgumentException(
965+
"Invalid useOther value: " + ctx.ident().getText() + ". Expected true/false or t/f");
966+
}
967+
} else {
968+
throw new IllegalArgumentException("value for useOther must be a boolean or identifier");
969+
}
970+
timechartParameter = useOther;
971+
} else {
972+
throw new IllegalArgumentException(
973+
String.format("A parameter of timechart must be a span, limit or useOther, got %s", ctx));
974+
}
975+
return timechartParameter;
976+
}
977+
937978
/**
938979
* Process time range expressions (EARLIEST='value' or LATEST='value') It creates a Comparison
939980
* filter like @timestamp >= timeModifierValue

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTimechartTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
package org.opensearch.sql.ppl.calcite;
77

88
import static org.junit.Assert.assertNotNull;
9+
import static org.junit.Assert.assertThrows;
910

1011
import com.google.common.collect.ImmutableList;
1112
import java.util.List;
@@ -342,6 +343,13 @@ public void testTimechartWithUseOtherBeforeLimit() {
342343
assertNotNull(plan);
343344
}
344345

346+
@Test
347+
public void testTimechartUsingZeroSpanShouldThrow() {
348+
String ppl = "source=events | timechart span=0h limit=5 count() by host";
349+
Throwable t = assertThrows(IllegalArgumentException.class, () -> parsePPL(ppl));
350+
verifyErrorMessageContains(t, "Zero or negative time interval not supported: 0h");
351+
}
352+
345353
private UnresolvedPlan parsePPL(String query) {
346354
PPLSyntaxParser parser = new PPLSyntaxParser();
347355
AstBuilder astBuilder = new AstBuilder(query);

0 commit comments

Comments
 (0)