Skip to content

Commit 60b7d98

Browse files
authored
Fix filter parsing failure on date fields with non-default format (#4616)
* Fix filter parsing failure on date fields with non-default format Signed-off-by: Heng Qian <qianheng@amazon.com> * Fix IT Signed-off-by: Heng Qian <qianheng@amazon.com> * Fix code after merging main Signed-off-by: Heng Qian <qianheng@amazon.com> --------- Signed-off-by: Heng Qian <qianheng@amazon.com>
1 parent 99e38ae commit 60b7d98

File tree

10 files changed

+199
-32
lines changed

10 files changed

+199
-32
lines changed

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ public void supportSearchSargPushDown_timeRange() throws IOException {
7070
"source=opensearch-sql_test_index_bank"
7171
+ "| where birthdate >= '2016-12-08 00:00:00.000000000' "
7272
+ "and birthdate < '2018-11-09 00:00:00.000000000'";
73-
var result = explainQueryToString(query);
74-
String expected = loadExpectedPlan("explain_sarg_filter_push_time_range.json");
75-
assertJsonEqualsIgnoreId(expected, result);
73+
var result = explainQueryYaml(query);
74+
String expected = loadExpectedPlan("explain_sarg_filter_push_time_range.yaml");
75+
assertYamlEqualsIgnoreId(expected, result);
7676
}
7777

7878
// Only for Calcite

integ-test/src/test/resources/expectedOutput/calcite/explain_filter_push_compare_timestamp_string.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ calcite:
66
LogicalFilter(condition=[>($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR))])
77
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
88
physical: |
9-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
9+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[('2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":false,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite/explain_filter_with_search.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ calcite:
88
LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2023-01-01 00:00:00':VARCHAR)), <($3, TIMESTAMP('2023-01-03 00:00:00':VARCHAR)))])
99
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
1010
physical: |
11-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"boost":1.0}}},{"exists":{"field":"birthdate","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
11+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[birthdate], FILTER->SEARCH($0, Sarg[['2023-01-01 00:00:00':VARCHAR..'2023-01-03 00:00:00':VARCHAR); NULL AS FALSE]:VARCHAR), AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0},count()=COUNT()), PROJECT->[count(), span(birthdate,1d)], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","query":{"bool":{"must":[{"range":{"birthdate":{"from":"2023-01-01T00:00:00.000Z","to":"2023-01-03T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},{"exists":{"field":"birthdate","boost":1.0}}],"adjust_pure_negative":true,"boost":1.0}},"_source":{"includes":["birthdate"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"span(birthdate,1d)":{"date_histogram":{"field":"birthdate","missing_bucket":false,"order":"asc","fixed_interval":"1d"}}}]}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite/explain_sarg_filter_push_time_range.json

Lines changed: 0 additions & 6 deletions
This file was deleted.
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])
5+
LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
7+
physical: |
8+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], FILTER->SEARCH($3, Sarg[['2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"range":{"birthdate":{"from":"2016-12-08T00:00:00.000Z","to":"2018-11-09T00:00:00.000Z","include_lower":true,"include_upper":false,"format":"date_time","boost":1.0}}},"_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_sarg_filter_push_time_range.json

Lines changed: 0 additions & 6 deletions
This file was deleted.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalProject(account_number=[$0], firstname=[$1], address=[$2], birthdate=[$3], gender=[$4], city=[$5], lastname=[$6], balance=[$7], employer=[$8], state=[$9], age=[$10], email=[$11], male=[$12])
5+
LogicalFilter(condition=[AND(>=($3, TIMESTAMP('2016-12-08 00:00:00.000000000':VARCHAR)), <($3, TIMESTAMP('2018-11-09 00:00:00.000000000':VARCHAR)))])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
7+
physical: |
8+
EnumerableLimit(fetch=[10000])
9+
EnumerableCalc(expr#0..18=[{inputs}], expr#19=[Sarg[['2016-12-08 00:00:00':VARCHAR..'2018-11-09 00:00:00':VARCHAR)]:VARCHAR], expr#20=[SEARCH($t3, $t19)], proj#0..12=[{exprs}], $condition=[$t20])
10+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: test
5+
body:
6+
mappings:
7+
properties:
8+
"startTimeMillis":
9+
type: date
10+
format: epoch_millis
11+
12+
- do:
13+
bulk:
14+
index: test
15+
refresh: true
16+
body:
17+
- '{"index": {"_id": "1"}}'
18+
- '{"startTimeMillis": 539325296000}'
19+
- '{"index": {"_id": "2"}}'
20+
- '{"startTimeMillis": "1715126504378"}'
21+
---
22+
"handle epoch_millis format field with equal":
23+
- skip:
24+
features:
25+
- headers
26+
- allowed_warnings
27+
- do:
28+
headers:
29+
Content-Type: 'application/json'
30+
ppl:
31+
body:
32+
query: source=test | eval example_time = STR_TO_DATE('1987-02-03 04:34:56', '%Y-%m-%d %H:%i:%S') | where startTimeMillis = example_time | fields startTimeMillis
33+
34+
- match: { total: 1 }
35+
- match: {"schema": [{"name": "startTimeMillis", "type": "timestamp"}]}
36+
- match: {"datarows": [["1987-02-03 04:34:56"]]}
37+
38+
---
39+
"handle epoch_millis format field with comparison":
40+
- skip:
41+
features:
42+
- headers
43+
- allowed_warnings
44+
- do:
45+
headers:
46+
Content-Type: 'application/json'
47+
ppl:
48+
body:
49+
query: source=test | eval example_time = STR_TO_DATE('1987-02-03 04:34:56', '%Y-%m-%d %H:%i:%S') | where startTimeMillis >= example_time | fields startTimeMillis
50+
51+
- match: { total: 2 }
52+
- match: {"schema": [{"name": "startTimeMillis", "type": "timestamp"}]}
53+
- match: {"datarows": [["1987-02-03 04:34:56"], ["2024-05-08 00:01:44.378"]]}

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1274,11 +1274,9 @@ public QueryExpression notLike(LiteralExpression literal) {
12741274
@Override
12751275
public QueryExpression equals(LiteralExpression literal) {
12761276
Object value = literal.value();
1277-
if (value instanceof GregorianCalendar) {
1277+
if (literal.isDateTime()) {
12781278
builder =
1279-
boolQuery()
1280-
.must(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value)))
1281-
.must(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lte(value)));
1279+
addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value).lte(value));
12821280
} else {
12831281
builder = termQuery(getFieldReferenceForTermQuery(), value);
12841282
}
@@ -1288,7 +1286,7 @@ public QueryExpression equals(LiteralExpression literal) {
12881286
@Override
12891287
public QueryExpression notEquals(LiteralExpression literal) {
12901288
Object value = literal.value();
1291-
if (value instanceof GregorianCalendar) {
1289+
if (literal.isDateTime()) {
12921290
builder =
12931291
boolQuery()
12941292
.should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gt(value)))
@@ -1399,8 +1397,12 @@ public QueryExpression notIn(LiteralExpression literal) {
13991397

14001398
@Override
14011399
public QueryExpression equals(Object point, boolean isTimeStamp) {
1402-
builder =
1403-
termQuery(getFieldReferenceForTermQuery(), convertEndpointValue(point, isTimeStamp));
1400+
Object value = convertEndpointValue(point, isTimeStamp);
1401+
if (isTimeStamp) {
1402+
builder = rangeQuery(getFieldReference()).gte(value).lte(value).format("date_time");
1403+
} else {
1404+
builder = termQuery(getFieldReferenceForTermQuery(), value);
1405+
}
14041406
return this;
14051407
}
14061408

@@ -1419,6 +1421,7 @@ public QueryExpression between(Range<?> range, boolean isTimeStamp) {
14191421
range.upperBoundType() == BoundType.CLOSED
14201422
? rangeQueryBuilder.lte(upperBound)
14211423
: rangeQueryBuilder.lt(upperBound);
1424+
if (isTimeStamp) rangeQueryBuilder.format("date_time");
14221425
builder = rangeQueryBuilder;
14231426
return this;
14241427
}
@@ -1511,7 +1514,7 @@ public List<RexNode> getUnAnalyzableNodes() {
15111514
*/
15121515
private static RangeQueryBuilder addFormatIfNecessary(
15131516
LiteralExpression literal, RangeQueryBuilder rangeQueryBuilder) {
1514-
if (literal.value() instanceof GregorianCalendar) {
1517+
if (literal.isDateTime()) {
15151518
rangeQueryBuilder.format("date_time");
15161519
}
15171520
return rangeQueryBuilder;
@@ -1634,7 +1637,7 @@ Object value() {
16341637
return doubleValue();
16351638
} else if (isBoolean()) {
16361639
return booleanValue();
1637-
} else if (isTimestamp()) {
1640+
} else if (isTimestamp() || isDate()) {
16381641
return timestampValueForPushDown(RexLiteral.stringValue(literal));
16391642
} else if (isString()) {
16401643
return RexLiteral.stringValue(literal);
@@ -1676,10 +1679,21 @@ public boolean isTimestamp() {
16761679
return false;
16771680
}
16781681

1682+
public boolean isDate() {
1683+
if (literal.getType() instanceof ExprSqlType exprSqlType) {
1684+
return exprSqlType.getUdt() == ExprUDT.EXPR_DATE;
1685+
}
1686+
return false;
1687+
}
1688+
16791689
public boolean isIp() {
16801690
return literal.getType() instanceof ExprIPType;
16811691
}
16821692

1693+
public boolean isDateTime() {
1694+
return rawValue() instanceof GregorianCalendar || isTimestamp() || isDate();
1695+
}
1696+
16831697
long longValue() {
16841698
return ((Number) literal.getValue()).longValue();
16851699
}

0 commit comments

Comments
 (0)