Skip to content

Commit 5dd8387

Browse files
akoshchiy authored and adriangb committed
fix: skip predicates on struct unnest in PushDownFilter (apache#16790)
* fix: skip predicates on struct unnest in FilterPushdown * doc comments * fix
1 parent 4777d16 commit 5dd8387

File tree

3 files changed

+54
-10
lines changed

3 files changed

+54
-10
lines changed

datafusion/optimizer/src/push_down_filter.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use std::collections::{HashMap, HashSet};
2121
use std::sync::Arc;
2222

23+
use arrow::datatypes::DataType;
2324
use indexmap::IndexSet;
2425
use itertools::Itertools;
2526

@@ -875,14 +876,37 @@ impl OptimizerRule for PushDownFilter {
875876
let predicates = split_conjunction_owned(filter.predicate.clone());
876877
let mut non_unnest_predicates = vec![];
877878
let mut unnest_predicates = vec![];
879+
let mut unnest_struct_columns = vec![];
880+
881+
for idx in &unnest.struct_type_columns {
882+
let (sub_qualifier, field) =
883+
unnest.input.schema().qualified_field(*idx);
884+
let field_name = field.name().clone();
885+
886+
if let DataType::Struct(children) = field.data_type() {
887+
for child in children {
888+
let child_name = child.name().clone();
889+
unnest_struct_columns.push(Column::new(
890+
sub_qualifier.cloned(),
891+
format!("{field_name}.{child_name}"),
892+
));
893+
}
894+
}
895+
}
896+
878897
for predicate in predicates {
879898
// collect all the Expr::Column in predicate recursively
880899
let mut accum: HashSet<Column> = HashSet::new();
881900
expr_to_columns(&predicate, &mut accum)?;
882901

883-
if unnest.list_type_columns.iter().any(|(_, unnest_list)| {
884-
accum.contains(&unnest_list.output_column)
885-
}) {
902+
let contains_list_columns =
903+
unnest.list_type_columns.iter().any(|(_, unnest_list)| {
904+
accum.contains(&unnest_list.output_column)
905+
});
906+
let contains_struct_columns =
907+
unnest_struct_columns.iter().any(|c| accum.contains(c));
908+
909+
if contains_list_columns || contains_struct_columns {
886910
unnest_predicates.push(predicate);
887911
} else {
888912
non_unnest_predicates.push(predicate);

datafusion/sqllogictest/test_files/push_down_filter.slt

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,31 @@ physical_plan
128128
06)----------ProjectionExec: expr=[column1@0 as column1, column2@1 as __unnest_placeholder(d.column2)]
129129
07)------------DataSourceExec: partitions=1, partition_sizes=[1]
130130

131+
statement ok
132+
drop table d;
131133

134+
statement ok
135+
CREATE TABLE d AS VALUES (named_struct('a', 1, 'b', 2)), (named_struct('a', 3, 'b', 4)), (named_struct('a', 5, 'b', 6));
136+
137+
query II
138+
select * from (select unnest(column1) from d) where "__unnest_placeholder(d.column1).b" > 5;
139+
----
140+
5 6
141+
142+
query TT
143+
explain select * from (select unnest(column1) from d) where "__unnest_placeholder(d.column1).b" > 5;
144+
----
145+
physical_plan
146+
01)CoalesceBatchesExec: target_batch_size=8192
147+
02)--FilterExec: __unnest_placeholder(d.column1).b@1 > 5
148+
03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
149+
04)------UnnestExec
150+
05)--------ProjectionExec: expr=[column1@0 as __unnest_placeholder(d.column1)]
151+
06)----------DataSourceExec: partitions=1, partition_sizes=[1]
132152

133153
statement ok
134154
drop table d;
135155

136-
137156
# Test push down filter with limit for parquet
138157
statement ok
139158
set datafusion.execution.parquet.pushdown_filters = true;

docs/source/user-guide/sql/special_functions.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ Expands an array or map into rows.
6969
### `unnest (struct)`
7070

7171
Expands a struct's fields into individual columns.
72+
Each field of the struct is prefixed with `__unnest_placeholder` and can be accessed via `"__unnest_placeholder(<struct>).<field>"`.
7273

7374
#### Arguments
7475

@@ -91,10 +92,10 @@ Expand a struct fields into individual columns.
9192
+---------------------------+
9293

9394
> select unnest(struct_column) from foov;
94-
+------------------------------------------+------------------------------------------+
95-
| unnest_placeholder(foov.struct_column).a | unnest_placeholder(foov.struct_column).b |
96-
+------------------------------------------+------------------------------------------+
97-
| 5 | a string |
98-
| 6 | another string |
99-
+------------------------------------------+------------------------------------------+
95+
+--------------------------------------------+--------------------------------------------+
96+
| __unnest_placeholder(foov.struct_column).a | __unnest_placeholder(foov.struct_column).b |
97+
+--------------------------------------------+--------------------------------------------+
98+
| 5 | a string |
99+
| 6 | another string |
100+
+--------------------------------------------+--------------------------------------------+
100101
```

0 commit comments

Comments
 (0)