Skip to content

Commit b6f9694

Browse files
authored
fix(optimizer): avoid merging subquery with JOIN when outer query uses JOIN (#5999)
* fix(optimizer): prevent merging subqueries with joins when outer query uses a join * add test
1 parent c7657fb commit b6f9694

File tree

4 files changed

+49
-94
lines changed

4 files changed

+49
-94
lines changed

sqlglot/optimizer/merge_subqueries.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,7 @@ def _is_recursive():
201201
and not outer_scope.pivots
202202
and not any(e.find(exp.AggFunc, exp.Select, exp.Explode) for e in inner_select.expressions)
203203
and not (leave_tables_isolated and len(outer_scope.selected_sources) > 1)
204+
and not (isinstance(from_or_join, exp.Join) and inner_select.args.get("joins"))
204205
and not (
205206
isinstance(from_or_join, exp.Join)
206207
and inner_select.args.get("where")
@@ -283,13 +284,7 @@ def _merge_joins(outer_scope: Scope, inner_scope: Scope, from_or_join: FromOrJoi
283284

284285
joins = inner_scope.expression.args.get("joins") or []
285286

286-
outer_is_left_join = isinstance(from_or_join, exp.Join) and from_or_join.side == "LEFT"
287-
288287
for join in joins:
289-
if outer_is_left_join and not join.method and join.kind in ("", "INNER"):
290-
join.args.pop("kind", None)
291-
join.set("side", "LEFT")
292-
293288
new_joins.append(join)
294289
outer_scope.add_source(join.alias_or_name, inner_scope.sources[join.alias_or_name])
295290

tests/fixtures/optimizer/merge_subqueries.sql

Lines changed: 14 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -512,85 +512,30 @@ with cte as (
512512
select cte.mult from cte;
513513
SELECT x.a * x.b AS mult FROM x AS x;
514514

515-
# title: replace INNER JOIN with LEFT JOIN when it exists in a subquery used as the RHS of a LEFT JOIN
516-
WITH
517-
t0 AS (
518-
SELECT 5 as id
519-
),
520-
t1 AS (
521-
SELECT 1 AS id, 'US' AS cid
522-
),
523-
t2 AS (
524-
SELECT 1 AS id, 'US' AS cid
525-
)
526-
SELECT
527-
t0.id,
528-
t3.cid AS cid
529-
FROM t0
530-
LEFT JOIN (
515+
# title: avoid merging subquery with JOIN
516+
WITH t0 AS (
531517
SELECT
532-
t1.id,
533-
t2.cid
534-
FROM t1
535-
INNER JOIN t2
536-
ON t1.cid = t2.cid
537-
) AS t3
538-
ON t0.id = t3.id;
539-
WITH t0 AS (SELECT 5 AS id), t1 AS (SELECT 1 AS id, 'US' AS cid), t2 AS (SELECT 1 AS id, 'US' AS cid) SELECT t0.id AS id, t2.cid AS cid FROM t0 AS t0 LEFT JOIN t1 AS t1 ON t0.id = t1.id LEFT JOIN t2 AS t2 ON t1.cid = t2.cid;
540-
541-
# title: preserve INNER JOIN when it exists in a subquery used as the RHS of a RIGHT JOIN
542-
WITH
543-
t0 AS (
544-
SELECT 5 as id
545-
),
546-
t1 AS (
547-
SELECT 1 AS id, 'US' AS cid
548-
),
549-
t2 AS (
550-
SELECT 1 AS id, 'US' AS cid
518+
5 AS id
519+
), t1 AS (
520+
SELECT
521+
1 AS id,
522+
'US' AS cid
523+
), t2 AS (
524+
SELECT
525+
1 AS id,
526+
'US' AS cid
551527
)
552528
SELECT
553529
t0.id,
554530
t3.cid AS cid
555531
FROM t0
556-
RIGHT JOIN (
532+
INNER JOIN (
557533
SELECT
558534
t1.id,
559535
t2.cid
560536
FROM t1
561-
INNER JOIN t2
537+
RIGHT JOIN t2
562538
ON t1.cid = t2.cid
563539
) AS t3
564540
ON t0.id = t3.id;
565-
WITH t0 AS (SELECT 5 AS id), t1 AS (SELECT 1 AS id, 'US' AS cid), t2 AS (SELECT 1 AS id, 'US' AS cid) SELECT t0.id AS id, t2.cid AS cid FROM t0 AS t0 RIGHT JOIN t1 AS t1 ON t0.id = t1.id INNER JOIN t2 AS t2 ON t1.cid = t2.cid;
566-
567-
# title: replace multiple INNER JOINS with LEFT JOINS when they exist in a subquery used as the RHS of a LEFT JOIN
568-
WITH
569-
t0 AS (
570-
SELECT 5 as id
571-
),
572-
t1 AS (
573-
SELECT 1 AS id, 'US' AS cid
574-
),
575-
t2 AS (
576-
SELECT 1 AS id, 'US' AS cid
577-
),
578-
t3 AS (
579-
SELECT 1 AS id, 'CA' AS cid
580-
)
581-
SELECT
582-
t0.id,
583-
t4.cid AS cid
584-
FROM t0
585-
LEFT JOIN (
586-
SELECT
587-
t1.id,
588-
t3.cid
589-
FROM t1
590-
INNER JOIN t2
591-
ON t1.cid = t2.cid
592-
INNER JOIN t3
593-
ON t2.id = t3.id
594-
) AS t4
595-
ON t0.id = t4.id;
596-
WITH t0 AS (SELECT 5 AS id), t1 AS (SELECT 1 AS id, 'US' AS cid), t2 AS (SELECT 1 AS id, 'US' AS cid), t3 AS (SELECT 1 AS id, 'CA' AS cid) SELECT t0.id AS id, t3.cid AS cid FROM t0 AS t0 LEFT JOIN t1 AS t1 ON t0.id = t1.id LEFT JOIN t2 AS t2 ON t1.cid = t2.cid LEFT JOIN t3 AS t3 ON t2.id = t3.id;
541+
WITH t0 AS (SELECT 5 AS id), t1 AS (SELECT 1 AS id, 'US' AS cid), t2 AS (SELECT 1 AS id, 'US' AS cid) SELECT t0.id AS id, t3.cid AS cid FROM t0 AS t0 INNER JOIN (SELECT t1.id AS id, t2.cid AS cid FROM t1 AS t1 RIGHT JOIN t2 AS t2 ON t1.cid = t2.cid) AS t3 ON t0.id = t3.id;

tests/fixtures/optimizer/optimizer.sql

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -472,17 +472,22 @@ FROM (
472472
alias_1.id = alias_2.id
473473
)
474474
) AS main_query;
475+
WITH "alias_2" AS (
476+
SELECT
477+
"company_table_2"."id" AS "id"
478+
FROM "company_table" AS "company_table_2"
479+
LEFT JOIN "unlocked" AS "unlocked"
480+
ON "company_table_2"."id" = "unlocked"."company_id"
481+
WHERE
482+
CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE
483+
AND NOT "company_table_2"."id" IS NULL
484+
)
475485
SELECT
476486
"company_table"."id" AS "id",
477487
"company_table"."score" AS "score"
478488
FROM "company_table" AS "company_table"
479-
JOIN "company_table" AS "company_table_2"
480-
ON "company_table"."id" = "company_table_2"."id"
481-
LEFT JOIN "unlocked" AS "unlocked"
482-
ON "company_table_2"."id" = "unlocked"."company_id"
483-
WHERE
484-
CASE WHEN "unlocked"."company_id" IS NULL THEN 0 ELSE 1 END = FALSE
485-
AND NOT "company_table_2"."id" IS NULL;
489+
JOIN "alias_2" AS "alias_2"
490+
ON "alias_2"."id" = "company_table"."id";
486491

487492
# title: db.table alias clash
488493
# execute: false

tests/fixtures/optimizer/tpc-ds/tpc-ds.sql

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -198,24 +198,34 @@ WITH "wscs" AS (
198198
ON "date_dim"."d_date_sk" = "wscs"."sold_date_sk"
199199
GROUP BY
200200
"date_dim"."d_week_seq"
201+
), "z" AS (
202+
SELECT
203+
"wswscs"."d_week_seq" AS "d_week_seq2",
204+
"wswscs"."sun_sales" AS "sun_sales2",
205+
"wswscs"."mon_sales" AS "mon_sales2",
206+
"wswscs"."tue_sales" AS "tue_sales2",
207+
"wswscs"."wed_sales" AS "wed_sales2",
208+
"wswscs"."thu_sales" AS "thu_sales2",
209+
"wswscs"."fri_sales" AS "fri_sales2",
210+
"wswscs"."sat_sales" AS "sat_sales2"
211+
FROM "wswscs" AS "wswscs"
212+
JOIN "date_dim" AS "date_dim"
213+
ON "date_dim"."d_week_seq" = "wswscs"."d_week_seq" AND "date_dim"."d_year" = 1999
201214
)
202215
SELECT
203216
"wswscs"."d_week_seq" AS "d_week_seq1",
204-
ROUND("wswscs"."sun_sales" / "wswscs_2"."sun_sales", 2) AS "_col_1",
205-
ROUND("wswscs"."mon_sales" / "wswscs_2"."mon_sales", 2) AS "_col_2",
206-
ROUND("wswscs"."tue_sales" / "wswscs_2"."tue_sales", 2) AS "_col_3",
207-
ROUND("wswscs"."wed_sales" / "wswscs_2"."wed_sales", 2) AS "_col_4",
208-
ROUND("wswscs"."thu_sales" / "wswscs_2"."thu_sales", 2) AS "_col_5",
209-
ROUND("wswscs"."fri_sales" / "wswscs_2"."fri_sales", 2) AS "_col_6",
210-
ROUND("wswscs"."sat_sales" / "wswscs_2"."sat_sales", 2) AS "_col_7"
217+
ROUND("wswscs"."sun_sales" / "z"."sun_sales2", 2) AS "_col_1",
218+
ROUND("wswscs"."mon_sales" / "z"."mon_sales2", 2) AS "_col_2",
219+
ROUND("wswscs"."tue_sales" / "z"."tue_sales2", 2) AS "_col_3",
220+
ROUND("wswscs"."wed_sales" / "z"."wed_sales2", 2) AS "_col_4",
221+
ROUND("wswscs"."thu_sales" / "z"."thu_sales2", 2) AS "_col_5",
222+
ROUND("wswscs"."fri_sales" / "z"."fri_sales2", 2) AS "_col_6",
223+
ROUND("wswscs"."sat_sales" / "z"."sat_sales2", 2) AS "_col_7"
211224
FROM "wswscs" AS "wswscs"
212225
JOIN "date_dim" AS "date_dim"
213226
ON "date_dim"."d_week_seq" = "wswscs"."d_week_seq" AND "date_dim"."d_year" = 1998
214-
JOIN "wswscs" AS "wswscs_2"
215-
ON "wswscs"."d_week_seq" = "wswscs_2"."d_week_seq" - 53
216-
JOIN "date_dim" AS "date_dim_2"
217-
ON "date_dim_2"."d_week_seq" = "wswscs_2"."d_week_seq"
218-
AND "date_dim_2"."d_year" = 1999
227+
JOIN "z" AS "z"
228+
ON "wswscs"."d_week_seq" = "z"."d_week_seq2" - 53
219229
ORDER BY
220230
"d_week_seq1";
221231

0 commit comments

Comments
 (0)