diff --git a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js index 32280f6d4e721..24b8d4ffa380b 100644 --- a/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js +++ b/packages/cubejs-schema-compiler/src/adapter/BaseQuery.js @@ -22,7 +22,6 @@ import { localTimestampToUtc, timeSeries as timeSeriesBase, timeSeriesFromCustomInterval, - parseSqlInterval, findMinGranularityDimension } from '@cubejs-backend/shared'; @@ -387,12 +386,59 @@ export class BaseQuery { } } + /** + * Is used by native + * This function follows the same logic as in this.collectJoinHints() + * @private + * @param {Array<(Array | string)>} hints + * @return {import('../compiler/JoinGraph').FinishedJoinTree} + */ + joinTreeForHints(hints) { + const explicitJoinHintMembers = new Set(hints.filter(j => Array.isArray(j)).flat()); + const queryJoinMaps = this.queryJoinMap(); + const newCollectedHints = []; + + const constructJH = () => R.uniq(this.enrichHintsWithJoinMap([ + ...newCollectedHints, + ...hints, + ], + queryJoinMaps)); + + let prevJoin = null; + let newJoin = null; + + // Safeguard against infinite loop in case of cyclic joins somehow managed to slip through + let cnt = 0; + let newJoinHintsCollectedCnt; + + do { + const allJoinHints = constructJH(); + prevJoin = newJoin; + newJoin = this.joinGraph.buildJoin(allJoinHints); + const allJoinHintsFlatten = new Set(allJoinHints.flat()); + const joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); + + const iterationCollectedHints = joinMembersJoinHints.filter(j => !allJoinHintsFlatten.has(j)); + newJoinHintsCollectedCnt = iterationCollectedHints.length; + cnt++; + if (newJoin) { + newCollectedHints.push(...joinMembersJoinHints.filter(j => !explicitJoinHintMembers.has(j))); + } + } while (newJoin?.joins.length > 0 && !this.isJoinTreesEqual(prevJoin, newJoin) && cnt < 10000 && newJoinHintsCollectedCnt > 0); + + if (cnt >= 10000) { + throw new UserError('Can not construct joins for the query, potential loop detected'); + } + + return newJoin; + } + cacheValue(key, fn, { contextPropNames, inputProps, cache } = {}) { const currentContext = this.safeEvaluateSymbolContext(); if (contextPropNames) { const contextKey = {}; - for (let i = 0; i < contextPropNames.length; i++) { - contextKey[contextPropNames[i]] = currentContext[contextPropNames[i]]; + for (const element of contextPropNames) { + contextKey[element] = currentContext[element]; } key = key.concat([JSON.stringify(contextKey)]); } @@ -436,83 +482,54 @@ export class BaseQuery { */ get allJoinHints() { if (!this.collectedJoinHints) { - const [rootOfJoin, ...allMembersJoinHints] = this.collectJoinHintsFromMembers(this.allMembersConcat(false)); - const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); - let joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(this.join)); - - // One cube may join the other cube via transitive joined cubes, - // members from which are referenced in the join `on` clauses. - // We need to collect such join hints and push them upfront of the joining one - // but only if they don't exist yet. Cause in other case we might affect what - // join path will be constructed in join graph. - // It is important to use queryLevelJoinHints during the calculation if it is set. - - const constructJH = () => { - const filteredJoinMembersJoinHints = joinMembersJoinHints.filter(m => !allMembersJoinHints.includes(m)); - return [ - ...this.queryLevelJoinHints, - ...(rootOfJoin ? [rootOfJoin] : []), - ...filteredJoinMembersJoinHints, - ...allMembersJoinHints, - ...customSubQueryJoinHints, - ]; - }; - - let prevJoins = this.join; - let prevJoinMembersJoinHints = joinMembersJoinHints; - let newJoin = this.joinGraph.buildJoin(constructJH()); - - const isOrderPreserved = (base, updated) => { - const common = base.filter(value => updated.includes(value)); - const bFiltered = updated.filter(value => common.includes(value)); - - return common.every((x, i) => x === bFiltered[i]); - }; - - const isJoinTreesEqual = (a, b) => { - if (!a || !b || a.root !== b.root || a.joins.length !== b.joins.length) { - return false; - } - - // We don't care about the order of joins on the same level, so - // we can compare them as sets. - const aJoinsSet = new Set(a.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); - const bJoinsSet = new Set(b.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); - - if (aJoinsSet.size !== bJoinsSet.size) { - return false; - } - - for (const val of aJoinsSet) { - if (!bJoinsSet.has(val)) { - return false; - } - } + this.collectedJoinHints = this.collectJoinHints(); + } + return this.collectedJoinHints; + } - return true; - }; + /** + * @private + * @return { Record} + */ + queryJoinMap() { + const queryMembers = this.allMembersConcat(false); + const joinMaps = {}; + + for (const member of queryMembers) { + const memberCube = member.cube?.(); + if (memberCube?.isView && !joinMaps[memberCube.name] && memberCube.joinMap) { + joinMaps[memberCube.name] = memberCube.joinMap; + } + } - // Safeguard against infinite loop in case of cyclic joins somehow managed to slip through - let cnt = 0; + return joinMaps; + } - while (newJoin?.joins.length > 0 && !isJoinTreesEqual(prevJoins, newJoin) && cnt < 10000) { - prevJoins = newJoin; - joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); - if (!isOrderPreserved(prevJoinMembersJoinHints, joinMembersJoinHints)) { - throw new UserError(`Can not construct joins for the query, potential loop detected: ${prevJoinMembersJoinHints.join('->')} vs ${joinMembersJoinHints.join('->')}`); - } - newJoin = this.joinGraph.buildJoin(constructJH()); - prevJoinMembersJoinHints = joinMembersJoinHints; - cnt++; + /** + * @private + * @param { (string|string[])[] } hints + * @param { Record} joinMap + * @return {(string|string[])[]} + */ + enrichHintsWithJoinMap(hints, joinMap) { + // Potentially, if joins between views would take place, we need to distinguish + // join maps on per view basis. + const allPaths = Object.values(joinMap).flat(); + + return hints.map(hint => { + if (Array.isArray(hint)) { + return hint; } - if (cnt >= 10000) { - throw new UserError('Can not construct joins for the query, potential loop detected'); + for (const path of allPaths) { + const hintIndex = path.indexOf(hint); + if (hintIndex !== -1) { + return path.slice(0, hintIndex + 1); + } } - this.collectedJoinHints = R.uniq(constructJH()); - } - return this.collectedJoinHints; + return hint; + }); } get dataSource() { @@ -2613,18 +2630,89 @@ export class BaseQuery { } /** - * + * Just a helper to avoid copy/paste + * @private + * @param {import('../compiler/JoinGraph').FinishedJoinTree} a + * @param {import('../compiler/JoinGraph').FinishedJoinTree} b + * @return {boolean} + */ + isJoinTreesEqual(a, b) { + if (!a || !b || a.root !== b.root || a.joins.length !== b.joins.length) { + return false; + } + + // We don't care about the order of joins on the same level, so + // we can compare them as sets. + const aJoinsSet = new Set(a.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); + const bJoinsSet = new Set(b.joins.map(j => `${j.originalFrom}->${j.originalTo}`)); + + if (aJoinsSet.size !== bJoinsSet.size) { + return false; + } + + for (const val of aJoinsSet) { + if (!bJoinsSet.has(val)) { + return false; + } + } + + return true; + } + + /** + * @private * @param {boolean} [excludeTimeDimensions=false] - * @returns {Array>} + * @returns {Array<(Array | string)>} */ collectJoinHints(excludeTimeDimensions = false) { - const membersToCollectFrom = [ - ...this.allMembersConcat(excludeTimeDimensions), - ...this.joinMembersFromJoin(this.join), - ...this.joinMembersFromCustomSubQuery(), - ]; + const allMembersJoinHints = this.collectJoinHintsFromMembers(this.allMembersConcat(excludeTimeDimensions)); + const explicitJoinHintMembers = new Set(allMembersJoinHints.filter(j => Array.isArray(j)).flat()); + const queryJoinMaps = this.queryJoinMap(); + const customSubQueryJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromCustomSubQuery()); + const newCollectedHints = []; + + // One cube may join the other cube via transitive joined cubes, + // members from which are referenced in the join `on` clauses. + // We need to collect such join hints and push them upfront of the joining one + // but only if they don't exist yet. Cause in other case we might affect what + // join path will be constructed in join graph. + // It is important to use queryLevelJoinHints during the calculation if it is set. + + const constructJH = () => R.uniq(this.enrichHintsWithJoinMap([ + ...this.queryLevelJoinHints, + ...newCollectedHints, + ...allMembersJoinHints, + ...customSubQueryJoinHints, + ], + queryJoinMaps)); + + let prevJoin = null; + let newJoin = null; + + // Safeguard against infinite loop in case of cyclic joins somehow managed to slip through + let cnt = 0; + let newJoinHintsCollectedCnt; + + do { + const allJoinHints = constructJH(); + prevJoin = newJoin; + newJoin = this.joinGraph.buildJoin(allJoinHints); + const allJoinHintsFlatten = new Set(allJoinHints.flat()); + const joinMembersJoinHints = this.collectJoinHintsFromMembers(this.joinMembersFromJoin(newJoin)); + + const iterationCollectedHints = joinMembersJoinHints.filter(j => !allJoinHintsFlatten.has(j)); + newJoinHintsCollectedCnt = iterationCollectedHints.length; + cnt++; + if (newJoin) { + newCollectedHints.push(...joinMembersJoinHints.filter(j => !explicitJoinHintMembers.has(j))); + } + } while (newJoin?.joins.length > 0 && !this.isJoinTreesEqual(prevJoin, newJoin) && cnt < 10000 && newJoinHintsCollectedCnt > 0); + + if (cnt >= 10000) { + throw new UserError('Can not construct joins for the query, potential loop detected'); + } - return this.collectJoinHintsFromMembers(membersToCollectFrom); + return constructJH(); } joinMembersFromCustomSubQuery() { diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts index 1161716d01378..e57efcb42a7f3 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeSymbols.ts @@ -133,6 +133,12 @@ export type AccessPolicyDefinition = { }; }; +export type ViewIncludedMember = { + type: string; + memberPath: string; + name: string; +}; + export interface CubeDefinition { name: string; extends?: (...args: Array) => { __cubeName: string }; @@ -159,7 +165,8 @@ export interface CubeDefinition { isView?: boolean; calendar?: boolean; isSplitView?: boolean; - includedMembers?: any[]; + includedMembers?: ViewIncludedMember[]; + joinMap?: string[][]; fileName?: string; } @@ -562,6 +569,8 @@ export class CubeSymbols implements TranspilerSymbolResolver { // `hierarchies` must be processed first const types = ['hierarchies', 'measures', 'dimensions', 'segments']; + const joinMap: string[][] = []; + for (const type of types) { let cubeIncludes: any[] = []; @@ -573,6 +582,11 @@ export class CubeSymbols implements TranspilerSymbolResolver { const split = fullPath.split('.'); const cubeRef = split[split.length - 1]; + // No need to keep a simple direct cube joins in join map + if (split.length > 1) { + joinMap.push(split); + } + if (it.includes === '*') { return it; } @@ -614,11 +628,7 @@ export class CubeSymbols implements TranspilerSymbolResolver { existing.map(({ type: t, memberPath, name }) => `${t}|${memberPath}|${name}`) ); - const additions: { - type: string; - memberPath: string; - name: string; - }[] = []; + const additions: ViewIncludedMember[] = []; for (const { member, name } of cubeIncludes) { const parts = member.split('.'); @@ -636,6 +646,8 @@ export class CubeSymbols implements TranspilerSymbolResolver { } } + cube.joinMap = joinMap; + [...memberSets.allMembers].filter(it => !memberSets.resolvedMembers.has(it)).forEach(it => { errorReporter.error(`Member '${it}' is included in '${cube.name}' but not defined in any cube`); }); diff --git a/packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts b/packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts index 17a75f78e66f3..c0f569f265ca4 100644 --- a/packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts +++ b/packages/cubejs-schema-compiler/src/compiler/DataSchemaCompiler.ts @@ -736,7 +736,7 @@ export class DataSchemaCompiler { if (e.toString().indexOf('SyntaxError') !== -1) { const err = e as SyntaxErrorInterface; const line = file.content.split('\n')[(err.loc?.start?.line || 1) - 1]; - const spaces = Array(err.loc?.start.column).fill(' ').join(''); + const spaces = Array(err.loc?.start?.column).fill(' ').join('') || ''; errorsReport.error(`Syntax error during parsing: ${err.message}:\n${line}\n${spaces}^`, file.fileName); } else { errorsReport.error(e); diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts index 64e1d54b3cb02..470876605c21b 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/sql-generation.test.ts @@ -5198,7 +5198,7 @@ cubes: sql: amount type: sum -# Join loop for testing transitive joins +# Model for testing multiple joins to the same cube via transitive joins - name: alpha_facts sql: | ( @@ -5215,7 +5215,7 @@ cubes: sql: "{CUBE}.b_id = {gamma_dims.b_id}" - name: delta_bridge relationship: many_to_one - sql: "{beta_dims.a_name} = {delta_bridge.a_name} AND {gamma_dims.b_name} = {delta_bridge.b_name}" + sql: "{beta_dims.a_name} = {delta_bridge.a_name} AND {gamma_dims.b_name} = {delta_bridge.c_name}" dimensions: - name: reporting_date sql: reporting_date @@ -5256,9 +5256,9 @@ cubes: - name: gamma_dims sql: | ( - SELECT 10 AS b_id, 'Beta1' AS b_name + SELECT 10 AS b_id, 'Beta1' AS b_name, 'Gamma1' AS c_name UNION ALL - SELECT 20 AS b_id, 'Beta2' AS b_name + SELECT 20 AS b_id, 'Beta2' AS b_name, 'Gamma2' AS c_name ) dimensions: - name: b_id @@ -5272,16 +5272,16 @@ cubes: - name: delta_bridge sql: | ( - SELECT 'Alpha1' AS a_name, 'Beta1' AS b_name, 'Organic' AS channel + SELECT 'Alpha1' AS a_name, 'Beta1' AS b_name, 'Beta1' AS c_name, 'Organic' AS channel UNION ALL - SELECT 'Alpha1' AS a_name, 'Beta2' AS b_name, 'Paid' AS channel + SELECT 'Alpha2' AS a_name, 'Beta2' AS b_name, 'Beta2' AS c_name, 'Paid' AS channel UNION ALL - SELECT 'Alpha2' AS a_name, 'Beta1' AS b_name, 'Referral' AS channel + SELECT 'Alpha1' AS a_name, 'Beta1' AS b_name, 'Beta3' AS c_name, 'Referral' AS channel ) joins: - name: gamma_dims relationship: many_to_one - sql: "{CUBE}.b_name = {gamma_dims.b_name}" + sql: "{CUBE}.c_name = {gamma_dims.c_name}" dimensions: - name: a_name sql: a_name @@ -5290,87 +5290,86 @@ cubes: - name: b_name sql: "{gamma_dims.b_name}" type: string - primary_key: true + - name: c_name + sql: c_name + type: string - name: channel sql: channel type: string `); - if (!getEnv('nativeSqlPlanner')) { - it('querying cube dimension that require transitive joins', async () => { - await compiler.compile(); - const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { - measures: [], - dimensions: [ - 'test_facts.reporting_date', - 'test_facts.merchant_sk', - 'test_facts.product_sk', - 'test_facts.acquisition_channel' - ], - order: [{ - id: 'test_facts.acquisition_channel' - }], - timezone: 'America/Los_Angeles' - }); + it('querying cube dimension that require transitive joins', async () => { + await compiler.compile(); + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'test_facts.reporting_date', + 'test_facts.merchant_sk', + 'test_facts.product_sk', + 'test_facts.acquisition_channel' + ], + order: [{ + id: 'test_facts.acquisition_channel' + }], + timezone: 'America/Los_Angeles' + }); - const res = await dbRunner.testQuery(query.buildSqlAndParams()); - console.log(JSON.stringify(res)); + const res = await dbRunner.testQuery(query.buildSqlAndParams()); + console.log(JSON.stringify(res)); - expect(res).toEqual([ - { - test_facts__acquisition_channel: 'Organic', - test_facts__merchant_sk: 101, - test_facts__product_sk: 201, - test_facts__reporting_date: '2023-01-01T00:00:00.000Z', - }, - { - test_facts__acquisition_channel: 'Paid', - test_facts__merchant_sk: 101, - test_facts__product_sk: 202, - test_facts__reporting_date: '2023-01-01T00:00:00.000Z', - }, - { - test_facts__acquisition_channel: 'Referral', - test_facts__merchant_sk: 102, - test_facts__product_sk: 201, - test_facts__reporting_date: '2023-01-02T00:00:00.000Z', - }, - ]); - }); - } else { - it.skip('FIXME(tesseract): querying cube dimension that require transitive joins', async () => { - // FIXME should be implemented in Tesseract - }); - } + expect(res).toEqual([ + { + test_facts__acquisition_channel: 'Organic', + test_facts__merchant_sk: 101, + test_facts__product_sk: 201, + test_facts__reporting_date: '2023-01-01T00:00:00.000Z', + }, + { + test_facts__acquisition_channel: 'Paid', + test_facts__merchant_sk: 101, + test_facts__product_sk: 202, + test_facts__reporting_date: '2023-01-01T00:00:00.000Z', + }, + { + test_facts__acquisition_channel: 'Referral', + test_facts__merchant_sk: 102, + test_facts__product_sk: 201, + test_facts__reporting_date: '2023-01-02T00:00:00.000Z', + }, + ]); + }); - if (!getEnv('nativeSqlPlanner')) { - it('querying cube with transitive joins with loop', async () => { - await compiler.compile(); - - try { - const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { - measures: [], - dimensions: [ - 'alpha_facts.reporting_date', - 'delta_bridge.b_name', - 'alpha_facts.channel' - ], - order: [{ - id: 'alpha_facts.reporting_date' - }], - timezone: 'America/Los_Angeles' - }); - - await dbRunner.testQuery(query.buildSqlAndParams()); - throw new Error('Should have thrown an error'); - } catch (err: any) { - expect(err.message).toContain('Can not construct joins for the query, potential loop detected'); - } - }); - } else { - it.skip('FIXME(tesseract): querying cube dimension that require transitive joins', async () => { - // FIXME should be implemented in Tesseract + it('querying cube with transitive joins with a few joins to the same cube', async () => { + await compiler.compile(); + + const query = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, { + measures: [], + dimensions: [ + 'alpha_facts.reporting_date', + 'delta_bridge.b_name', + 'alpha_facts.channel' + ], + order: [{ + id: 'alpha_facts.reporting_date' + }], + timezone: 'America/Los_Angeles' }); - } + + const res = await dbRunner.testQuery(query.buildSqlAndParams()); + console.log(JSON.stringify(res)); + + expect(res).toEqual([ + { + alpha_facts__channel: 'Organic', + alpha_facts__reporting_date: '2023-01-01T00:00:00.000Z', + delta_bridge__b_name: 'Beta1', + }, + { + alpha_facts__channel: 'Paid', + alpha_facts__reporting_date: '2023-01-02T00:00:00.000Z', + delta_bridge__b_name: 'Beta2', + }, + ]); + }); }); }); diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-2.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-2.test.ts index 20dac065bec76..0eb9b7f5f5d00 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-2.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-2.test.ts @@ -1,6 +1,6 @@ -import { getEnv } from '@cubejs-backend/shared'; import { prepareJsCompiler } from '../../unit/PrepareCompiler'; import { dbRunner } from './PostgresDBRunner'; +import { transformResultsForTesseractIfNeeded } from '../../unit/utils'; describe('Views Join Order 2', () => { jest.setTimeout(200000); @@ -60,14 +60,14 @@ cube('A', { cube('B', { sql: \` - SELECT 2 id, 'b'::text as "name"\`, + SELECT 2 id, 4 as fk_e, 'b'::text as "name"\`, joins: { A: { relationship: \`many_to_one\`, sql: \`\${CUBE.fk} = \${A.id}\`, }, E: { - sql: \`\${CUBE.name} = \${E.id}\`, + sql: \`\${CUBE}.fk_e = \${E.id}\`, relationship: \`many_to_one\`, }, }, @@ -144,61 +144,31 @@ cube('D', { }); `); - if (getEnv('nativeSqlPlanner')) { - it('join order', async () => dbRunner.runQueryTest({ - dimensions: [ - 'View.A_id', - 'View.A_name', - 'View.B_id', - 'View.B_name', - 'View.D_id', - 'View.D_name', - 'View.E_id', - 'View.E_name' - ], - timeDimensions: [], - segments: [], - filters: [], - total: true, - renewQuery: false, - limit: 1 - }, [{ - view__a_id: 1, - view__a_name: 'a', - view__b_id: 2, - view__b_name: 'b', - view__d_id: 3, - view__d_name: 'd', - view__e_id: 4, - view__e_name: 'e', - }], { compiler, joinGraph, cubeEvaluator })); - } else { - it('join order', async () => dbRunner.runQueryTest({ - dimensions: [ - 'View.A_id', - 'View.A_name', - 'View.B_id', - 'View.B_name', - 'View.D_id', - 'View.D_name', - 'View.E_id', - 'View.E_name' - ], - timeDimensions: [], - segments: [], - filters: [], - total: true, - renewQuery: false, - limit: 1 - }, [{ - view___a_id: 1, - view___a_name: 'a', - view___b_id: 2, - view___b_name: 'b', - view___d_id: 3, - view___d_name: 'd', - view___e_id: 4, - view___e_name: 'e', - }], { compiler, joinGraph, cubeEvaluator })); - } + it('join order', async () => dbRunner.runQueryTest({ + dimensions: [ + 'View.A_id', + 'View.A_name', + 'View.B_id', + 'View.B_name', + 'View.D_id', + 'View.D_name', + 'View.E_id', + 'View.E_name' + ], + timeDimensions: [], + segments: [], + filters: [], + total: true, + renewQuery: false, + limit: 1 + }, transformResultsForTesseractIfNeeded([{ + view___a_id: 1, + view___a_name: 'a', + view___b_id: 2, + view___b_name: 'b', + view___d_id: 3, + view___d_name: 'd', + view___e_id: 4, + view___e_name: 'e', + }]), { compiler, joinGraph, cubeEvaluator })); }); diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-3.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-3.test.ts new file mode 100644 index 0000000000000..0c87f01bf2b58 --- /dev/null +++ b/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-3.test.ts @@ -0,0 +1,178 @@ +import { prepareJsCompiler } from '../../unit/PrepareCompiler'; +import { dbRunner } from './PostgresDBRunner'; +import { transformResultsForTesseractIfNeeded } from '../../unit/utils'; + +/** + * This tests the cube join correctness for cases, when there are + * multiple equal-cost paths between few cubes via transitive joins. + */ + +describe('Views Join Order 3', () => { + jest.setTimeout(200000); + + const { compiler, joinGraph, cubeEvaluator } = prepareJsCompiler( + // language=JavaScript + ` +cube(\`D\`, { + sql: \`SELECT 1 id, 125 as balance\`, + dimensions: { + Balance: { + sql: \`balance\`, + type: \`number\` + } + } +}); + +cube(\`A\`, { + sql: \`SELECT 1 id, 250 as balance\`, + joins: { + E: { + sql: \`\${CUBE}.id = \${E}.id\`, + relationship: \`many_to_one\` + }, + B: { + sql: \`\${CUBE}.id = \${B}.id\`, + relationship: \`many_to_one\` + }, + C: { + sql: \`\${CUBE}.id = \${C}.id\`, + relationship: \`many_to_one\` + } + }, + dimensions: { + Balance: { + sql: \`balance\`, + type: \`number\` + } + } +}); + +cube('B', { + sql: \`SELECT 1 id\`, + joins: { + D: { + sql: \`\${CUBE}.id = \${D}.id\`, + relationship: \`many_to_one\` + }, + E: { + sql: \`\${CUBE}.id = \${E}.id\`, + relationship: \`many_to_one\` + } + }, + dimensions: { + ActivityBalance: { + sql: \`\${D.Balance}\`, + type: \`number\` + } + } +}); + +cube(\`E\`, { + sql: \`SELECT 1 id, 1 as plan_id, 1 as party_id\`, + joins: { + D: { + sql: \`\${CUBE}.id = \${D}.id\`, + relationship: \`many_to_one\` + }, + F: { + sql: \`\${CUBE}.plan_id = \${F}.plan_id\`, + relationship: \`many_to_one\` + }, + C: { + sql: \`\${CUBE}.party_id = \${C}.party_id\`, + relationship: \`many_to_one\` + } + } +}); + +cube('C', { + sql: \`SELECT 1 id, 1 as plan_id, 1 as party_id\`, + joins: { + F: { + sql: \`\${CUBE}.plan_id = \${F}.plan_id\`, + relationship: \`many_to_one\` + } + } +}); + +cube(\`F\`, { + sql: \`SELECT 1 id, 1 as plan_id, 'PLAN_CODE'::text as plan_code\`, + dimensions: { + PlanCode: { + sql: \`plan_code\`, + type: \`string\` + } + } +}); + +view(\`V\`, { + cubes: [ + { + join_path: A.B, + includes: [\`ActivityBalance\`] + }, + { + join_path: A.C.F, + includes: [\`PlanCode\`] + } + ] +}); + ` + ); + + it('correct join for simple cube B dimension', async () => { + const [sql, _params] = await dbRunner.runQueryTest({ + dimensions: ['B.ActivityBalance'], + timeDimensions: [], + segments: [], + filters: [], + }, transformResultsForTesseractIfNeeded([{ + b___activity_balance: 125, + }]), { compiler, joinGraph, cubeEvaluator }); + + expect(sql).toMatch(/AS "b"/); + expect(sql).toMatch(/AS "d"/); + expect(sql).toMatch(/ON "b".id = "d".id/); + expect(sql).not.toMatch(/AS "a"/); + expect(sql).not.toMatch(/AS "e"/); + expect(sql).not.toMatch(/AS "c"/); + }); + + it('correct join for simple view B-dimension', async () => dbRunner.runQueryTest({ + dimensions: ['V.ActivityBalance'], + timeDimensions: [], + segments: [], + filters: [], + }, transformResultsForTesseractIfNeeded([{ + v___activity_balance: 125, + }]), { compiler, joinGraph, cubeEvaluator })); + + it('correct join for simple view F-dimension', async () => dbRunner.runQueryTest({ + dimensions: ['V.PlanCode'], + timeDimensions: [], + segments: [], + filters: [], + }, transformResultsForTesseractIfNeeded([{ + v___plan_code: 'PLAN_CODE', + }]), { compiler, joinGraph, cubeEvaluator })); + + it('correct join for view F-dimension + B-dimension', async () => dbRunner.runQueryTest({ + dimensions: ['V.PlanCode', 'V.ActivityBalance'], + timeDimensions: [], + segments: [], + filters: [], + }, transformResultsForTesseractIfNeeded([{ + v___plan_code: 'PLAN_CODE', + v___activity_balance: 125, + }]), { compiler, joinGraph, cubeEvaluator })); + + it('correct join for view B-dimension + F-dimension', async () => dbRunner.runQueryTest({ + dimensions: ['V.ActivityBalance', 'V.PlanCode'], + timeDimensions: [], + segments: [], + filters: [], + }, transformResultsForTesseractIfNeeded([{ + v___activity_balance: 125, + v___plan_code: 'PLAN_CODE', + }]), { compiler, joinGraph, cubeEvaluator })); +}); diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-join-maps.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-join-maps.test.ts new file mode 100644 index 0000000000000..975ab47c9b1d4 --- /dev/null +++ b/packages/cubejs-schema-compiler/test/integration/postgres/views-join-order-join-maps.test.ts @@ -0,0 +1,212 @@ +import { prepareJsCompiler } from '../../unit/PrepareCompiler'; +import { dbRunner } from './PostgresDBRunner'; +import { transformResultsForTesseractIfNeeded } from '../../unit/utils'; + +describe('Views Join Order using join maps', () => { + jest.setTimeout(200000); + + const { compiler, joinGraph, cubeEvaluator } = prepareJsCompiler( + // language=JavaScript + ` +view(\`View\`, { + description: 'A view', + cubes: [ + { + join_path: A, + includes: ['id', 'name', 'c_name', 'd_name'], + prefix: true, + }, + { + join_path: A.B.C.D, + includes: ['id', 'name'], + prefix: true, + }, + ], +}); + +cube('A', { + sql: \` + SELECT 1 id, 'a'::text as "name"\`, + joins: { + B: { + relationship: \`one_to_many\`, + sql: \`\${CUBE.id} = \${B}.fk\`, + }, + C: { + relationship: \`one_to_many\`, + sql: \`\${CUBE.id} = \${C}.fk_a\`, + }, + D: { + relationship: \`one_to_many\`, + sql: \`\${CUBE.id} = \${D}.fk\`, + }, + }, + dimensions: { + id: { + sql: \`id\`, + type: \`string\`, + primaryKey: true, + }, + name: { + sql: \`\${CUBE}."name"\`, + type: \`string\`, + }, + c_name: { + sql: \`\${C.name}\`, + type: \`string\`, + }, + d_name: { + sql: \`\${D.name}\`, + type: \`string\`, + }, + }, +}); + +cube('B', { + sql: \` + SELECT 2 id, 1 as fk, 'b'::text as "name"\`, + joins: { + C: { + relationship: \`many_to_one\`, + sql: \`\${CUBE.id} = \${C}.fk\`, + }, + }, + + dimensions: { + id: { + sql: \`id\`, + type: \`string\`, + primaryKey: true, + }, + name: { + sql: \`\${CUBE}."name"\`, + type: \`string\`, + }, + }, +}); + +cube('C', { + sql: \` + SELECT 3 id, 2 as fk, 2 as fk_a, 'c1'::text as "name" + UNION ALL + SELECT 4 id, 3 as fk, 1 as fk_a, 'c2'::text as "name"\`, + joins: { + D: { + relationship: \`many_to_one\`, + sql: \`\${CUBE.id} = \${D}.fk_d\`, + }, + }, + dimensions: { + id: { + sql: \`id\`, + type: \`string\`, + primaryKey: true, + }, + name: { + sql: \`\${CUBE}."name"\`, + type: \`string\`, + }, + }, +}); + +cube('D', { + sql: \` + SELECT 4 id, 1 as fk, 1 fk_d, 'd1'::text as "name" + UNION ALL + SELECT 5 id, 3 as fk, 3 fk_d, 'd3'::text as "name"\`, + dimensions: { + id: { + sql: \`id\`, + type: \`string\`, + primaryKey: true, + }, + name: { + sql: \`\${CUBE}."name"\`, + type: \`string\`, + }, + }, +}); + ` + ); + + it('querying A member proxied to leaf D', async () => { + const [sql, _params] = await dbRunner.runQueryTest({ + dimensions: [ + 'View.A_id', + 'View.A_name', + 'View.A_d_name', + ], + timeDimensions: [], + segments: [], + filters: [], + total: true, + }, transformResultsForTesseractIfNeeded([{ + view___a_id: 1, + view___a_name: 'a', + view___a_d_name: 'd3', + }]), { compiler, joinGraph, cubeEvaluator }); + + expect(sql).toMatch(/AS "b"/); + expect(sql).toMatch(/AS "c"/); + expect(sql).toMatch(/AS "d"/); + expect(sql).toMatch(/ON "a".id = "b".fk/); + expect(sql).toMatch(/ON "b".id = "c".fk/); + expect(sql).toMatch(/ON "c".id = "d".fk_d/); + expect(sql).not.toMatch(/ON "a".id = "d".fk/); + }); + + it('querying A member proxied to non-leaf C', async () => { + const [sql, _params] = await dbRunner.runQueryTest({ + dimensions: [ + 'View.A_id', + 'View.A_name', + 'View.A_c_name', + ], + timeDimensions: [], + segments: [], + filters: [], + total: true, + }, transformResultsForTesseractIfNeeded([{ + view___a_id: 1, + view___a_name: 'a', + view___a_c_name: 'c1', + }]), { compiler, joinGraph, cubeEvaluator }); + + expect(sql).toMatch(/AS "b"/); + expect(sql).toMatch(/AS "c"/); + expect(sql).toMatch(/ON "a".id = "b".fk/); + expect(sql).toMatch(/ON "b".id = "c".fk/); + expect(sql).not.toMatch(/ON "c".id = "d".fk_d/); + expect(sql).not.toMatch(/AS "d"/); + expect(sql).not.toMatch(/ON "a".id = "c".fk_a/); + }); + + it('querying A member proxied to non-leaf C', async () => { + const [sql, _params] = await dbRunner.runQueryTest({ + dimensions: [ + 'View.A_id', + 'View.A_name', + 'View.A_c_name', + 'View.A_d_name', + ], + timeDimensions: [], + segments: [], + filters: [], + total: true, + }, transformResultsForTesseractIfNeeded([{ + view___a_id: 1, + view___a_name: 'a', + view___a_c_name: 'c1', + view___a_d_name: 'd3', + }]), { compiler, joinGraph, cubeEvaluator }); + + expect(sql).toMatch(/AS "b"/); + expect(sql).toMatch(/AS "c"/); + expect(sql).toMatch(/AS "d"/); + expect(sql).toMatch(/ON "a".id = "b".fk/); + expect(sql).toMatch(/ON "b".id = "c".fk/); + expect(sql).toMatch(/ON "c".id = "d".fk_d/); + expect(sql).not.toMatch(/ON "a".id = "c".fk_a/); + expect(sql).not.toMatch(/ON "a".id = "d".fk/); + }); +}); diff --git a/packages/cubejs-schema-compiler/test/unit/__snapshots__/schema.test.ts.snap b/packages/cubejs-schema-compiler/test/unit/__snapshots__/schema.test.ts.snap index 7f79bc7421f29..b0742584817db 100644 --- a/packages/cubejs-schema-compiler/test/unit/__snapshots__/schema.test.ts.snap +++ b/packages/cubejs-schema-compiler/test/unit/__snapshots__/schema.test.ts.snap @@ -1865,6 +1865,7 @@ Object { }, ], "isView": true, + "joinMap": Array [], "joins": Array [], "measures": Object { "count": Object { diff --git a/packages/cubejs-schema-compiler/test/unit/error-reporter.test.ts b/packages/cubejs-schema-compiler/test/unit/error-reporter.test.ts index 889d34727d0ba..a846bacbda5d8 100644 --- a/packages/cubejs-schema-compiler/test/unit/error-reporter.test.ts +++ b/packages/cubejs-schema-compiler/test/unit/error-reporter.test.ts @@ -11,7 +11,7 @@ describe('ErrorReporter', () => { // Test inFile and exitFile reporter.inFile({ fileName: 'schema/users.js', - content: `cube('Users', {\n sql: \`SELECT * FROM users\`,\n measures: {\n count: {\n type: 'count'\n }\n }\n});` + content: 'cube(\'Users\', {\n sql: `SELECT * FROM users`,\n measures: {\n count: {\n type: \'count\'\n }\n }\n});' }); // Test syntaxError with location @@ -45,7 +45,7 @@ describe('ErrorReporter', () => { // Test inFile for another file reporter.inFile({ fileName: 'schema/orders.js', - content: `cube('Orders', {\n sql: \`SELECT * FROM orders\`\n});` + content: 'cube(\'Orders\', {\n sql: `SELECT * FROM orders`\n});' }); // Test syntaxError without location but with file context diff --git a/packages/cubejs-schema-compiler/test/unit/utils.ts b/packages/cubejs-schema-compiler/test/unit/utils.ts index b639bc166a81c..37b4f1d442ed4 100644 --- a/packages/cubejs-schema-compiler/test/unit/utils.ts +++ b/packages/cubejs-schema-compiler/test/unit/utils.ts @@ -1,4 +1,5 @@ import YAML from 'js-yaml'; +import { getEnv } from '@cubejs-backend/shared'; interface CreateCubeSchemaOptions { name: string, @@ -768,3 +769,18 @@ export function createJoinedCubesSchema(): string { }); `; } + +export function transformResultsForTesseractIfNeeded(results: Record[]) { + if (getEnv('nativeSqlPlanner')) { + return results.map(record => { + const fixedRecord: Record = {}; + for (const [key, value] of Object.entries(record)) { + const fixedKey = key.replace(/___/g, '__'); + fixedRecord[fixedKey] = value; + } + return fixedRecord; + }); + } + + return results; +} diff --git a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs index 1df0a7958a6e7..37ff0e395892f 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/base_tools.rs @@ -2,10 +2,12 @@ use super::base_query_options::FilterItem; use super::driver_tools::{DriverTools, NativeDriverTools}; use super::filter_group::{FilterGroup, NativeFilterGroup}; use super::filter_params::{FilterParams, NativeFilterParams}; +use super::join_definition::{JoinDefinition, NativeJoinDefinition}; use super::pre_aggregation_obj::{NativePreAggregationObj, PreAggregationObj}; use super::security_context::{NativeSecurityContext, SecurityContext}; use super::sql_templates_render::{NativeSqlTemplatesRender, SqlTemplatesRender}; use super::sql_utils::{NativeSqlUtils, SqlUtils}; +use crate::cube_bridge::join_hints::JoinHintItem; use cubenativeutils::wrappers::serializer::{ NativeDeserialize, NativeDeserializer, NativeSerialize, }; @@ -53,4 +55,9 @@ pub trait BaseTools { cube_name: String, name: String, ) -> Result; //TODO move to rust + + fn join_tree_for_hints( + &self, + hints: Vec, + ) -> Result, CubeError>; } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/cube_definition.rs b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/cube_definition.rs index b0851c2bb38a8..1316ee8dc6a65 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/cube_definition.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/cube_definition.rs @@ -18,6 +18,8 @@ pub struct CubeDefinitionStatic { pub is_view: Option, #[serde(rename = "calendar")] pub is_calendar: Option, + #[serde(rename = "joinMap")] + pub join_map: Option>>, } impl CubeDefinitionStatic { diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/query_properties.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/query_properties.rs index ba79f19e86b2e..86265a901d0eb 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/query_properties.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/query_properties.rs @@ -613,7 +613,7 @@ impl QueryProperties { let join = query_tools .cached_data_mut() .join_by_hints(dimension_and_filter_join_hints_concat.clone(), |hints| { - query_tools.join_graph().build_join(hints) + query_tools.base_tools().join_tree_for_hints(hints) })?; vec![(Vec::new(), join)] } @@ -628,7 +628,7 @@ impl QueryProperties { .into_iter() .chain(dimension_and_filter_join_hints_concat.clone().into_iter()) .collect::>(), - |hints| query_tools.join_graph().build_join(hints), + |hints| query_tools.base_tools().join_tree_for_hints(hints), )?; Ok((vec![m.clone()], join)) }) diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs index b22aa42c1ffe3..c431f9370a9e2 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/collectors/join_hints_collector.rs @@ -27,7 +27,7 @@ impl TraversalVisitor for JoinHintsCollector { _: &Self::State, ) -> Result, CubeError> { if node.is_multi_stage() { - //We don't add multi stage members childs to join hints + //We don't add multi-stage members childs to join hints return Ok(None); } @@ -83,8 +83,35 @@ impl TraversalVisitor for JoinHintsCollector { pub fn collect_join_hints(node: &Rc) -> Result, CubeError> { let mut visitor = JoinHintsCollector::new(); visitor.apply(node, &())?; - let res = visitor.extract_result(); - Ok(res) + let mut collected_hints = visitor.extract_result(); + + let join_map = match node.as_ref() { + MemberSymbol::Dimension(d) => d.join_map(), + MemberSymbol::TimeDimension(d) => d.join_map(), + MemberSymbol::Measure(m) => m.join_map(), + _ => &None, + }; + + if let Some(join_map) = join_map { + for hint in collected_hints.iter_mut() { + match hint { + // If hints array has single element, check if it can be enriched with join hints + JoinHintItem::Single(hints) => { + for path in join_map.iter() { + if let Some(hint_index) = path.iter().position(|p| p == hints) { + *hint = JoinHintItem::Vector(path[0..=hint_index].to_vec()); + break; + } + } + } + // If hints is an array with multiple elements, it means it already + // includes full join hint path + JoinHintItem::Vector(_) => {} + } + } + } + + Ok(collected_hints) } pub fn collect_join_hints_for_measures( diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/cube_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/cube_symbol.rs index b17c748d189d1..8ffdb6b894b7e 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/cube_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/cube_symbol.rs @@ -74,6 +74,7 @@ pub struct CubeTableSymbol { member_sql: Option>, alias: String, is_table_sql: bool, + join_map: Option>>, } impl CubeTableSymbol { @@ -82,12 +83,14 @@ impl CubeTableSymbol { member_sql: Option>, alias: String, is_table_sql: bool, + join_map: Option>>, ) -> Rc { Rc::new(Self { cube_name, member_sql, alias, is_table_sql, + join_map, }) } @@ -133,6 +136,10 @@ impl CubeTableSymbol { pub fn alias(&self) -> String { self.alias.clone() } + + pub fn join_map(&self) -> &Option>> { + &self.join_map + } } pub struct CubeTableSymbolFactory { @@ -203,6 +210,7 @@ impl SymbolFactory for CubeTableSymbolFactory { sql, alias, is_table_sql, + definition.static_data().join_map.clone(), ))) } } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs index cdbeafe58a94d..2525578db0d22 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs @@ -258,6 +258,10 @@ impl DimensionSymbol { &self.definition } + pub fn join_map(&self) -> &Option>> { + self.cube.join_map() + } + pub fn name(&self) -> &String { &self.name } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/measure_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/measure_symbol.rs index 3794a754121a1..4b4570b92fe89 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/measure_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/measure_symbol.rs @@ -488,6 +488,11 @@ impl MeasureSymbol { pub fn cube_name(&self) -> &String { &self.cube.cube_name() } + + pub fn join_map(&self) -> &Option>> { + self.cube.join_map() + } + pub fn name(&self) -> &String { &self.name } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/time_dimension_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/time_dimension_symbol.rs index 56c34cff292ab..30c48aa13039e 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/time_dimension_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/time_dimension_symbol.rs @@ -175,6 +175,13 @@ impl TimeDimensionSymbol { self.base_symbol.cube_name() } + pub fn join_map(&self) -> &Option>> { + match self.base_symbol.as_ref() { + MemberSymbol::Dimension(d) => d.join_map(), + _ => &None, + } + } + pub fn is_multi_stage(&self) -> bool { self.base_symbol.is_multi_stage() }