From 4d12eec8ed360ac3a46be045a746027a00d50589 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 11 Oct 2025 18:37:13 +0100 Subject: [PATCH 01/13] add predicate utils --- packages/db/src/query/index.ts | 12 + packages/db/src/query/predicate-utils.ts | 1251 +++++++++++++++++++++ packages/db/tests/predicate-utils.test.ts | 1008 +++++++++++++++++ 3 files changed, 2271 insertions(+) create mode 100644 packages/db/src/query/predicate-utils.ts create mode 100644 packages/db/tests/predicate-utils.test.ts diff --git a/packages/db/src/query/index.ts b/packages/db/src/query/index.ts index 17f4dd8e7..5f25a462b 100644 --- a/packages/db/src/query/index.ts +++ b/packages/db/src/query/index.ts @@ -57,3 +57,15 @@ export { export { type LiveQueryCollectionConfig } from "./live/types.js" export { type LiveQueryCollectionUtils } from "./live/collection-config-builder.js" + +// Predicate utilities for predicate push-down +export { + isWhereSubset, + intersectWherePredicates, + unionWherePredicates, + isOrderBySubset, + isLimitSubset, + isPredicateSubset, + intersectPredicates, + unionPredicates, +} from "./predicate-utils.js" diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts new file mode 100644 index 000000000..13dbc9d92 --- /dev/null +++ b/packages/db/src/query/predicate-utils.ts @@ -0,0 +1,1251 @@ +import type { BasicExpression, Func, OrderBy, PropRef } from "./ir.js" +import type { OnLoadMoreOptions } from "../types.js" + +/** + * Check if one where clause is a logical subset of another. + * Returns true if the subset predicate is more restrictive than (or equal to) the superset predicate. 
/**
 * Decide whether the rows matched by `subset` are guaranteed to be contained
 * in the rows matched by `superset`.
 *
 * A missing (`undefined`) where clause stands for "no filter", i.e. all rows.
 *
 * @example
 * // age > 20 is contained in age > 10 (more restrictive)
 * isWhereSubset(gt(ref('age'), val(20)), gt(ref('age'), val(10))) // true
 *
 * @example
 * // age > 10 AND name = 'X' is contained in age > 10 (more conditions)
 * isWhereSubset(and(gt(ref('age'), val(10)), eq(ref('name'), val('X'))), gt(ref('age'), val(10))) // true
 *
 * @param subset - The potentially more restrictive predicate
 * @param superset - The potentially less restrictive predicate
 * @returns true if subset logically implies superset
 */
export function isWhereSubset(
  subset: BasicExpression<boolean> | undefined,
  superset: BasicExpression<boolean> | undefined
): boolean {
  // An unfiltered superset already holds every row, so any subset
  // (filtered or not) is contained in it.
  if (superset === undefined) {
    return true
  }

  // The superset is filtered. An unfiltered subset asks for ALL rows while
  // only SOME are covered, so containment cannot hold.
  if (subset === undefined) {
    return false
  }

  // Both sides carry a filter: compare them structurally.
  return isWhereSubsetInternal(subset, superset)
}
/**
 * Core implication check behind `isWhereSubset`: decides whether every row
 * matching `subset` is guaranteed to match `superset` as well.
 *
 * The check is conservative: `false` means "could not prove containment",
 * not necessarily "is not contained".
 *
 * @param subset - The predicate expected to be at least as restrictive
 * @param superset - The predicate expected to be at least as permissive
 * @returns true only when the implication can be established
 */
function isWhereSubsetInternal(
  subset: BasicExpression<boolean>,
  superset: BasicExpression<boolean>
): boolean {
  // Structurally identical expressions trivially imply each other
  if (areExpressionsEqual(subset, superset)) {
    return true
  }

  // Subset is an AND: (A AND B) implies both A and B
  if (subset.type === `func` && subset.name === `and`) {
    if (superset.type === `func` && superset.name === `and`) {
      // (A AND B) ⊆ (C AND D): every superset conjunct must be implied by
      // at least one subset conjunct
      return superset.args.every((superArg) =>
        subset.args.some((subArg) =>
          isWhereSubsetInternal(
            subArg as BasicExpression<boolean>,
            superArg as BasicExpression<boolean>
          )
        )
      )
    }
    // (A AND B) ⊆ C holds as soon as one conjunct alone implies C
    return subset.args.some((arg) =>
      isWhereSubsetInternal(arg as BasicExpression<boolean>, superset)
    )
  }

  // Superset is an AND: subset must imply ALL of its conjuncts.
  // Example: (age > 20) ⊆ (age > 10 AND status = 'active') is false because
  // nothing implies the status condition.
  if (superset.type === `func` && superset.name === `and`) {
    return superset.args.every((arg) =>
      isWhereSubsetInternal(subset, arg as BasicExpression<boolean>)
    )
  }

  // Subset is an OR: (A OR B) ⊆ C exactly when A ⊆ C and B ⊆ C.
  // This must run BEFORE the superset-OR rule below: otherwise
  // (A OR B) ⊆ (A OR B OR C) would require the whole disjunction to fit
  // inside a single superset disjunct and be rejected.
  if (subset.type === `func` && subset.name === `or`) {
    return subset.args.every((arg) =>
      isWhereSubsetInternal(arg as BasicExpression<boolean>, superset)
    )
  }

  // Superset is an OR: being contained in any one disjunct is sufficient
  if (superset.type === `func` && superset.name === `or`) {
    return superset.args.some((arg) =>
      isWhereSubsetInternal(subset, arg as BasicExpression<boolean>)
    )
  }

  // Leaf comparisons on the same field
  if (subset.type === `func` && superset.type === `func`) {
    const subsetFunc = subset as Func
    const supersetFunc = superset as Func

    // eq / gt / gte / lt / lte against the same ref
    const subsetField = extractComparisonField(subsetFunc)
    const supersetField = extractComparisonField(supersetFunc)
    if (
      subsetField &&
      supersetField &&
      areRefsEqual(subsetField.ref, supersetField.ref)
    ) {
      return isComparisonSubset(
        subsetFunc,
        subsetField.value,
        supersetFunc,
        supersetField.value
      )
    }

    // field = X ⊆ field IN [...] when X is one of the listed values
    if (subsetFunc.name === `eq` && supersetFunc.name === `in`) {
      const subsetFieldEq = extractEqualityField(subsetFunc)
      const supersetFieldIn = extractInField(supersetFunc)
      if (
        subsetFieldEq &&
        supersetFieldIn &&
        areRefsEqual(subsetFieldEq.ref, supersetFieldIn.ref)
      ) {
        return supersetFieldIn.values.some((candidate) =>
          areValuesEqual(subsetFieldEq.value, candidate)
        )
      }
    }

    // field IN [A, B] ⊆ field IN [A, B, C] when every subset value is listed
    if (subsetFunc.name === `in` && supersetFunc.name === `in`) {
      const subsetFieldIn = extractInField(subsetFunc)
      const supersetFieldIn = extractInField(supersetFunc)
      if (
        subsetFieldIn &&
        supersetFieldIn &&
        areRefsEqual(subsetFieldIn.ref, supersetFieldIn.ref)
      ) {
        return subsetFieldIn.values.every((subVal) =>
          supersetFieldIn.values.some((superVal) =>
            areValuesEqual(subVal, superVal)
          )
        )
      }
    }
  }

  // Conservative: if we can't determine, return false
  return false
}
/**
 * AND together several where predicates (set intersection).
 *
 * Top-level `and` inputs are flattened, predicates on the same field are
 * merged where possible (e.g. age > 10 AND age > 20 → age > 20), and a
 * `false` literal is returned as soon as one field's constraints contradict
 * each other.
 *
 * @example
 * intersectWherePredicates([gt(ref('age'), val(10)), gt(ref('age'), val(20))]) // age > 20
 *
 * @example
 * intersectWherePredicates([eq(ref('age'), val(5)), eq(ref('age'), val(6))])
 * // {type: 'val', value: false}
 *
 * @param predicates - Array of where predicates to intersect
 * @returns Combined predicate representing the intersection, or false literal for empty set
 */
export function intersectWherePredicates(
  predicates: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  // Nothing to intersect: no filter at all, i.e. `true`
  if (predicates.length === 0) {
    return { type: `val`, value: true } as BasicExpression<boolean>
  }

  // A lone predicate is returned untouched
  if (predicates.length === 1) {
    return predicates[0]!
  }

  // Unwrap one level of AND so that its conjuncts can be grouped by field
  const conjuncts = predicates.flatMap((pred) =>
    pred.type === `func` && pred.name === `and`
      ? (pred.args as Array<BasicExpression<boolean>>)
      : [pred]
  )

  const merged: Array<BasicExpression<boolean>> = []
  for (const [fieldKey, group] of groupPredicatesByField(conjuncts)) {
    if (fieldKey === null) {
      // Predicates that could not be attributed to a single field stay as-is
      merged.push(...group)
      continue
    }

    const combined = intersectSameFieldPredicates(group)
    if (combined.type === `val` && combined.value === false) {
      // One contradictory field makes the whole conjunction unsatisfiable
      return { type: `val`, value: false } as BasicExpression<boolean>
    }
    merged.push(combined)
  }

  switch (merged.length) {
    case 0:
      return { type: `val`, value: true } as BasicExpression<boolean>
    case 1:
      return merged[0]!
    default:
      return {
        type: `func`,
        name: `and`,
        args: merged,
      } as BasicExpression<boolean>
  }
}
/**
 * OR together several where predicates (set union).
 *
 * Top-level `or` inputs are flattened and predicates on the same field are
 * merged where possible (e.g. age > 10 OR age > 20 → age > 10).
 *
 * @example
 * unionWherePredicates([gt(ref('age'), val(10)), gt(ref('age'), val(20))]) // age > 10
 *
 * @example
 * unionWherePredicates([eq(ref('age'), val(5)), eq(ref('age'), val(10))]) // age IN [5, 10]
 *
 * @param predicates - Array of where predicates to union
 * @returns Combined predicate representing the union
 */
export function unionWherePredicates(
  predicates: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  // Nothing to union: nothing matches, i.e. `false`
  if (predicates.length === 0) {
    return { type: `val`, value: false } as BasicExpression<boolean>
  }

  // A lone predicate is returned untouched
  if (predicates.length === 1) {
    return predicates[0]!
  }

  // Unwrap one level of OR so that its disjuncts can be grouped by field
  const disjuncts: Array<BasicExpression<boolean>> = []
  predicates.forEach((pred) => {
    if (pred.type === `func` && pred.name === `or`) {
      disjuncts.push(...(pred.args as Array<BasicExpression<boolean>>))
    } else {
      disjuncts.push(pred)
    }
  })

  const merged: Array<BasicExpression<boolean>> = []
  groupPredicatesByField(disjuncts).forEach((group, fieldKey) => {
    if (fieldKey === null) {
      // Predicates that could not be attributed to a single field stay as-is
      merged.push(...group)
      return
    }

    const unified = unionSameFieldPredicates(group)
    if (unified) {
      merged.push(unified)
    }
  })

  if (merged.length === 0) {
    return { type: `val`, value: false } as BasicExpression<boolean>
  }

  return merged.length === 1
    ? merged[0]!
    : ({
        type: `func`,
        name: `or`,
        args: merged,
      } as BasicExpression<boolean>)
}
/**
 * Check whether the ordering required by `subset` is provided by `superset`.
 *
 * The requirement is met when `subset` is a prefix of `superset`: clause by
 * clause, the expressions and the compare options must match.
 *
 * @example
 * // Subset is prefix of superset
 * isOrderBySubset([{expr: age, asc}], [{expr: age, asc}, {expr: name, desc}]) // true
 *
 * @param subset - The ordering requirements to check
 * @param superset - The ordering that might satisfy the requirements
 * @returns true if subset is satisfied by superset
 */
export function isOrderBySubset(
  subset: OrderBy | undefined,
  superset: OrderBy | undefined
): boolean {
  // Requiring no particular order is satisfied by anything
  if (!subset || subset.length === 0) {
    return true
  }

  // An order is required but none is provided
  if (!superset || superset.length === 0) {
    return false
  }

  // A longer requirement can never be a prefix of a shorter ordering
  if (subset.length > superset.length) {
    return false
  }

  const provided = superset
  return subset.every((required, index) => {
    const candidate = provided[index]!
    return (
      areExpressionsEqual(required.expression, candidate.expression) &&
      areCompareOptionsEqual(required.compareOptions, candidate.compareOptions)
    )
  })
}
/**
 * Check whether the row count requested by `subset` is covered by the row
 * count made available by `superset`. `undefined` means "no limit" (all rows).
 *
 * @example
 * isLimitSubset(10, 20) // true (requesting 10 items when 20 are available)
 * isLimitSubset(20, 10) // false (requesting 20 items when only 10 are available)
 * isLimitSubset(10, undefined) // true (requesting 10 items when unlimited are available)
 * isLimitSubset(undefined, 10) // false (requesting everything when only 10 are available)
 *
 * @param subset - The limit requirement to check
 * @param superset - The limit that might satisfy the requirement
 * @returns true if subset is satisfied by superset
 */
export function isLimitSubset(
  subset: number | undefined,
  superset: number | undefined
): boolean {
  // An unlimited superset satisfies any request, limited or not
  if (superset === undefined) {
    return true
  }

  // The superset is limited here; an unlimited request asks for ALL rows and
  // cannot be served from a capped result. This mirrors how `isWhereSubset`
  // treats a missing subset filter against a filtered superset.
  if (subset === undefined) {
    return false
  }

  // Both are finite: the request must not exceed what is available
  return subset <= superset
}

/**
 * Check if one predicate (where + orderBy + limit) is a subset of another.
 * Returns true only when the where clause, the ordering and the limit of
 * `subset` are each satisfied by `superset`.
 *
 * NOTE(review): when `superset` carries a limit, a merely more restrictive
 * `subset.where` does not obviously guarantee that its rows are among the
 * superset's first N rows. Confirm whether callers need where-clause equality
 * in that case.
 *
 * @example
 * isPredicateSubset(
 *   { where: gt(ref('age'), val(20)), limit: 10 },
 *   { where: gt(ref('age'), val(10)), limit: 20 }
 * ) // true
 *
 * @param subset - The predicate requirements to check
 * @param superset - The predicate that might satisfy the requirements
 * @returns true if subset is satisfied by superset
 */
export function isPredicateSubset(
  subset: OnLoadMoreOptions,
  superset: OnLoadMoreOptions
): boolean {
  return (
    isWhereSubset(subset.where, superset.where) &&
    isOrderBySubset(subset.orderBy, superset.orderBy) &&
    isLimitSubset(subset.limit, superset.limit)
  )
}
/**
 * Merge several predicates with intersection semantics: the result describes
 * data matching ALL inputs.
 *
 * - where: the defined where clauses are intersected
 * - orderBy: the first non-empty ordering is kept
 * - limit: the smallest defined limit wins (undefined when none is defined)
 *
 * @param predicates - Array of predicates to merge
 * @returns Combined predicate representing the intersection
 */
export function intersectPredicates(
  predicates: Array<OnLoadMoreOptions>
): OnLoadMoreOptions {
  if (predicates.length === 0) {
    return {}
  }
  if (predicates.length === 1) {
    return predicates[0]!
  }

  const wheres: Array<BasicExpression<boolean>> = []
  const limits: Array<number> = []
  let firstOrderBy: OrderBy | undefined = undefined

  for (const predicate of predicates) {
    if (predicate.where !== undefined) {
      wheres.push(predicate.where)
    }
    if (predicate.limit !== undefined) {
      limits.push(predicate.limit)
    }
    // Orderings are assumed compatible between related predicates, so the
    // first non-empty one is representative
    if (
      firstOrderBy === undefined &&
      predicate.orderBy &&
      predicate.orderBy.length > 0
    ) {
      firstOrderBy = predicate.orderBy
    }
  }

  return {
    where: wheres.length > 0 ? intersectWherePredicates(wheres) : undefined,
    orderBy: firstOrderBy,
    // Unlimited inputs do not loosen the cap imposed by the limited ones
    limit: limits.length === 0 ? undefined : Math.min(...limits),
  }
}

/**
 * Merge several predicates with union semantics for their where clauses.
 *
 * - where: the defined where clauses are unioned
 * - orderBy: always undefined (inputs may order differently)
 * - limit: the smallest limit, but only when every input has one
 *
 * @param predicates - Array of predicates to merge
 * @returns Combined predicate
 */
export function unionPredicates(
  predicates: Array<OnLoadMoreOptions>
): OnLoadMoreOptions {
  if (predicates.length === 0) {
    return {}
  }
  if (predicates.length === 1) {
    return predicates[0]!
  }

  const wheres: Array<BasicExpression<boolean>> = []
  const limits: Array<number> = []
  for (const predicate of predicates) {
    if (predicate.where !== undefined) {
      wheres.push(predicate.where)
    }
    if (predicate.limit !== undefined) {
      limits.push(predicate.limit)
    }
  }

  const everyInputLimited =
    limits.length > 0 && limits.length === predicates.length

  return {
    where: wheres.length > 0 ? unionWherePredicates(wheres) : undefined,
    orderBy: undefined,
    limit: everyInputLimited ? Math.min(...limits) : undefined,
  }
}
// ============================================================================
// Helper functions
// ============================================================================

/**
 * Structural equality of two IR expressions: same node type and, recursively,
 * same value / same ref path / same function name with pairwise-equal args.
 */
function areExpressionsEqual(a: BasicExpression, b: BasicExpression): boolean {
  if (a.type !== b.type) {
    return false
  }

  if (a.type === `val` && b.type === `val`) {
    return areValuesEqual(a.value, b.value)
  }

  if (a.type === `ref` && b.type === `ref`) {
    return areRefsEqual(a, b)
  }

  if (a.type === `func` && b.type === `func`) {
    const aFunc = a
    const bFunc = b
    return (
      aFunc.name === bFunc.name &&
      aFunc.args.length === bFunc.args.length &&
      aFunc.args.every((arg, i) => areExpressionsEqual(arg, bFunc.args[i]!))
    )
  }

  return false
}

/**
 * Equality of two literal values as used inside predicates.
 * Identical values, two NaNs and two Dates with the same timestamp are equal;
 * other non-null objects (including arrays) are compared by their JSON
 * serialization, which is simple but not perfect (e.g. key order matters).
 */
function areValuesEqual(a: any, b: any): boolean {
  if (a === b) {
    return true
  }

  // Two distinct numbers are only "equal" when both are NaN
  if (typeof a === `number` && typeof b === `number`) {
    return Number.isNaN(a) && Number.isNaN(b)
  }

  if (a instanceof Date && b instanceof Date) {
    return a.getTime() === b.getTime()
  }

  if (
    typeof a === `object` &&
    typeof b === `object` &&
    a !== null &&
    b !== null
  ) {
    try {
      return JSON.stringify(a) === JSON.stringify(b)
    } catch {
      // e.g. circular structures: treat as not comparable
      return false
    }
  }

  return false
}

/**
 * Two refs are equal when their paths have the same segments in the same order.
 */
function areRefsEqual(a: PropRef, b: PropRef): boolean {
  if (a.path.length !== b.path.length) {
    return false
  }
  return a.path.every((segment, i) => segment === b.path[i])
}

/**
 * Get the maximum of two values.
 * Dates are compared by timestamp and numbers with `Math.max`; any other pair
 * (strings, bigints, ...) is ordered with the `>` operator, because
 * `Math.max` would turn strings into NaN and throw on bigints.
 */
function maxValue(a: any, b: any): any {
  if (a instanceof Date && b instanceof Date) {
    return a.getTime() > b.getTime() ? a : b
  }
  if (typeof a === `number` && typeof b === `number`) {
    return Math.max(a, b)
  }
  return a > b ? a : b
}

/**
 * Get the minimum of two values.
 * Dates are compared by timestamp and numbers with `Math.min`; any other pair
 * (strings, bigints, ...) is ordered with the `<` operator, because
 * `Math.min` would turn strings into NaN and throw on bigints.
 */
function minValue(a: any, b: any): any {
  if (a instanceof Date && b instanceof Date) {
    return a.getTime() < b.getTime() ? a : b
  }
  if (typeof a === `number` && typeof b === `number`) {
    return Math.min(a, b)
  }
  return a < b ? a : b
}
/**
 * Compare the options of two orderBy clauses.
 * Only `direction` is compared for now; any other option is ignored.
 */
function areCompareOptionsEqual(
  a: { direction?: `asc` | `desc`; [key: string]: any },
  b: { direction?: `asc` | `desc`; [key: string]: any }
): boolean {
  const { direction: left } = a
  const { direction: right } = b
  return left === right
}

/** A `ref <op> value` comparison broken into its two operands. */
interface ComparisonField {
  ref: PropRef
  value: any
}

/**
 * Read the operands of an `eq` / `gt` / `gte` / `lt` / `lte` call.
 * Only the shape (ref, literal) is recognised; any other operator or operand
 * arrangement yields null.
 */
function extractComparisonField(func: Func): ComparisonField | null {
  switch (func.name) {
    case `eq`:
    case `gt`:
    case `gte`:
    case `lt`:
    case `lte`:
      break
    default:
      return null
  }

  // The ref is expected first and the literal second
  const [target, operand] = func.args
  if (target?.type === `ref` && operand?.type === `val`) {
    return { ref: target, value: operand.value }
  }
  return null
}

/**
 * Same as `extractComparisonField`, but restricted to `eq` calls.
 */
function extractEqualityField(func: Func): ComparisonField | null {
  return func.name === `eq` ? extractComparisonField(func) : null
}

/** A `ref IN [values]` membership test broken into its two operands. */
interface InField {
  ref: PropRef
  values: Array<any>
}

/**
 * Read the operands of an `in` call of the shape (ref, literal array).
 * Anything else yields null.
 */
function extractInField(func: Func): InField | null {
  if (func.name !== `in`) {
    return null
  }

  const [target, candidates] = func.args
  if (
    target?.type === `ref` &&
    candidates?.type === `val` &&
    Array.isArray(candidates.value)
  ) {
    return { ref: target, values: candidates.value }
  }
  return null
}
/**
 * Decide containment between two comparisons on the SAME field, e.g. whether
 * `field > 20` is contained in `field >= 10`.
 *
 * The operator pair selects which relation must hold between the two literal
 * values; any pair not listed (for instance a lower bound against an upper
 * bound) is reported as not contained.
 *
 * @param subsetFunc - The comparison expected to be more restrictive
 * @param subsetValue - Literal operand of `subsetFunc`
 * @param supersetFunc - The comparison expected to be more permissive
 * @param supersetValue - Literal operand of `supersetFunc`
 * @returns true if the subset comparison implies the superset comparison
 */
function isComparisonSubset(
  subsetFunc: Func,
  subsetValue: any,
  supersetFunc: Func,
  supersetValue: any
): boolean {
  switch (`${subsetFunc.name}->${supersetFunc.name}`) {
    // field = X ⊆ field = X only
    case `eq->eq`:
      return areValuesEqual(subsetValue, supersetValue)

    // Lower bounds where an equal boundary value is still contained:
    // x > 20 ⊆ x > 10, x >= 20 ⊆ x >= 10, x > 10 ⊆ x >= 10, x = 10 ⊆ x >= 10
    case `gt->gt`:
    case `gte->gte`:
    case `gt->gte`:
    case `eq->gte`:
      return subsetValue >= supersetValue

    // Strict lower bound in the superset excludes the boundary itself:
    // x >= 11 ⊆ x > 10, x = 15 ⊆ x > 10
    case `gte->gt`:
    case `eq->gt`:
      return subsetValue > supersetValue

    // Upper bounds where an equal boundary value is still contained:
    // x < 10 ⊆ x < 20, x <= 10 ⊆ x <= 20, x < 10 ⊆ x <= 10, x = 10 ⊆ x <= 10
    case `lt->lt`:
    case `lte->lte`:
    case `lt->lte`:
    case `eq->lte`:
      return subsetValue <= supersetValue

    // Strict upper bound in the superset excludes the boundary itself:
    // x <= 9 ⊆ x < 10, x = 5 ⊆ x < 10
    case `lte->lt`:
    case `eq->lt`:
      return subsetValue < supersetValue

    default:
      return false
  }
}

/**
 * Bucket predicates by the field they constrain.
 *
 * The key is the ref path joined with `.` for recognised comparisons and IN
 * tests; everything else is collected under the `null` key. Buckets keep the
 * order in which their first member was encountered.
 */
function groupPredicatesByField(
  predicates: Array<BasicExpression<boolean>>
): Map<string | null, Array<BasicExpression<boolean>>> {
  const byField = new Map<string | null, Array<BasicExpression<boolean>>>()

  for (const predicate of predicates) {
    const field =
      predicate.type === `func`
        ? extractComparisonField(predicate as Func) ||
          extractEqualityField(predicate as Func) ||
          extractInField(predicate as Func)
        : null
    const key = field ? field.ref.path.join(`.`) : null

    const bucket = byField.get(key)
    if (bucket) {
      bucket.push(predicate)
    } else {
      byField.set(key, [predicate])
    }
  }

  return byField
}
/**
 * Intersect (AND) predicates that all constrain the same field and return the
 * most restrictive equivalent.
 *
 * - several lower / upper bounds collapse to the tightest one of each side
 * - an equality wins over everything else, provided it satisfies every bound
 *   and every IN list; otherwise the result is the `false` literal
 * - two different equalities, or IN lists with no common value, yield `false`
 * - several IN lists collapse to one IN over their common values
 *
 * Original predicate objects are reused in the result whenever possible. If
 * a computed bound cannot be matched back to an input predicate, the plain
 * AND of all inputs is returned instead of a partially simplified result.
 *
 * @param predicates - Predicates on one field (as grouped by `groupPredicatesByField`)
 * @returns The simplified conjunction, or a `false` literal when unsatisfiable
 */
function intersectSameFieldPredicates(
  predicates: Array<BasicExpression<boolean>>
): BasicExpression<boolean> {
  if (predicates.length === 1) {
    return predicates[0]!
  }

  const emptySet = { type: `val`, value: false } as BasicExpression<boolean>
  const conjunctionOf = (
    parts: Array<BasicExpression<boolean>>
  ): BasicExpression<boolean> =>
    ({ type: `func`, name: `and`, args: parts }) as BasicExpression<boolean>

  // First input predicate using operator `op` with a literal equal to `value`
  const findComparison = (
    op: string,
    value: any
  ): BasicExpression<boolean> | undefined =>
    predicates.find((p) => {
      if (p.type !== `func` || p.name !== op) {
        return false
      }
      const field = extractComparisonField(p)
      return field !== null && areValuesEqual(field.value, value)
    })

  // Tightest bound seen so far for each operator (null = operator not seen)
  let minGt: any = null
  let minGte: any = null
  let maxLt: any = null
  let maxLte: any = null
  // Distinct equality values, deduplicated by value (Dates, objects, NaN)
  const eqValues: Array<any> = []
  const inValueSets: Array<Array<any>> = []
  let inRef: BasicExpression | undefined = undefined
  const otherPredicates: Array<BasicExpression<boolean>> = []

  for (const pred of predicates) {
    if (pred.type !== `func`) {
      otherPredicates.push(pred)
      continue
    }

    const field = extractComparisonField(pred)
    if (field) {
      const value = field.value
      switch (pred.name) {
        case `gt`:
          minGt = minGt === null ? value : maxValue(minGt, value)
          break
        case `gte`:
          minGte = minGte === null ? value : maxValue(minGte, value)
          break
        case `lt`:
          maxLt = maxLt === null ? value : minValue(maxLt, value)
          break
        case `lte`:
          maxLte = maxLte === null ? value : minValue(maxLte, value)
          break
        case `eq`:
          if (!eqValues.some((seen) => areValuesEqual(seen, value))) {
            eqValues.push(value)
          }
          break
        default:
          otherPredicates.push(pred)
      }
      continue
    }

    const inField = extractInField(pred)
    if (inField) {
      inValueSets.push(inField.values)
      if (inRef === undefined) {
        inRef = inField.ref
      }
    } else {
      otherPredicates.push(pred)
    }
  }

  // field = 5 AND field = 6 → empty set
  if (eqValues.length > 1) {
    return emptySet
  }

  // An equality is the most restrictive constraint. Its presence is tracked
  // by count rather than by a null sentinel so that `field = null` is not
  // mistaken for "no equality".
  if (eqValues.length === 1) {
    const eqValue = eqValues[0]
    const outsideRange =
      (minGt !== null && !(eqValue > minGt)) ||
      (minGte !== null && !(eqValue >= minGte)) ||
      (maxLt !== null && !(eqValue < maxLt)) ||
      (maxLte !== null && !(eqValue <= maxLte))
    const missingFromSomeInList = inValueSets.some(
      (values) => !values.some((v) => areValuesEqual(v, eqValue))
    )
    if (outsideRange || missingFromSomeInList) {
      return emptySet
    }

    const eqPredicate = findComparison(`eq`, eqValue)
    if (eqPredicate === undefined) {
      return conjunctionOf(predicates)
    }
    return otherPredicates.length === 0
      ? eqPredicate
      : conjunctionOf([eqPredicate, ...otherPredicates])
  }

  // Intersect all IN lists
  let intersectedInValues: Array<any> | null = null
  if (inValueSets.length > 0) {
    intersectedInValues = [...inValueSets[0]!]
    for (let i = 1; i < inValueSets.length; i++) {
      const currentSet = inValueSets[i]!
      intersectedInValues = intersectedInValues.filter((v) =>
        currentSet.some((cv) => areValuesEqual(v, cv))
      )
    }
    if (intersectedInValues.length === 0) {
      return emptySet
    }
  }

  // Pick the tightest bound on each side
  const bounds: Array<BasicExpression<boolean> | undefined> = []

  if (minGt !== null && minGte !== null) {
    // On a tie the strict bound is tighter: x > 5 AND x >= 5 → x > 5
    bounds.push(
      minGt >= minGte
        ? findComparison(`gt`, minGt)
        : findComparison(`gte`, minGte)
    )
  } else if (minGt !== null) {
    bounds.push(findComparison(`gt`, minGt))
  } else if (minGte !== null) {
    bounds.push(findComparison(`gte`, minGte))
  }

  if (maxLt !== null && maxLte !== null) {
    // On a tie the strict bound is tighter: x < 5 AND x <= 5 → x < 5
    bounds.push(
      maxLt <= maxLte
        ? findComparison(`lt`, maxLt)
        : findComparison(`lte`, maxLte)
    )
  } else if (maxLt !== null) {
    bounds.push(findComparison(`lt`, maxLt))
  } else if (maxLte !== null) {
    bounds.push(findComparison(`lte`, maxLte))
  }

  const result = bounds.filter(
    (bound): bound is BasicExpression<boolean> => bound !== undefined
  )
  if (result.length !== bounds.length) {
    // A combined bound could not be traced back to an input predicate.
    // Keep every input rather than silently dropping a constraint.
    return conjunctionOf(predicates)
  }

  if (intersectedInValues !== null && inRef !== undefined) {
    result.push({
      type: `func`,
      name: `in`,
      args: [
        inRef,
        { type: `val`, value: intersectedInValues } as BasicExpression,
      ],
    } as BasicExpression<boolean>)
  }

  result.push(...otherPredicates)

  if (result.length === 0) {
    return { type: `val`, value: true } as BasicExpression<boolean>
  }
  if (result.length === 1) {
    return result[0]!
  }
  return conjunctionOf(result)
}
/**
 * Union (OR) predicates that all constrain the same field and return the
 * least restrictive equivalent.
 *
 * - several lower / upper bounds collapse to the loosest one of each side
 * - equalities and IN lists are merged into a single IN over the distinct
 *   values (a lone equality or a lone IN predicate is reused as-is)
 * - bounds and the merged point values are OR-ed together, so no input
 *   predicate is lost from the union
 *
 * If a computed bound cannot be matched back to an input predicate, the plain
 * OR of all inputs is returned instead of a partially simplified result.
 *
 * @param predicates - Predicates on one field (as grouped by `groupPredicatesByField`)
 * @returns The simplified disjunction (never null in practice; the nullable
 *          return type is kept for callers that already guard on it)
 */
function unionSameFieldPredicates(
  predicates: Array<BasicExpression<boolean>>
): BasicExpression<boolean> | null {
  if (predicates.length === 1) {
    return predicates[0]!
  }

  const disjunctionOf = (
    parts: Array<BasicExpression<boolean>>
  ): BasicExpression<boolean> =>
    ({ type: `func`, name: `or`, args: parts }) as BasicExpression<boolean>

  // First input predicate using operator `op` with a literal equal to `value`
  const findComparison = (
    op: string,
    value: any
  ): BasicExpression<boolean> | undefined =>
    predicates.find((p) => {
      if (p.type !== `func` || p.name !== op) {
        return false
      }
      const field = extractComparisonField(p)
      return field !== null && areValuesEqual(field.value, value)
    })

  // Append `value` unless an equal value is already present
  const addDistinct = (bucket: Array<any>, value: any): void => {
    if (!bucket.some((seen) => areValuesEqual(seen, value))) {
      bucket.push(value)
    }
  }

  // Loosest bound seen so far for each operator (null = operator not seen)
  let loosestGt: any = null
  let loosestGte: any = null
  let loosestLt: any = null
  let loosestLte: any = null
  const eqValues: Array<any> = []
  const inValues: Array<any> = []
  let firstInPredicate: BasicExpression<boolean> | undefined = undefined
  let inPredicateCount = 0
  let fieldRef: BasicExpression | undefined = undefined
  const otherPredicates: Array<BasicExpression<boolean>> = []

  for (const pred of predicates) {
    if (pred.type !== `func`) {
      otherPredicates.push(pred)
      continue
    }

    const field = extractComparisonField(pred)
    if (field) {
      if (fieldRef === undefined) {
        fieldRef = field.ref
      }
      const value = field.value
      switch (pred.name) {
        case `gt`:
          loosestGt = loosestGt === null ? value : minValue(loosestGt, value)
          break
        case `gte`:
          loosestGte = loosestGte === null ? value : minValue(loosestGte, value)
          break
        case `lt`:
          loosestLt = loosestLt === null ? value : maxValue(loosestLt, value)
          break
        case `lte`:
          loosestLte = loosestLte === null ? value : maxValue(loosestLte, value)
          break
        case `eq`:
          addDistinct(eqValues, value)
          break
        default:
          otherPredicates.push(pred)
      }
      continue
    }

    const inField = extractInField(pred)
    if (inField) {
      if (fieldRef === undefined) {
        fieldRef = inField.ref
      }
      if (firstInPredicate === undefined) {
        firstInPredicate = pred
      }
      inPredicateCount++
      for (const value of inField.values) {
        addDistinct(inValues, value)
      }
    } else {
      otherPredicates.push(pred)
    }
  }

  // Pick the loosest bound on each side
  const bounds: Array<BasicExpression<boolean> | undefined> = []

  if (loosestGt !== null && loosestGte !== null) {
    // On a tie the inclusive bound is looser: x > 5 OR x >= 5 → x >= 5
    bounds.push(
      loosestGte <= loosestGt
        ? findComparison(`gte`, loosestGte)
        : findComparison(`gt`, loosestGt)
    )
  } else if (loosestGt !== null) {
    bounds.push(findComparison(`gt`, loosestGt))
  } else if (loosestGte !== null) {
    bounds.push(findComparison(`gte`, loosestGte))
  }

  if (loosestLt !== null && loosestLte !== null) {
    // On a tie the inclusive bound is looser: x < 5 OR x <= 5 → x <= 5
    bounds.push(
      loosestLte >= loosestLt
        ? findComparison(`lte`, loosestLte)
        : findComparison(`lt`, loosestLt)
    )
  } else if (loosestLt !== null) {
    bounds.push(findComparison(`lt`, loosestLt))
  } else if (loosestLte !== null) {
    bounds.push(findComparison(`lte`, loosestLte))
  }

  // Point constraints: equalities and IN lists
  if (eqValues.length === 1 && inPredicateCount === 0) {
    // A lone equality is kept as the original predicate
    bounds.push(findComparison(`eq`, eqValues[0]))
  } else if (eqValues.length === 0 && inPredicateCount === 1) {
    // A lone IN is kept as the original predicate
    bounds.push(firstInPredicate)
  } else if (
    (eqValues.length > 0 || inPredicateCount > 0) &&
    fieldRef !== undefined
  ) {
    // Several equalities and/or IN lists: one IN over all distinct values
    // (equality values first, then IN values). It is added NEXT TO the range
    // bounds rather than replacing them, so the union never shrinks.
    const pointValues = [...eqValues]
    for (const value of inValues) {
      addDistinct(pointValues, value)
    }
    bounds.push({
      type: `func`,
      name: `in`,
      args: [fieldRef, { type: `val`, value: pointValues } as BasicExpression],
    } as BasicExpression<boolean>)
  }

  const result = bounds.filter(
    (part): part is BasicExpression<boolean> => part !== undefined
  )
  if (result.length !== bounds.length) {
    // A combined bound could not be traced back to an input predicate.
    // Keep every input rather than silently narrowing the union.
    return disjunctionOf(predicates)
  }

  result.push(...otherPredicates)

  if (result.length === 0) {
    return { type: `val`, value: true } as BasicExpression<boolean>
  }
  if (result.length === 1) {
    return result[0]!
  }
  return disjunctionOf(result)
}
+ ) + } + + // Add other predicates + result.push(...otherPredicates) + + if (result.length === 0) { + return { type: `val`, value: true } as BasicExpression + } + + if (result.length === 1) { + return result[0]! + } + + return { + type: `func`, + name: `or`, + args: result, + } as BasicExpression +} diff --git a/packages/db/tests/predicate-utils.test.ts b/packages/db/tests/predicate-utils.test.ts new file mode 100644 index 000000000..ce01c5137 --- /dev/null +++ b/packages/db/tests/predicate-utils.test.ts @@ -0,0 +1,1008 @@ +import { describe, expect, it } from "vitest" +import { + intersectPredicates, + intersectWherePredicates, + isLimitSubset, + isOrderBySubset, + isPredicateSubset, + isWhereSubset, + unionPredicates, + unionWherePredicates, +} from "../src/query/predicate-utils" +import { Func, PropRef, Value } from "../src/query/ir" +import type { BasicExpression, OrderBy, OrderByClause } from "../src/query/ir" +import type { OnLoadMoreOptions } from "../src/types" + +// Helper functions to build expressions more easily +function ref(path: string | Array): PropRef { + return new PropRef(typeof path === `string` ? 
// Expression builders used by the suites below.

/** Property reference from a single key or a full path. */
function ref(path: string | Array<string>): PropRef {
  const segments = typeof path === `string` ? [path] : path
  return new PropRef(segments)
}

/** Literal value node. */
function val(value: any): Value {
  return new Value(value)
}

/** Function-call node with the given operator name and operands. */
function func(name: string, ...args: Array<BasicExpression>): Func {
  return new Func(name, args)
}

/** `left = right` */
function eq(left: BasicExpression, right: BasicExpression): Func {
  return new Func(`eq`, [left, right])
}

/** `left > right` */
function gt(left: BasicExpression, right: BasicExpression): Func {
  return new Func(`gt`, [left, right])
}

/** `left >= right` */
function gte(left: BasicExpression, right: BasicExpression): Func {
  return new Func(`gte`, [left, right])
}

/** `left < right` */
function lt(left: BasicExpression, right: BasicExpression): Func {
  return new Func(`lt`, [left, right])
}

/** `left <= right` */
function lte(left: BasicExpression, right: BasicExpression): Func {
  return new Func(`lte`, [left, right])
}

/** Conjunction of all operands. */
function and(...args: Array<BasicExpression>): Func {
  return new Func(`and`, args)
}

/** Disjunction of all operands. */
function or(...args: Array<BasicExpression>): Func {
  return new Func(`or`, args)
}

/** `left IN values`; the list is wrapped in a single Value node. */
function inOp(left: BasicExpression, values: Array<any>): Func {
  return new Func(`in`, [left, new Value(values)])
}

/** One ORDER BY entry; only the direction varies between tests. */
function orderByClause(
  expression: BasicExpression,
  direction: `asc` | `desc` = `asc`
): OrderByClause {
  const clause: OrderByClause = {
    expression,
    compareOptions: {
      direction,
      nulls: `last`,
      stringSort: `lexical`,
    },
  }
  return clause
}
+ + it(`should return true for structurally equal expressions`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + }) + + describe(`comparison operators`, () => { + it(`should handle gt: age > 20 is subset of age > 10`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(20)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle gt: age > 10 is NOT subset of age > 20`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gt(ref(`age`), val(20))) + ).toBe(false) + }) + + it(`should handle gte: age >= 20 is subset of age >= 10`, () => { + expect( + isWhereSubset(gte(ref(`age`), val(20)), gte(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle lt: age < 10 is subset of age < 20`, () => { + expect( + isWhereSubset(lt(ref(`age`), val(10)), lt(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle lt: age < 20 is NOT subset of age < 10`, () => { + expect( + isWhereSubset(lt(ref(`age`), val(20)), lt(ref(`age`), val(10))) + ).toBe(false) + }) + + it(`should handle lte: age <= 10 is subset of age <= 20`, () => { + expect( + isWhereSubset(lte(ref(`age`), val(10)), lte(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle eq: age = 15 is subset of age > 10`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle eq: age = 5 is NOT subset of age > 10`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(5)), gt(ref(`age`), val(10))) + ).toBe(false) + }) + + it(`should handle eq: age = 15 is subset of age >= 15`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), gte(ref(`age`), val(15))) + ).toBe(true) + }) + + it(`should handle eq: age = 15 is subset of age < 20`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), lt(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle mixed operators: gt vs gte`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gte(ref(`age`), 
val(10))) + ).toBe(true) + }) + + it(`should handle mixed operators: gte vs gt`, () => { + expect( + isWhereSubset(gte(ref(`age`), val(11)), gt(ref(`age`), val(10))) + ).toBe(true) + expect( + isWhereSubset(gte(ref(`age`), val(10)), gt(ref(`age`), val(10))) + ).toBe(false) + }) + }) + + describe(`IN operator`, () => { + it(`should handle eq vs in: age = 5 is subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(5)), inOp(ref(`age`), [5, 10, 15])) + ).toBe(true) + }) + + it(`should handle eq vs in: age = 20 is NOT subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(20)), inOp(ref(`age`), [5, 10, 15])) + ).toBe(false) + }) + + it(`should handle in vs in: [5, 10] is subset of [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 10]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(true) + }) + + it(`should handle in vs in: [5, 20] is NOT subset of [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 20]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(false) + }) + }) + + describe(`AND combinations`, () => { + it(`should handle AND in subset: (A AND B) is subset of A`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + gt(ref(`age`), val(10)) + ) + ).toBe(true) + }) + + it(`should handle AND in subset: (A AND B) is NOT subset of C (different field)`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)) + ) + ).toBe(false) + }) + + it(`should handle AND in superset: A is subset of (A AND B) is false (superset is more restrictive)`, () => { + expect( + isWhereSubset( + gt(ref(`age`), val(10)), + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(false) + }) + + it(`should handle AND in both: (age > 20 AND status = 'active') is subset of (age > 10 AND status = 'active')`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(20)), 
eq(ref(`status`), val(`active`))), + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + }) + + describe(`OR combinations`, () => { + it(`should handle OR in superset: A is subset of (A OR B)`, () => { + expect( + isWhereSubset( + gt(ref(`age`), val(10)), + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + + it(`should return false when subset doesn't imply any branch of OR superset`, () => { + expect( + isWhereSubset( + eq(ref(`age`), val(10)), + or(gt(ref(`age`), val(10)), lt(ref(`age`), val(5))) + ) + ).toBe(false) + }) + + it(`should handle OR in subset: (A OR B) is subset of C only if both A and B are subsets of C`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), gt(ref(`age`), val(30))), + gt(ref(`age`), val(10)) + ) + ).toBe(true) + }) + + it(`should handle OR in subset: (A OR B) is NOT subset of C if either is not a subset`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), lt(ref(`age`), val(5))), + gt(ref(`age`), val(10)) + ) + ).toBe(false) + }) + }) + + describe(`different fields`, () => { + it(`should return false for different fields with no relationship`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(20)), gt(ref(`salary`), val(1000))) + ).toBe(false) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should handle Date equality`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + eq(ref(`createdAt`), val(date2)) + ) + ).toBe(true) + }) + + it(`should handle Date range comparisons: date > 2024-01-15 is subset of date > 2024-01-01`, () => { + expect( + isWhereSubset( + gt(ref(`createdAt`), val(date2)), + gt(ref(`createdAt`), val(date1)) + ) + ).toBe(true) + }) + + it(`should handle Date range comparisons: date < 2024-01-15 is subset of date < 2024-02-01`, () => { + expect( + isWhereSubset( + 
lt(ref(`createdAt`), val(date2)), + lt(ref(`createdAt`), val(date3)) + ) + ).toBe(true) + }) + + it(`should handle Date equality vs range: date = 2024-01-15 is subset of date > 2024-01-01`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + gt(ref(`createdAt`), val(date1)) + ) + ).toBe(true) + }) + + it(`should handle Date equality vs IN: date = 2024-01-15 is subset of date IN [2024-01-01, 2024-01-15, 2024-02-01]`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + inOp(ref(`createdAt`), [date1, date2, date3]) + ) + ).toBe(true) + }) + + it(`should handle Date IN subset: date IN [2024-01-01, 2024-01-15] is subset of date IN [2024-01-01, 2024-01-15, 2024-02-01]`, () => { + expect( + isWhereSubset( + inOp(ref(`createdAt`), [date1, date2]), + inOp(ref(`createdAt`), [date1, date2, date3]) + ) + ).toBe(true) + }) + + it(`should return false when Date not in IN set`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date1)), + inOp(ref(`createdAt`), [date2, date3]) + ) + ).toBe(false) + }) + }) +}) + +describe(`intersectWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return true for empty array`, () => { + const result = intersectWherePredicates([]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(true) + }) + + it(`should return the single predicate as-is`, () => { + const pred = gt(ref(`age`), val(10)) + const result = intersectWherePredicates([pred]) + expect(result).toBe(pred) + }) + }) + + describe(`same field comparisons`, () => { + it(`should take most restrictive for gt: age > 10 AND age > 20 → age > 20`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + gt(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(20) + }) + + it(`should take most restrictive for gte: age >= 10 AND age >= 20 → age >= 20`, () 
=> { + const result = intersectWherePredicates([ + gte(ref(`age`), val(10)), + gte(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gte`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(20) + }) + + it(`should take most restrictive for lt: age < 20 AND age < 10 → age < 10`, () => { + const result = intersectWherePredicates([ + lt(ref(`age`), val(20)), + lt(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`lt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + + it(`should combine range: age > 10 AND age < 50`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + lt(ref(`age`), val(50)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(2) + }) + + it(`should prefer eq when present: age = 15 AND age > 10 → age = 15`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(15)), + gt(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`eq`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(15) + }) + + it(`should handle gt and gte together: age > 10 AND age >= 15 → age >= 15`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + gte(ref(`age`), val(15)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gte`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(15) + }) + }) + + describe(`different fields`, () => { + it(`should combine with AND: age > 10 AND status = 'active'`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(2) + }) + }) + 
+ describe(`flatten AND`, () => { + it(`should flatten nested ANDs`, () => { + const result = intersectWherePredicates([ + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(3) + }) + }) + + describe(`conflict detection`, () => { + it(`should return false literal for conflicting equalities: age = 5 AND age = 6`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(5)), + eq(ref(`age`), val(6)), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should handle IN intersection: IN [1,2] AND IN [2,3] → IN [2]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`age`), [1, 2]), + inOp(ref(`age`), [2, 3]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toEqual([2]) + }) + + it(`should return false literal for empty IN intersection: IN [1,2] AND IN [3,4]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`age`), [1, 2]), + inOp(ref(`age`), [3, 4]), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should handle multiple IN intersections: IN [1,2,3] AND IN [2,3,4] AND IN [2,4,5] → IN [2]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`age`), [1, 2, 3]), + inOp(ref(`age`), [2, 3, 4]), + inOp(ref(`age`), [2, 4, 5]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toEqual([2]) + }) + + it(`should handle satisfiable equality AND IN: age = 2 AND age IN [1,2] → age = 2`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(2)), + inOp(ref(`age`), [1, 2]), + ]) + expect(result.type).toBe(`func`) + expect((result as 
Func).name).toBe(`eq`) + const value = ((result as Func).args[1] as Value).value + expect(value).toBe(2) + }) + + it(`should return false literal for unsatisfiable equality AND IN: age = 2 AND age IN [3,4]`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(2)), + inOp(ref(`age`), [3, 4]), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should intersect Date ranges: date > 2024-01-01 AND date > 2024-01-15 → date > 2024-01-15`, () => { + const result = intersectWherePredicates([ + gt(ref(`createdAt`), val(date1)), + gt(ref(`createdAt`), val(date2)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const value = ((result as Func).args[1] as Value).value + expect(value).toEqual(date2) + }) + + it(`should intersect Date range with bounds: date > 2024-01-01 AND date < 2024-02-01`, () => { + const result = intersectWherePredicates([ + gt(ref(`createdAt`), val(date1)), + lt(ref(`createdAt`), val(date3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(2) + }) + + it(`should handle Date equality: date = 2024-01-15 AND date = 2024-01-15 → date = 2024-01-15`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date2)), + eq(ref(`createdAt`), val(new Date(`2024-01-15`))), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`eq`) + const value = ((result as Func).args[1] as Value).value + expect(value).toEqual(date2) + }) + + it(`should return false literal for conflicting Date equalities`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date1)), + eq(ref(`createdAt`), val(date2)), + ]) + expect(result.type).toBe(`val`) + expect((result as 
Value).value).toBe(false) + }) + + it(`should handle Date IN clause intersection: IN [date1,date2] AND IN [date2,date3] → IN [date2]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`createdAt`), [date1, date2]), + inOp(ref(`createdAt`), [date2, date3]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(1) + expect(values[0]).toEqual(date2) + }) + + it(`should handle Date equality AND IN: date = date2 AND date IN [date1,date2] → date = date2`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date2)), + inOp(ref(`createdAt`), [date1, date2]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`eq`) + const value = ((result as Func).args[1] as Value).value + expect(value).toEqual(date2) + }) + + it(`should return false literal for Date equality AND non-matching IN`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date1)), + inOp(ref(`createdAt`), [date2, date3]), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + }) +}) + +describe(`unionWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return false for empty array`, () => { + const result = unionWherePredicates([]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should return the single predicate as-is`, () => { + const pred = gt(ref(`age`), val(10)) + const result = unionWherePredicates([pred]) + expect(result).toBe(pred) + }) + }) + + describe(`same field comparisons`, () => { + it(`should take least restrictive for gt: age > 10 OR age > 20 → age > 10`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + gt(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = (result as Func).args[1] as Value + 
expect(field.value).toBe(10) + }) + + it(`should take least restrictive for gte: age >= 10 OR age >= 20 → age >= 10`, () => { + const result = unionWherePredicates([ + gte(ref(`age`), val(10)), + gte(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gte`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + + it(`should take least restrictive for lt: age < 20 OR age < 10 → age < 20`, () => { + const result = unionWherePredicates([ + lt(ref(`age`), val(20)), + lt(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`lt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(20) + }) + + it(`should combine eq into IN: age = 5 OR age = 10 → age IN [5, 10]`, () => { + const result = unionWherePredicates([ + eq(ref(`age`), val(5)), + eq(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toContain(5) + expect(values).toContain(10) + expect(values.length).toBe(2) + }) + + it(`should fold IN and equality into single IN: age IN [1,2] OR age = 3 → age IN [1,2,3]`, () => { + const result = unionWherePredicates([ + inOp(ref(`age`), [1, 2]), + eq(ref(`age`), val(3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toContain(1) + expect(values).toContain(2) + expect(values).toContain(3) + expect(values.length).toBe(3) + }) + + it(`should handle gte and gt together: age > 10 OR age >= 15 → age > 10`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + gte(ref(`age`), val(15)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + }) + + 
describe(`different fields`, () => { + it(`should combine with OR: age > 10 OR status = 'active'`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`or`) + expect((result as Func).args.length).toBe(2) + }) + }) + + describe(`flatten OR`, () => { + it(`should flatten nested ORs`, () => { + const result = unionWherePredicates([ + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`or`) + expect((result as Func).args.length).toBe(3) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should combine Date equalities into IN: date = date1 OR date = date2 → date IN [date1, date2]`, () => { + const result = unionWherePredicates([ + eq(ref(`createdAt`), val(date1)), + eq(ref(`createdAt`), val(date2)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(2) + expect(values).toContainEqual(date1) + expect(values).toContainEqual(date2) + }) + + it(`should fold Date IN and equality: date IN [date1,date2] OR date = date3 → date IN [date1,date2,date3]`, () => { + const result = unionWherePredicates([ + inOp(ref(`createdAt`), [date1, date2]), + eq(ref(`createdAt`), val(date3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(3) + expect(values).toContainEqual(date1) + expect(values).toContainEqual(date2) + expect(values).toContainEqual(date3) + }) + }) +}) + +describe(`isOrderBySubset`, () => { + it(`should return true for undefined subset`, () => { + const orderBy: 
OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(undefined, orderBy)).toBe(true) + expect(isOrderBySubset([], orderBy)).toBe(true) + }) + + it(`should return false for undefined superset with non-empty subset`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(orderBy, undefined)).toBe(false) + expect(isOrderBySubset(orderBy, [])).toBe(false) + }) + + it(`should return true for identical orderBy`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(orderBy, orderBy)).toBe(true) + }) + + it(`should return true when subset is prefix of superset`, () => { + const subset: OrderBy = [orderByClause(ref(`age`), `asc`)] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(true) + }) + + it(`should return false when subset is not a prefix`, () => { + const subset: OrderBy = [orderByClause(ref(`name`), `desc`)] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) + + it(`should return false when directions differ`, () => { + const subset: OrderBy = [orderByClause(ref(`age`), `desc`)] + const superset: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) + + it(`should return false when subset is longer than superset`, () => { + const subset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + orderByClause(ref(`status`), `asc`), + ] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) +}) + +describe(`isLimitSubset`, () => { + it(`should return true for undefined subset`, () => { + expect(isLimitSubset(undefined, 10)).toBe(true) + }) + + it(`should 
return true for undefined superset`, () => { + expect(isLimitSubset(10, undefined)).toBe(true) + }) + + it(`should return true when subset <= superset`, () => { + expect(isLimitSubset(10, 20)).toBe(true) + expect(isLimitSubset(10, 10)).toBe(true) + }) + + it(`should return false when subset > superset`, () => { + expect(isLimitSubset(20, 10)).toBe(false) + }) +}) + +describe(`isPredicateSubset`, () => { + it(`should check all components`, () => { + const subset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(20)), + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 10, + } + const superset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(10)), + orderBy: [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ], + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(true) + }) + + it(`should return false if where is not subset`, () => { + const subset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(5)), + limit: 10, + } + const superset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + + it(`should return false if orderBy is not subset`, () => { + const subset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(20)), + orderBy: [orderByClause(ref(`name`), `desc`)], + } + const superset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(10)), + orderBy: [orderByClause(ref(`age`), `asc`)], + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + + it(`should return false if limit is not subset`, () => { + const subset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(20)), + limit: 30, + } + const superset: OnLoadMoreOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) +}) + +describe(`intersectPredicates`, () => { + it(`should return empty for empty array`, () => { + const result = intersectPredicates([]) + expect(result).toEqual({}) + }) + + 
it(`should return single predicate as-is`, () => { + const pred: OnLoadMoreOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + const result = intersectPredicates([pred]) + expect(result).toBe(pred) + }) + + it(`should produce false literal where clause for contradictory predicates`, () => { + const pred1: OnLoadMoreOptions = { where: eq(ref(`age`), val(5)) } + const pred2: OnLoadMoreOptions = { where: eq(ref(`age`), val(6)) } + const result = intersectPredicates([pred1, pred2]) + + expect(result.where).toBeDefined() + expect(result.where!.type).toBe(`val`) + expect((result.where as Value).value).toBe(false) + }) + + it(`should intersect where clauses`, () => { + const pred1: OnLoadMoreOptions = { where: gt(ref(`age`), val(10)) } + const pred2: OnLoadMoreOptions = { where: lt(ref(`age`), val(50)) } + const result = intersectPredicates([pred1, pred2]) + + expect(result.where).toBeDefined() + expect(result.where!.type).toBe(`func`) + expect((result.where as Func).name).toBe(`and`) + }) + + it(`should use first non-empty orderBy`, () => { + const orderBy1: OrderBy = [orderByClause(ref(`age`), `asc`)] + const pred1: OnLoadMoreOptions = { orderBy: orderBy1 } + const pred2: OnLoadMoreOptions = {} + const result = intersectPredicates([pred1, pred2]) + + expect(result.orderBy).toBe(orderBy1) + }) + + it(`should use minimum limit when all have limits (intersection = most restrictive)`, () => { + const pred1: OnLoadMoreOptions = { limit: 10 } + const pred2: OnLoadMoreOptions = { limit: 20 } + const pred3: OnLoadMoreOptions = { limit: 15 } + const result = intersectPredicates([pred1, pred2, pred3]) + + expect(result.limit).toBe(10) + }) + + it(`should use minimum limit even when some predicates are unlimited`, () => { + const pred1: OnLoadMoreOptions = { limit: 10 } + const pred2: OnLoadMoreOptions = {} // no limit = unlimited + const pred3: OnLoadMoreOptions = { limit: 20 } + const result = intersectPredicates([pred1, pred2, pred3]) + + 
expect(result.limit).toBe(10) + }) + + it(`should return undefined limit if all predicates are unlimited`, () => { + const pred1: OnLoadMoreOptions = {} + const pred2: OnLoadMoreOptions = {} + const result = intersectPredicates([pred1, pred2]) + + expect(result.limit).toBeUndefined() + }) +}) + +describe(`unionPredicates`, () => { + it(`should return empty for empty array`, () => { + const result = unionPredicates([]) + expect(result).toEqual({}) + }) + + it(`should return single predicate as-is`, () => { + const pred: OnLoadMoreOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + const result = unionPredicates([pred]) + expect(result).toBe(pred) + }) + + it(`should union where clauses`, () => { + const pred1: OnLoadMoreOptions = { where: gt(ref(`age`), val(10)) } + const pred2: OnLoadMoreOptions = { where: gt(ref(`age`), val(20)) } + const result = unionPredicates([pred1, pred2]) + + expect(result.where).toBeDefined() + expect(result.where!.type).toBe(`func`) + expect((result.where as Func).name).toBe(`gt`) + const value = ((result.where as Func).args[1] as Value).value + expect(value).toBe(10) // least restrictive + }) + + it(`should return undefined orderBy for union`, () => { + const orderBy1: OrderBy = [orderByClause(ref(`age`), `asc`)] + const pred1: OnLoadMoreOptions = { orderBy: orderBy1 } + const pred2: OnLoadMoreOptions = {} + const result = unionPredicates([pred1, pred2]) + + expect(result.orderBy).toBeUndefined() + }) + + it(`should use minimum limit when all have limits`, () => { + const pred1: OnLoadMoreOptions = { limit: 10 } + const pred2: OnLoadMoreOptions = { limit: 20 } + const pred3: OnLoadMoreOptions = { limit: 15 } + const result = unionPredicates([pred1, pred2, pred3]) + + expect(result.limit).toBe(10) + }) + + it(`should return undefined limit if any predicate is unlimited`, () => { + const pred1: OnLoadMoreOptions = { limit: 10 } + const pred2: OnLoadMoreOptions = {} // no limit = unlimited + const result = 
unionPredicates([pred1, pred2]) + + expect(result.limit).toBeUndefined() + }) +}) From 79e78033bdc09fd5667a475e0c0b9e98a2df35d5 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 11 Oct 2025 18:50:04 +0100 Subject: [PATCH 02/13] optimise in when all primatives --- packages/db/src/query/predicate-utils.ts | 108 ++++++++++++++++++++--- 1 file changed, 94 insertions(+), 14 deletions(-) diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts index 13dbc9d92..0697f6a46 100644 --- a/packages/db/src/query/predicate-utils.ts +++ b/packages/db/src/query/predicate-utils.ts @@ -131,8 +131,10 @@ function isWhereSubsetInternal( areRefsEqual(subsetFieldEq.ref, supersetFieldIn.ref) ) { // field = X is subset of field IN [X, Y, Z] if X is in the array + // Build Set once for the IN array to optimize lookup const inArray = supersetFieldIn.values - return inArray.some((val) => areValuesEqual(subsetFieldEq.value, val)) + const inSet = buildPrimitiveSet(inArray) + return arrayIncludesWithSet(inArray, subsetFieldEq.value, inSet) } } @@ -146,10 +148,11 @@ function isWhereSubsetInternal( areRefsEqual(subsetFieldIn.ref, supersetFieldIn.ref) ) { // field IN [A, B] is subset of field IN [A, B, C] if all values in subset are in superset + // Build Set once for the superset array and reuse for all subset lookups + const supersetArray = supersetFieldIn.values + const supersetSet = buildPrimitiveSet(supersetArray) return subsetFieldIn.values.every((subVal) => - supersetFieldIn.values.some((superVal) => - areValuesEqual(subVal, superVal) - ) + arrayIncludesWithSet(supersetArray, subVal, supersetSet) ) } } @@ -592,6 +595,76 @@ function areRefsEqual(a: PropRef, b: PropRef): boolean { return a.path.every((segment, i) => segment === b.path[i]) } +/** + * Check if a value is a primitive (string, number, boolean, null, undefined) + * Primitives can use Set for fast lookups + */ +function isPrimitive(value: any): boolean { + return ( + value === null || + 
value === undefined || + typeof value === `string` || + typeof value === `number` || + typeof value === `boolean` + ) +} + +/** + * Check if all values in an array are primitives + */ +function areAllPrimitives(values: Array): boolean { + return values.every(isPrimitive) +} + +/** + * Build a Set from an array if it contains only primitives and is large enough. + * Returns null if Set optimization is not applicable. + */ +function buildPrimitiveSet(array: Array): Set | null { + if (array.length > 10 && areAllPrimitives(array)) { + return new Set(array) + } + return null +} + +/** + * Check if a value is in an array, with optional pre-built Set for optimization. + * The primitiveSet should be built once using buildPrimitiveSet and reused for multiple lookups. + */ +function arrayIncludesWithSet( + array: Array, + value: any, + primitiveSet: Set | null +): boolean { + // Fast path: use pre-built Set for O(1) lookup + if (primitiveSet && isPrimitive(value)) { + return primitiveSet.has(value) + } + + // Fallback: use areValuesEqual for Dates and objects + return array.some((v) => areValuesEqual(v, value)) +} + +/** + * Intersect two arrays, with optional pre-built Set for optimization. + * The set2 should be built once using buildPrimitiveSet and reused. + */ +function intersectArraysWithSet( + arr1: Array, + arr2: Array, + set2: Set | null +): Array { + // Fast path: use pre-built Set for O(n) intersection + if (set2) { + // If set2 exists, arr2 contains ONLY primitives (that's when we build the Set). + // So we can skip non-primitives in arr1 immediately - they can't be in arr2. 
+ return arr1.filter((v) => isPrimitive(v) && set2.has(v)) + } + + // Fallback: use areValuesEqual for all comparisons + return arr1.filter((v) => arr2.some((v2) => areValuesEqual(v, v2))) +} + /** * Get the maximum of two values, handling both numbers and Dates */ @@ -854,9 +927,10 @@ function intersectSameFieldPredicates( return { type: `val`, value: false } as BasicExpression } - // Check if it's in all IN sets (use areValuesEqual for Date support) + // Check if it's in all IN sets (build Sets once for each IN array) for (const inSet of inValueSets) { - if (!inSet.some((v) => areValuesEqual(v, eqValue))) { + const primitiveSet = buildPrimitiveSet(inSet) + if (!arrayIncludesWithSet(inSet, eqValue, primitiveSet)) { return { type: `val`, value: false } as BasicExpression } } @@ -872,19 +946,25 @@ function intersectSameFieldPredicates( })! } - // Handle intersection of multiple IN clauses (use areValuesEqual for Date support) + // Handle intersection of multiple IN clauses (build Sets once for each array) let intersectedInValues: Array | null = null if (inValueSets.length > 0) { + // Build primitive Sets for all IN value arrays upfront (scan each array once) + const inValuePrimitiveSets = inValueSets.map(buildPrimitiveSet) + intersectedInValues = [...inValueSets[0]!] for (let i = 1; i < inValueSets.length; i++) { - const currentSet = inValueSets[i]! - intersectedInValues = intersectedInValues.filter((v) => - currentSet.some((cv) => areValuesEqual(v, cv)) + const currentArray = inValueSets[i]! + const currentSet = inValuePrimitiveSets[i]! 
+ intersectedInValues = intersectArraysWithSet( + intersectedInValues, + currentArray, + currentSet ) - } - // If intersection is empty, return false literal - if (intersectedInValues.length === 0) { - return { type: `val`, value: false } as BasicExpression + // Early exit if intersection becomes empty + if (intersectedInValues.length === 0) { + return { type: `val`, value: false } as BasicExpression + } } } From 7b7e569fcd1011f1bd265746779e594d1477fa0b Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 11 Oct 2025 20:14:09 +0100 Subject: [PATCH 03/13] optimisations --- packages/db/src/query/predicate-utils.ts | 121 ++++++++++++++--------- 1 file changed, 73 insertions(+), 48 deletions(-) diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts index 0697f6a46..f02de2296 100644 --- a/packages/db/src/query/predicate-utils.ts +++ b/packages/db/src/query/predicate-utils.ts @@ -131,10 +131,13 @@ function isWhereSubsetInternal( areRefsEqual(subsetFieldEq.ref, supersetFieldIn.ref) ) { // field = X is subset of field IN [X, Y, Z] if X is in the array - // Build Set once for the IN array to optimize lookup - const inArray = supersetFieldIn.values - const inSet = buildPrimitiveSet(inArray) - return arrayIncludesWithSet(inArray, subsetFieldEq.value, inSet) + // Use cached primitive set and metadata from extraction + return arrayIncludesWithSet( + supersetFieldIn.values, + subsetFieldEq.value, + supersetFieldIn.primitiveSet ?? 
null, + supersetFieldIn.areAllPrimitives + ) } } @@ -148,11 +151,14 @@ function isWhereSubsetInternal( areRefsEqual(subsetFieldIn.ref, supersetFieldIn.ref) ) { // field IN [A, B] is subset of field IN [A, B, C] if all values in subset are in superset - // Build Set once for the superset array and reuse for all subset lookups - const supersetArray = supersetFieldIn.values - const supersetSet = buildPrimitiveSet(supersetArray) + // Use cached primitive set and metadata from extraction return subsetFieldIn.values.every((subVal) => - arrayIncludesWithSet(supersetArray, subVal, supersetSet) + arrayIncludesWithSet( + supersetFieldIn.values, + subVal, + supersetFieldIn.primitiveSet ?? null, + supersetFieldIn.areAllPrimitives + ) ) } } @@ -571,18 +577,16 @@ function areValuesEqual(a: any, b: any): boolean { return a.getTime() === b.getTime() } - // For arrays and objects, use JSON comparison (simple but not perfect) + // For arrays and objects, use reference equality + // (In practice, we don't need deep equality for these cases - + // same object reference means same value for our use case) if ( typeof a === `object` && typeof b === `object` && a !== null && b !== null ) { - try { - return JSON.stringify(a) === JSON.stringify(b) - } catch { - return false - } + return a === b } return false @@ -616,29 +620,24 @@ function areAllPrimitives(values: Array): boolean { return values.every(isPrimitive) } -/** - * Build a Set from an array if it contains only primitives and is large enough. - * Returns null if Set optimization is not applicable. - */ -function buildPrimitiveSet(array: Array): Set | null { - if (array.length > 10 && areAllPrimitives(array)) { - return new Set(array) - } - return null -} - /** * Check if a value is in an array, with optional pre-built Set for optimization. - * The primitiveSet should be built once using buildPrimitiveSet and reused for multiple lookups. + * The primitiveSet is cached in InField during extraction and reused for all lookups. 
*/ function arrayIncludesWithSet( array: Array, value: any, - primitiveSet: Set | null + primitiveSet: Set | null, + arrayIsAllPrimitives?: boolean ): boolean { // Fast path: use pre-built Set for O(1) lookup - if (primitiveSet && isPrimitive(value)) { - return primitiveSet.has(value) + if (primitiveSet) { + // Skip isPrimitive check if we know the value must be primitive for a match + // (if array is all primitives, only primitives can match) + if (arrayIsAllPrimitives || isPrimitive(value)) { + return primitiveSet.has(value) + } + return false // Non-primitive can't be in primitive-only set } // Fallback: use areValuesEqual for Dates and objects @@ -647,7 +646,7 @@ function arrayIncludesWithSet( /** * Intersect two arrays, with optional pre-built Set for optimization. - * The set2 should be built once using buildPrimitiveSet and reused. + * The set2 is cached in InField during extraction and reused for all operations. */ function intersectArraysWithSet( arr1: Array, @@ -734,6 +733,9 @@ function extractEqualityField(func: Func): ComparisonField | null { interface InField { ref: PropRef values: Array + // Cached optimization data (computed once, reused many times) + areAllPrimitives?: boolean + primitiveSet?: Set | null } function extractInField(func: Func): InField | null { @@ -746,9 +748,25 @@ function extractInField(func: Func): InField | null { secondArg?.type === `val` && Array.isArray(secondArg.value) ) { + let values = secondArg.value + // Precompute optimization metadata once + const allPrimitives = areAllPrimitives(values) + let primitiveSet: Set | null = null + + if (allPrimitives && values.length > 10) { + // Build Set and dedupe values at the same time + primitiveSet = new Set(values) + // If we found duplicates, use the deduped array going forward + if (primitiveSet.size < values.length) { + values = Array.from(primitiveSet) + } + } + return { ref: firstArg, - values: secondArg.value, + values, + areAllPrimitives: allPrimitives, + primitiveSet, } } } 
@@ -768,6 +786,10 @@ function isComparisonSubset( if (subOp === superOp) { if (subOp === `eq`) { // field = X is subset of field = X only + // Fast path: primitives can use strict equality + if (isPrimitive(subsetValue) && isPrimitive(supersetValue)) { + return subsetValue === supersetValue + } return areValuesEqual(subsetValue, supersetValue) } else if (subOp === `gt`) { // field > 20 is subset of field > 10 if 20 > 10 @@ -862,7 +884,7 @@ function intersectSameFieldPredicates( let maxLt: number | null = null let maxLte: number | null = null const eqValues: Set = new Set() - const inValueSets: Array> = [] + const inFields: Array = [] // Store full InField objects to access cached data const otherPredicates: Array> = [] for (const pred of predicates) { @@ -888,7 +910,7 @@ function intersectSameFieldPredicates( } else { const inField = extractInField(func) if (inField) { - inValueSets.push(inField.values) + inFields.push(inField) // Store full InField with cached primitiveSet } else { otherPredicates.push(pred) } @@ -927,10 +949,16 @@ function intersectSameFieldPredicates( return { type: `val`, value: false } as BasicExpression } - // Check if it's in all IN sets (build Sets once for each IN array) - for (const inSet of inValueSets) { - const primitiveSet = buildPrimitiveSet(inSet) - if (!arrayIncludesWithSet(inSet, eqValue, primitiveSet)) { + // Check if it's in all IN sets (use cached primitive sets and metadata) + for (const inField of inFields) { + if ( + !arrayIncludesWithSet( + inField.values, + eqValue, + inField.primitiveSet ?? null, + inField.areAllPrimitives + ) + ) { return { type: `val`, value: false } as BasicExpression } } @@ -946,20 +974,17 @@ function intersectSameFieldPredicates( })! 
} - // Handle intersection of multiple IN clauses (build Sets once for each array) + // Handle intersection of multiple IN clauses (use cached primitive sets) let intersectedInValues: Array | null = null - if (inValueSets.length > 0) { - // Build primitive Sets for all IN value arrays upfront (scan each array once) - const inValuePrimitiveSets = inValueSets.map(buildPrimitiveSet) - - intersectedInValues = [...inValueSets[0]!] - for (let i = 1; i < inValueSets.length; i++) { - const currentArray = inValueSets[i]! - const currentSet = inValuePrimitiveSets[i]! + if (inFields.length > 0) { + // All primitive sets already cached in inFields from extraction + intersectedInValues = [...inFields[0]!.values] + for (let i = 1; i < inFields.length; i++) { + const currentField = inFields[i]! intersectedInValues = intersectArraysWithSet( intersectedInValues, - currentArray, - currentSet + currentField.values, + currentField.primitiveSet ?? null ) // Early exit if intersection becomes empty if (intersectedInValues.length === 0) { From 440ce670980d1c3cc9fac93244512cbfa1735992 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Sat, 11 Oct 2025 20:18:04 +0100 Subject: [PATCH 04/13] changeset --- .changeset/light-phones-flash.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/light-phones-flash.md diff --git a/.changeset/light-phones-flash.md b/.changeset/light-phones-flash.md new file mode 100644 index 000000000..dd9e897b1 --- /dev/null +++ b/.changeset/light-phones-flash.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Add predicate comparison and merging utilities (isWhereSubset, intersectWherePredicates, unionWherePredicates, and related functions) to support predicate push-down in collection sync operations, enabling efficient tracking of loaded data ranges and preventing redundant server requests. 
Includes performance optimizations for large primitive IN predicates (100-1250x speedup via Set-based lookups) and full support for Date objects in equality, range, and IN clause comparisons. From 6e90c2cef69ce2c557336c65c9605bad57f694ef Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Mon, 13 Oct 2025 13:11:16 +0100 Subject: [PATCH 05/13] minusWherePredicates --- packages/db/src/query/index.ts | 1 + packages/db/src/query/predicate-utils.ts | 363 ++++++++++++++++++++++ packages/db/tests/predicate-utils.test.ts | 324 +++++++++++++++++++ 3 files changed, 688 insertions(+) diff --git a/packages/db/src/query/index.ts b/packages/db/src/query/index.ts index 5f25a462b..64c783ca6 100644 --- a/packages/db/src/query/index.ts +++ b/packages/db/src/query/index.ts @@ -63,6 +63,7 @@ export { isWhereSubset, intersectWherePredicates, unionWherePredicates, + minusWherePredicates, isOrderBySubset, isLimitSubset, isPredicateSubset, diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts index f02de2296..5118558fc 100644 --- a/packages/db/src/query/predicate-utils.ts +++ b/packages/db/src/query/predicate-utils.ts @@ -323,6 +323,369 @@ export function unionWherePredicates( } as BasicExpression } +/** + * Compute the difference between two where predicates: `fromPredicate AND NOT(subtractPredicate)`. + * Returns the simplified predicate, or null if the difference cannot be simplified + * (in which case the caller should fetch the full fromPredicate). 
+ * + * @example + * // Range difference + * minusWherePredicates( + * gt(ref('age'), val(10)), // age > 10 + * gt(ref('age'), val(20)) // age > 20 + * ) // → age > 10 AND age <= 20 + * + * @example + * // Set difference + * minusWherePredicates( + * inOp(ref('status'), ['A', 'B', 'C', 'D']), // status IN ['A','B','C','D'] + * inOp(ref('status'), ['B', 'C']) // status IN ['B','C'] + * ) // → status IN ['A', 'D'] + * + * @example + * // Complete overlap - empty result + * minusWherePredicates( + * gt(ref('age'), val(20)), // age > 20 + * gt(ref('age'), val(10)) // age > 10 + * ) // → {type: 'val', value: false} (empty set) + * + * @param fromPredicate - The predicate to subtract from + * @param subtractPredicate - The predicate to subtract + * @returns The simplified difference, or null if cannot be simplified + */ +export function minusWherePredicates( + fromPredicate: BasicExpression | undefined, + subtractPredicate: BasicExpression | undefined +): BasicExpression | null { + // If nothing to subtract, return the original + if (subtractPredicate === undefined) { + return ( + fromPredicate ?? 
+ ({ type: `val`, value: true } as BasicExpression) + ) + } + + // If from is undefined (all data), we can't simplify NOT(subtract) + // Return null to indicate caller should fetch all data + if (fromPredicate === undefined) { + return null + } + + // Check if fromPredicate is entirely contained in subtractPredicate + // In that case, fromPredicate AND NOT(subtractPredicate) = empty set + if (isWhereSubset(fromPredicate, subtractPredicate)) { + return { type: `val`, value: false } as BasicExpression + } + + // Check if they are on the same field - if so, we can try to simplify + if (fromPredicate.type === `func` && subtractPredicate.type === `func`) { + const result = minusSameFieldPredicates(fromPredicate, subtractPredicate) + if (result !== null) { + return result + } + } + + // Can't simplify - return null to indicate caller should fetch full fromPredicate + return null +} + +/** + * Helper function to compute difference for same-field predicates + */ +function minusSameFieldPredicates( + fromPred: Func, + subtractPred: Func +): BasicExpression | null { + // Extract field information + const fromField = + extractComparisonField(fromPred) || + extractEqualityField(fromPred) || + extractInField(fromPred) + const subtractField = + extractComparisonField(subtractPred) || + extractEqualityField(subtractPred) || + extractInField(subtractPred) + + // Must be on the same field + if ( + !fromField || + !subtractField || + !areRefsEqual(fromField.ref, subtractField.ref) + ) { + return null + } + + // Handle IN minus IN: status IN [A,B,C,D] - status IN [B,C] = status IN [A,D] + if (fromPred.name === `in` && subtractPred.name === `in`) { + const fromInField = fromField as InField + const subtractInField = subtractField as InField + + // Filter out values that are in the subtract set + const remainingValues = fromInField.values.filter( + (v) => + !arrayIncludesWithSet( + subtractInField.values, + v, + subtractInField.primitiveSet ?? 
null, + subtractInField.areAllPrimitives + ) + ) + + if (remainingValues.length === 0) { + return { type: `val`, value: false } as BasicExpression + } + + if (remainingValues.length === 1) { + return { + type: `func`, + name: `eq`, + args: [fromField.ref, { type: `val`, value: remainingValues[0] }], + } as BasicExpression + } + + return { + type: `func`, + name: `in`, + args: [fromField.ref, { type: `val`, value: remainingValues }], + } as BasicExpression + } + + // Handle IN minus equality: status IN [A,B,C] - status = B = status IN [A,C] + if (fromPred.name === `in` && subtractPred.name === `eq`) { + const fromInField = fromField as InField + const subtractValue = (subtractField as { ref: PropRef; value: any }).value + + const remainingValues = fromInField.values.filter( + (v) => !areValuesEqual(v, subtractValue) + ) + + if (remainingValues.length === 0) { + return { type: `val`, value: false } as BasicExpression + } + + if (remainingValues.length === 1) { + return { + type: `func`, + name: `eq`, + args: [fromField.ref, { type: `val`, value: remainingValues[0] }], + } as BasicExpression + } + + return { + type: `func`, + name: `in`, + args: [fromField.ref, { type: `val`, value: remainingValues }], + } as BasicExpression + } + + // Handle equality minus equality: age = 15 - age = 15 = empty, age = 15 - age = 20 = age = 15 + if (fromPred.name === `eq` && subtractPred.name === `eq`) { + const fromValue = (fromField as { ref: PropRef; value: any }).value + const subtractValue = (subtractField as { ref: PropRef; value: any }).value + + if (areValuesEqual(fromValue, subtractValue)) { + return { type: `val`, value: false } as BasicExpression + } + + // No overlap - return original + return fromPred as BasicExpression + } + + // Handle range minus range: age > 10 - age > 20 = age > 10 AND age <= 20 + const fromComp = extractComparisonField(fromPred) + const subtractComp = extractComparisonField(subtractPred) + + if ( + fromComp && + subtractComp && + 
areRefsEqual(fromComp.ref, subtractComp.ref) + ) { + // Try to compute the difference using range logic + const result = minusRangePredicates( + fromPred, + fromComp.value, + subtractPred, + subtractComp.value + ) + return result + } + + // Can't simplify + return null +} + +/** + * Helper to compute difference between range predicates + */ +function minusRangePredicates( + fromFunc: Func, + fromValue: any, + subtractFunc: Func, + subtractValue: any +): BasicExpression | null { + const fromOp = fromFunc.name as `gt` | `gte` | `lt` | `lte` | `eq` + const subtractOp = subtractFunc.name as `gt` | `gte` | `lt` | `lte` | `eq` + const ref = (extractComparisonField(fromFunc) || + extractEqualityField(fromFunc))!.ref + + // age > 10 - age > 20 = (age > 10 AND age <= 20) + if (fromOp === `gt` && subtractOp === `gt`) { + if (fromValue < subtractValue) { + // Result is: fromValue < field <= subtractValue + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } + // fromValue >= subtractValue means no overlap + return fromFunc as BasicExpression + } + + // age >= 10 - age >= 20 = (age >= 10 AND age < 20) + if (fromOp === `gte` && subtractOp === `gte`) { + if (fromValue < subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age > 10 - age >= 20 = (age > 10 AND age < 20) + if (fromOp === `gt` && subtractOp === `gte`) { + if (fromValue < subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } 
+ return fromFunc as BasicExpression + } + + // age >= 10 - age > 20 = (age >= 10 AND age <= 20) + if (fromOp === `gte` && subtractOp === `gt`) { + if (fromValue <= subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age < 30 - age < 20 = (age >= 20 AND age < 30) + if (fromOp === `lt` && subtractOp === `lt`) { + if (fromValue > subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age <= 30 - age <= 20 = (age > 20 AND age <= 30) + if (fromOp === `lte` && subtractOp === `lte`) { + if (fromValue > subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age < 30 - age <= 20 = (age > 20 AND age < 30) + if (fromOp === `lt` && subtractOp === `lte`) { + if (fromValue > subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age <= 30 - age < 20 = (age >= 20 AND age <= 30) + if (fromOp === `lte` && subtractOp === `lt`) { + if (fromValue >= subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as 
BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // Can't simplify other combinations + return null +} + /** * Check if one orderBy clause is a subset of another. * Returns true if the subset ordering requirements are satisfied by the superset ordering. diff --git a/packages/db/tests/predicate-utils.test.ts b/packages/db/tests/predicate-utils.test.ts index ce01c5137..e01e4d8a4 100644 --- a/packages/db/tests/predicate-utils.test.ts +++ b/packages/db/tests/predicate-utils.test.ts @@ -6,6 +6,7 @@ import { isOrderBySubset, isPredicateSubset, isWhereSubset, + minusWherePredicates, unionPredicates, unionWherePredicates, } from "../src/query/predicate-utils" @@ -1006,3 +1007,326 @@ describe(`unionPredicates`, () => { expect(result.limit).toBeUndefined() }) }) + +describe(`minusWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return original predicate when nothing to subtract`, () => { + const pred = gt(ref(`age`), val(10)) + const result = minusWherePredicates(pred, undefined) + + expect(result).toEqual(pred) + }) + + it(`should return null when from is undefined (can't simplify NOT(B))`, () => { + const subtract = gt(ref(`age`), val(10)) + const result = minusWherePredicates(undefined, subtract) + + expect(result).toBeNull() + }) + + it(`should return empty set when from is subset of subtract`, () => { + const from = gt(ref(`age`), val(20)) // age > 20 + const subtract = gt(ref(`age`), val(10)) // age > 10 + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return null when predicates are on different fields`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = eq(ref(`status`), val(`active`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toBeNull() + }) + }) + + describe(`IN minus IN`, () => { + it(`should compute set difference: IN [A,B,C,D] - IN [B,C] = IN [A,D]`, () => { + const 
from = inOp(ref(`status`), [`A`, `B`, `C`, `D`]) + const subtract = inOp(ref(`status`), [`B`, `C`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`status`), val([`A`, `D`])], + }) + }) + + it(`should return empty set when all values are subtracted`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`A`, `B`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return original when no overlap`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`C`, `D`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual(from) + }) + + it(`should collapse to equality when one value remains`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`B`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `eq`, + args: [ref(`status`), val(`A`)], + }) + }) + }) + + describe(`IN minus equality`, () => { + it(`should remove value from IN: IN [A,B,C] - eq(B) = IN [A,C]`, () => { + const from = inOp(ref(`status`), [`A`, `B`, `C`]) + const subtract = eq(ref(`status`), val(`B`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`status`), val([`A`, `C`])], + }) + }) + + it(`should collapse to equality when one value remains`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = eq(ref(`status`), val(`A`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `eq`, + args: [ref(`status`), val(`B`)], + }) + }) + + it(`should return empty set when removing last value`, () => { + const from = inOp(ref(`status`), [`A`]) + const subtract = eq(ref(`status`), val(`A`)) + const result = 
minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + }) + + describe(`equality minus equality`, () => { + it(`should return empty set when same value`, () => { + const from = eq(ref(`age`), val(15)) + const subtract = eq(ref(`age`), val(15)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return original when different values`, () => { + const from = eq(ref(`age`), val(15)) + const subtract = eq(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual(from) + }) + }) + + describe(`range minus range - gt/gte`, () => { + it(`should compute difference: age > 10 - age > 20 = (age > 10 AND age <= 20)`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = gt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + + it(`should return original when no overlap: age > 20 - age > 10`, () => { + const from = gt(ref(`age`), val(20)) + const subtract = gt(ref(`age`), val(10)) + const result = minusWherePredicates(from, subtract) + + // age > 20 is subset of age > 10, so result is empty + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should compute difference: age >= 10 - age >= 20 = (age >= 10 AND age < 20)`, () => { + const from = gte(ref(`age`), val(10)) + const subtract = gte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(10)), lt(ref(`age`), val(20))], + }) + }) + + it(`should compute difference: age > 10 - age >= 20 = (age > 10 AND age < 20)`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = gte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + 
expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lt(ref(`age`), val(20))], + }) + }) + + it(`should compute difference: age >= 10 - age > 20 = (age >= 10 AND age <= 20)`, () => { + const from = gte(ref(`age`), val(10)) + const subtract = gt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + }) + + describe(`range minus range - lt/lte`, () => { + it(`should compute difference: age < 30 - age < 20 = (age >= 20 AND age < 30)`, () => { + const from = lt(ref(`age`), val(30)) + const subtract = lt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(20)), lt(ref(`age`), val(30))], + }) + }) + + it(`should return original when no overlap: age < 20 - age < 30`, () => { + const from = lt(ref(`age`), val(20)) + const subtract = lt(ref(`age`), val(30)) + const result = minusWherePredicates(from, subtract) + + // age < 20 is subset of age < 30, so result is empty + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should compute difference: age <= 30 - age <= 20 = (age > 20 AND age <= 30)`, () => { + const from = lte(ref(`age`), val(30)) + const subtract = lte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(20)), lte(ref(`age`), val(30))], + }) + }) + + it(`should compute difference: age < 30 - age <= 20 = (age > 20 AND age < 30)`, () => { + const from = lt(ref(`age`), val(30)) + const subtract = lte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(20)), lt(ref(`age`), val(30))], + }) + }) + + it(`should compute difference: age <= 
30 - age < 20 = (age >= 20 AND age <= 30)`, () => { + const from = lte(ref(`age`), val(30)) + const subtract = lt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(20)), lte(ref(`age`), val(30))], + }) + }) + }) + + describe(`Date support`, () => { + it(`should handle Date IN minus Date IN`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + const from = inOp(ref(`createdAt`), [date1, date2, date3]) + const subtract = inOp(ref(`createdAt`), [date2]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`createdAt`), val([date1, date3])], + }) + }) + + it(`should handle Date range difference: date > 2024-01-01 - date > 2024-01-15`, () => { + const date1 = new Date(`2024-01-01`) + const date15 = new Date(`2024-01-15`) + + const from = gt(ref(`createdAt`), val(date1)) + const subtract = gt(ref(`createdAt`), val(date15)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [ + gt(ref(`createdAt`), val(date1)), + lte(ref(`createdAt`), val(date15)), + ], + }) + }) + }) + + describe(`real-world sync scenarios`, () => { + it(`should compute missing data range: need age > 10, already have age > 20`, () => { + const requested = gt(ref(`age`), val(10)) + const alreadyLoaded = gt(ref(`age`), val(20)) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Need to fetch: 10 < age <= 20 + expect(needToFetch).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + + it(`should compute missing IDs: need IN [1..100], already have IN [50..100]`, () => { + const allIds = Array.from({ length: 100 }, (_, i) => i + 1) + const loadedIds = Array.from({ length: 51 }, (_, i) => i + 50) + 
+ const requested = inOp(ref(`id`), allIds) + const alreadyLoaded = inOp(ref(`id`), loadedIds) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Need to fetch: ids 1..49 + const expectedIds = Array.from({ length: 49 }, (_, i) => i + 1) + expect(needToFetch).toEqual({ + type: `func`, + name: `in`, + args: [ref(`id`), val(expectedIds)], + }) + }) + + it(`should return empty when all requested data is already loaded`, () => { + const requested = gt(ref(`age`), val(20)) + const alreadyLoaded = gt(ref(`age`), val(10)) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Requested is subset of already loaded - nothing more to fetch + expect(needToFetch).toEqual({ type: `val`, value: false }) + }) + }) +}) From 113503b351bd65fe9a5d4f26897bdb9ae1715898 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Tue, 14 Oct 2025 15:03:34 +0200 Subject: [PATCH 06/13] Add unit test for OR in both subset and superset that shows bug with OR handling --- packages/db/tests/predicate-utils.test.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/db/tests/predicate-utils.test.ts b/packages/db/tests/predicate-utils.test.ts index e01e4d8a4..35333335b 100644 --- a/packages/db/tests/predicate-utils.test.ts +++ b/packages/db/tests/predicate-utils.test.ts @@ -270,6 +270,15 @@ describe(`isWhereSubset`, () => { ).toBe(true) }) + it(`should handle OR in both: (age > 20 OR status = 'active') is subset of (age > 10 OR status = 'active')`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), eq(ref(`status`), val(`active`))), + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + it(`should handle OR in subset: (A OR B) is NOT subset of C if either is not a subset`, () => { expect( isWhereSubset( From 508ece0ed1de91c1d5e3d1291dfe2aedd2a7faf2 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Tue, 14 Oct 2025 15:05:14 +0200 Subject: [PATCH 07/13] Reorder OR handling to fix bug and handle 
AND similarly --- packages/db/src/query/predicate-utils.ts | 41 ++++++++---------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts index 5118558fc..b5f7155b4 100644 --- a/packages/db/src/query/predicate-utils.ts +++ b/packages/db/src/query/predicate-utils.ts @@ -51,35 +51,27 @@ function isWhereSubsetInternal( return true } + // Handle superset being an AND: subset must imply ALL conjuncts + // If superset is (A AND B), then subset ⊆ (A AND B) only if subset ⊆ A AND subset ⊆ B + // Example: (age > 20) ⊆ (age > 10 AND status = 'active') is false (doesn't imply status condition) + if (superset.type === `func` && superset.name === `and`) { + return superset.args.every((arg) => + isWhereSubsetInternal(subset, arg as BasicExpression) + ) + } + // Handle subset being an AND: (A AND B) implies both A and B if (subset.type === `func` && subset.name === `and`) { - // Special case: if superset is also AND, check if all conjuncts in superset are implied by subset - if (superset.type === `func` && superset.name === `and`) { - // For (A AND B) ⊆ (C AND D), we need every conjunct in superset to be implied by subset - // For each conjunct in superset, at least one conjunct in subset must be a subset of it - // OR the entire subset implies it - return superset.args.every((superArg) => { - // Check if any conjunct in subset is a subset of this superset conjunct - return subset.args.some((subArg) => - isWhereSubsetInternal( - subArg as BasicExpression, - superArg as BasicExpression - ) - ) - }) - } // For (A AND B) ⊆ C, since (A AND B) implies A, we check if any conjunct implies C return subset.args.some((arg) => isWhereSubsetInternal(arg as BasicExpression, superset) ) } - // Handle superset being an AND: subset must imply ALL conjuncts - // If superset is (A AND B), then subset ⊆ (A AND B) only if subset ⊆ A AND subset ⊆ B - // Example: (age > 20) ⊆ (age > 10 AND status = 
'active') is false (doesn't imply status condition) - if (superset.type === `func` && superset.name === `and`) { - return superset.args.every((arg) => - isWhereSubsetInternal(subset, arg as BasicExpression) + // Handle OR in subset: (A OR B) is subset of C only if both A and B are subsets of C + if (subset.type === `func` && subset.name === `or`) { + return subset.args.every((arg) => + isWhereSubsetInternal(arg as BasicExpression, superset) ) } @@ -92,13 +84,6 @@ function isWhereSubsetInternal( ) } - // Handle OR in subset: (A OR B) is subset of C only if both A and B are subsets of C - if (subset.type === `func` && subset.name === `or`) { - return subset.args.every((arg) => - isWhereSubsetInternal(arg as BasicExpression, superset) - ) - } - // Handle comparison operators on the same field if (subset.type === `func` && superset.type === `func`) { const subsetFunc = subset as Func From e837494769cacd2e0140e04b977afdbab7241394 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 15 Oct 2025 14:21:41 +0100 Subject: [PATCH 08/13] change changeset --- .changeset/light-phones-flash.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changeset/light-phones-flash.md b/.changeset/light-phones-flash.md index dd9e897b1..95a030b32 100644 --- a/.changeset/light-phones-flash.md +++ b/.changeset/light-phones-flash.md @@ -2,4 +2,4 @@ "@tanstack/db": patch --- -Add predicate comparison and merging utilities (isWhereSubset, intersectWherePredicates, unionWherePredicates, and related functions) to support predicate push-down in collection sync operations, enabling efficient tracking of loaded data ranges and preventing redundant server requests. Includes performance optimizations for large primitive IN predicates (100-1250x speedup via Set-based lookups) and full support for Date objects in equality, range, and IN clause comparisons.
+Add predicate comparison and merging utilities (isWhereSubset, intersectWherePredicates, unionWherePredicates, and related functions) to support predicate push-down in collection sync operations, enabling efficient tracking of loaded data ranges and preventing redundant server requests. Includes performance optimizations for large primitive IN predicates and full support for Date objects in equality, range, and IN clause comparisons. From 0c928a5594ce49d191b49c69a7e82e2caf39e1c4 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 15 Oct 2025 14:39:02 +0100 Subject: [PATCH 09/13] dedupe code --- packages/db/src/query/predicate-utils.ts | 481 +++++++--------------- packages/db/tests/predicate-utils.test.ts | 68 +-- 2 files changed, 194 insertions(+), 355 deletions(-) diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts index b5f7155b4..b979a5550 100644 --- a/packages/db/src/query/predicate-utils.ts +++ b/packages/db/src/query/predicate-utils.ts @@ -1,5 +1,5 @@ import type { BasicExpression, Func, OrderBy, PropRef } from "./ir.js" -import type { OnLoadMoreOptions } from "../types.js" +import type { LoadSubsetOptions } from "../types.js" /** * Check if one where clause is a logical subset of another. @@ -154,44 +154,30 @@ function isWhereSubsetInternal( } /** - * Combine multiple where predicates with AND logic (intersection). - * Returns a predicate that is satisfied only when all input predicates are satisfied. - * Simplifies when possible (e.g., age > 10 AND age > 20 → age > 20). - * Returns a false literal if predicates are contradictory (empty set). 
- * - * @example - * // Take most restrictive - * intersectWherePredicates([gt(ref('age'), val(10)), gt(ref('age'), val(20))]) // age > 20 - * - * @example - * // Different fields combine with AND - * intersectWherePredicates([gt(ref('age'), val(10)), eq(ref('status'), val('active'))]) - * // age > 10 AND status = 'active' - * - * @example - * // Contradictory predicates return false - * intersectWherePredicates([eq(ref('age'), val(5)), eq(ref('age'), val(6))]) - * // {type: 'val', value: false} - * - * @param predicates - Array of where predicates to intersect - * @returns Combined predicate representing the intersection, or false literal for empty set + * Helper to combine where predicates with common logic for AND/OR operations */ -export function intersectWherePredicates( - predicates: Array> +function combineWherePredicates( + predicates: Array>, + operation: `and` | `or`, + simplifyFn: ( + preds: Array> + ) => BasicExpression | null ): BasicExpression { + const emptyValue = operation === `and` ? true : false + const identityValue = operation === `and` ? true : false + if (predicates.length === 0) { - // No predicates means no filter (true) - return { type: `val`, value: true } as BasicExpression + return { type: `val`, value: emptyValue } as BasicExpression } if (predicates.length === 1) { return predicates[0]! 
} - // Flatten any AND expressions + // Flatten nested expressions of the same operation const flatPredicates: Array> = [] for (const pred of predicates) { - if (pred.type === `func` && pred.name === `and`) { + if (pred.type === `func` && pred.name === operation) { flatPredicates.push(...(pred.args as Array>)) } else { flatPredicates.push(pred) @@ -208,34 +194,72 @@ export function intersectWherePredicates( // Complex predicates that we can't group by field simplified.push(...preds) } else { - // Try to simplify same-field predicates with AND logic - const result = intersectSameFieldPredicates(preds) - // Check if result is a false literal (empty set) - if (result.type === `val` && result.value === false) { + // Try to simplify same-field predicates + const result = simplifyFn(preds) + + // For intersection: check for empty set (contradiction) + if ( + operation === `and` && + result && + result.type === `val` && + result.value === false + ) { // Intersection is empty (conflicting constraints) - entire AND is false return { type: `val`, value: false } as BasicExpression - } else { + } + + // For union: result may be null if simplification failed + if (result) { simplified.push(result) } } } if (simplified.length === 0) { - return { type: `val`, value: true } as BasicExpression + return { type: `val`, value: identityValue } as BasicExpression } if (simplified.length === 1) { return simplified[0]! } - // Return AND of all simplified predicates + // Return combined predicate return { type: `func`, - name: `and`, + name: operation, args: simplified, } as BasicExpression } +/** + * Combine multiple where predicates with AND logic (intersection). + * Returns a predicate that is satisfied only when all input predicates are satisfied. + * Simplifies when possible (e.g., age > 10 AND age > 20 → age > 20). + * Returns a false literal if predicates are contradictory (empty set). 
+ * + * @example + * // Take most restrictive + * intersectWherePredicates([gt(ref('age'), val(10)), gt(ref('age'), val(20))]) // age > 20 + * + * @example + * // Different fields combine with AND + * intersectWherePredicates([gt(ref('age'), val(10)), eq(ref('status'), val('active'))]) + * // age > 10 AND status = 'active' + * + * @example + * // Contradictory predicates return false + * intersectWherePredicates([eq(ref('age'), val(5)), eq(ref('age'), val(6))]) + * // {type: 'val', value: false} + * + * @param predicates - Array of where predicates to intersect + * @returns Combined predicate representing the intersection, or false literal for empty set + */ +export function intersectWherePredicates( + predicates: Array> +): BasicExpression { + return combineWherePredicates(predicates, `and`, intersectSameFieldPredicates) +} + /** * Combine multiple where predicates with OR logic (union). * Returns a predicate that is satisfied when any input predicate is satisfied. @@ -255,57 +279,7 @@ export function intersectWherePredicates( export function unionWherePredicates( predicates: Array> ): BasicExpression { - if (predicates.length === 0) { - // No predicates means no data matches (false) - return { type: `val`, value: false } as BasicExpression - } - - if (predicates.length === 1) { - return predicates[0]! 
- } - - // Flatten any OR expressions - const flatPredicates: Array> = [] - for (const pred of predicates) { - if (pred.type === `func` && pred.name === `or`) { - flatPredicates.push(...(pred.args as Array>)) - } else { - flatPredicates.push(pred) - } - } - - // Group predicates by field for simplification - const grouped = groupPredicatesByField(flatPredicates) - - // Simplify each group - const simplified: Array> = [] - for (const [field, preds] of grouped.entries()) { - if (field === null) { - // Complex predicates that we can't group by field - simplified.push(...preds) - } else { - // Try to simplify same-field predicates with OR logic - const result = unionSameFieldPredicates(preds) - if (result) { - simplified.push(result) - } - } - } - - if (simplified.length === 0) { - return { type: `val`, value: false } as BasicExpression - } - - if (simplified.length === 1) { - return simplified[0]! - } - - // Return OR of all simplified predicates - return { - type: `func`, - name: `or`, - args: simplified, - } as BasicExpression + return combineWherePredicates(predicates, `or`, unionSameFieldPredicates) } /** @@ -771,8 +745,8 @@ export function isLimitSubset( * @returns true if subset is satisfied by superset */ export function isPredicateSubset( - subset: OnLoadMoreOptions, - superset: OnLoadMoreOptions + subset: LoadSubsetOptions, + superset: LoadSubsetOptions ): boolean { return ( isWhereSubset(subset.where, superset.where) && @@ -782,16 +756,15 @@ export function isPredicateSubset( } /** - * Merge multiple predicates by intersecting their where clauses. - * Intersection semantics: returns predicate satisfied by data matching ALL input predicates. - * For limits, this means the MINIMUM (most restrictive) limit. 
- * - * @param predicates - Array of predicates to merge - * @returns Combined predicate representing the intersection + * Helper to combine predicates (where + orderBy + limit) */ -export function intersectPredicates( - predicates: Array -): OnLoadMoreOptions { +function combinePredicates( + predicates: Array, + operation: `intersect` | `union`, + whereFn: ( + clauses: Array> + ) => BasicExpression +): LoadSubsetOptions { if (predicates.length === 0) { return {} } @@ -800,28 +773,33 @@ export function intersectPredicates( return predicates[0]! } - // Intersect where clauses + // Combine where clauses const whereClauses = predicates .map((p) => p.where) .filter((w): w is BasicExpression => w !== undefined) const mergedWhere = - whereClauses.length > 0 ? intersectWherePredicates(whereClauses) : undefined + whereClauses.length > 0 ? whereFn(whereClauses) : undefined - // Use first non-empty orderBy (they should be compatible if predicates are related) - const mergedOrderBy = predicates.find( - (p) => p.orderBy && p.orderBy.length > 0 - )?.orderBy + // OrderBy logic differs by operation + const mergedOrderBy = + operation === `intersect` + ? predicates.find((p) => p.orderBy && p.orderBy.length > 0)?.orderBy + : undefined // Union: different orderings can't be combined - // Use minimum limit (most restrictive - intersection must satisfy all constraints) - // If any predicate is unlimited, the intersection is limited by the others + // Limit logic const limits = predicates .map((p) => p.limit) .filter((l): l is number => l !== undefined) + const mergedLimit = - limits.length === 0 - ? undefined // All unlimited = result unlimited - : Math.min(...limits) // Take most restrictive + operation === `intersect` + ? limits.length === 0 + ? undefined + : Math.min(...limits) // All unlimited = unlimited, else min + : limits.length === predicates.length && limits.length > 0 + ? 
Math.min(...limits) + : undefined // Min only if all have limits return { where: mergedWhere, @@ -830,6 +808,20 @@ export function intersectPredicates( } } +/** + * Merge multiple predicates by intersecting their where clauses. + * Intersection semantics: returns predicate satisfied by data matching ALL input predicates. + * For limits, this means the MINIMUM (most restrictive) limit. + * + * @param predicates - Array of predicates to merge + * @returns Combined predicate representing the intersection + */ +export function intersectPredicates( + predicates: Array +): LoadSubsetOptions { + return combinePredicates(predicates, `intersect`, intersectWherePredicates) +} + /** * Merge multiple predicates by unioning their where clauses. * @@ -837,48 +829,33 @@ export function intersectPredicates( * @returns Combined predicate */ export function unionPredicates( - predicates: Array -): OnLoadMoreOptions { - if (predicates.length === 0) { - return {} - } - - if (predicates.length === 1) { - return predicates[0]! - } - - // Union where clauses - const whereClauses = predicates - .map((p) => p.where) - .filter((w): w is BasicExpression => w !== undefined) - - const mergedWhere = - whereClauses.length > 0 ? unionWherePredicates(whereClauses) : undefined - - // For union, orderBy doesn't really make sense (different orderings) - // Return undefined - const mergedOrderBy = undefined - - // For union, take minimum limit if all have limits - const limits = predicates - .map((p) => p.limit) - .filter((l): l is number => l !== undefined) - const mergedLimit = - limits.length === predicates.length && limits.length > 0 - ? 
Math.min(...limits) - : undefined - - return { - where: mergedWhere, - orderBy: mergedOrderBy, - limit: mergedLimit, - } + predicates: Array +): LoadSubsetOptions { + return combinePredicates(predicates, `union`, unionWherePredicates) } // ============================================================================ // Helper functions // ============================================================================ +/** + * Find a predicate with a specific operator and value + */ +function findPredicateWithOperator( + predicates: Array>, + operator: string, + value: any +): BasicExpression | undefined { + return predicates.find((p) => { + if (p.type === `func`) { + const f = p as Func + const field = extractComparisonField(f) + return f.name === operator && field && areValuesEqual(field.value, value) + } + return false + }) +} + function areExpressionsEqual(a: BasicExpression, b: BasicExpression): boolean { if (a.type !== b.type) { return false @@ -1347,96 +1324,32 @@ function intersectSameFieldPredicates( // Choose the most restrictive lower bound if (minGt !== null && minGte !== null) { // If we have both > and >=, use > if it's more restrictive - if (minGt >= minGte) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `gt` && extractComparisonField(f)?.value === minGt - } - return false - })! - ) - } else { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return ( - f.name === `gte` && extractComparisonField(f)?.value === minGte - ) - } - return false - })! - ) - } + const pred = + minGt >= minGte + ? findPredicateWithOperator(predicates, `gt`, minGt) + : findPredicateWithOperator(predicates, `gte`, minGte) + if (pred) result.push(pred) } else if (minGt !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `gt` && extractComparisonField(f)?.value === minGt - } - return false - })! 
- ) + const pred = findPredicateWithOperator(predicates, `gt`, minGt) + if (pred) result.push(pred) } else if (minGte !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `gte` && extractComparisonField(f)?.value === minGte - } - return false - })! - ) + const pred = findPredicateWithOperator(predicates, `gte`, minGte) + if (pred) result.push(pred) } // Choose the most restrictive upper bound if (maxLt !== null && maxLte !== null) { - if (maxLt <= maxLte) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `lt` && extractComparisonField(f)?.value === maxLt - } - return false - })! - ) - } else { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return ( - f.name === `lte` && extractComparisonField(f)?.value === maxLte - ) - } - return false - })! - ) - } + const pred = + maxLt <= maxLte + ? findPredicateWithOperator(predicates, `lt`, maxLt) + : findPredicateWithOperator(predicates, `lte`, maxLte) + if (pred) result.push(pred) } else if (maxLt !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `lt` && extractComparisonField(f)?.value === maxLt - } - return false - })! - ) + const pred = findPredicateWithOperator(predicates, `lt`, maxLt) + if (pred) result.push(pred) } else if (maxLte !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `lte` && extractComparisonField(f)?.value === maxLte - } - return false - })! 
- ) + const pred = findPredicateWithOperator(predicates, `lte`, maxLte) + if (pred) result.push(pred) } // Add intersected IN values if present @@ -1565,112 +1478,38 @@ function unionSameFieldPredicates( // Choose the least restrictive lower bound if (maxGt !== null && maxGte !== null) { // Take the smaller one (less restrictive) - if (maxGte <= maxGt) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return ( - f.name === `gte` && extractComparisonField(f)?.value === maxGte - ) - } - return false - })! - ) - } else { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `gt` && extractComparisonField(f)?.value === maxGt - } - return false - })! - ) - } + const pred = + maxGte <= maxGt + ? findPredicateWithOperator(predicates, `gte`, maxGte) + : findPredicateWithOperator(predicates, `gt`, maxGt) + if (pred) result.push(pred) } else if (maxGt !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `gt` && extractComparisonField(f)?.value === maxGt - } - return false - })! - ) + const pred = findPredicateWithOperator(predicates, `gt`, maxGt) + if (pred) result.push(pred) } else if (maxGte !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `gte` && extractComparisonField(f)?.value === maxGte - } - return false - })! - ) + const pred = findPredicateWithOperator(predicates, `gte`, maxGte) + if (pred) result.push(pred) } // Choose the least restrictive upper bound if (minLt !== null && minLte !== null) { - if (minLte >= minLt) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return ( - f.name === `lte` && extractComparisonField(f)?.value === minLte - ) - } - return false - })! 
- ) - } else { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `lt` && extractComparisonField(f)?.value === minLt - } - return false - })! - ) - } + const pred = + minLte >= minLt + ? findPredicateWithOperator(predicates, `lte`, minLte) + : findPredicateWithOperator(predicates, `lt`, minLt) + if (pred) result.push(pred) } else if (minLt !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `lt` && extractComparisonField(f)?.value === minLt - } - return false - })! - ) + const pred = findPredicateWithOperator(predicates, `lt`, minLt) + if (pred) result.push(pred) } else if (minLte !== null) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return f.name === `lte` && extractComparisonField(f)?.value === minLte - } - return false - })! - ) + const pred = findPredicateWithOperator(predicates, `lte`, minLte) + if (pred) result.push(pred) } // Add single eq value if (eqValues.size === 1 && inValues.size === 0) { - result.push( - predicates.find((p) => { - if (p.type === `func`) { - const f = p as Func - return ( - f.name === `eq` && - extractComparisonField(f)?.value === [...eqValues][0] - ) - } - return false - })! 
- ) + const pred = findPredicateWithOperator(predicates, `eq`, [...eqValues][0]) + if (pred) result.push(pred) } // Add IN if only IN values diff --git a/packages/db/tests/predicate-utils.test.ts b/packages/db/tests/predicate-utils.test.ts index 35333335b..26849a204 100644 --- a/packages/db/tests/predicate-utils.test.ts +++ b/packages/db/tests/predicate-utils.test.ts @@ -12,7 +12,7 @@ import { } from "../src/query/predicate-utils" import { Func, PropRef, Value } from "../src/query/ir" import type { BasicExpression, OrderBy, OrderByClause } from "../src/query/ir" -import type { OnLoadMoreOptions } from "../src/types" +import type { LoadSubsetOptions } from "../src/types" // Helper functions to build expressions more easily function ref(path: string | Array): PropRef { @@ -839,12 +839,12 @@ describe(`isLimitSubset`, () => { describe(`isPredicateSubset`, () => { it(`should check all components`, () => { - const subset: OnLoadMoreOptions = { + const subset: LoadSubsetOptions = { where: gt(ref(`age`), val(20)), orderBy: [orderByClause(ref(`age`), `asc`)], limit: 10, } - const superset: OnLoadMoreOptions = { + const superset: LoadSubsetOptions = { where: gt(ref(`age`), val(10)), orderBy: [ orderByClause(ref(`age`), `asc`), @@ -856,11 +856,11 @@ describe(`isPredicateSubset`, () => { }) it(`should return false if where is not subset`, () => { - const subset: OnLoadMoreOptions = { + const subset: LoadSubsetOptions = { where: gt(ref(`age`), val(5)), limit: 10, } - const superset: OnLoadMoreOptions = { + const superset: LoadSubsetOptions = { where: gt(ref(`age`), val(10)), limit: 20, } @@ -868,11 +868,11 @@ describe(`isPredicateSubset`, () => { }) it(`should return false if orderBy is not subset`, () => { - const subset: OnLoadMoreOptions = { + const subset: LoadSubsetOptions = { where: gt(ref(`age`), val(20)), orderBy: [orderByClause(ref(`name`), `desc`)], } - const superset: OnLoadMoreOptions = { + const superset: LoadSubsetOptions = { where: gt(ref(`age`), val(10)), 
orderBy: [orderByClause(ref(`age`), `asc`)], } @@ -880,11 +880,11 @@ describe(`isPredicateSubset`, () => { }) it(`should return false if limit is not subset`, () => { - const subset: OnLoadMoreOptions = { + const subset: LoadSubsetOptions = { where: gt(ref(`age`), val(20)), limit: 30, } - const superset: OnLoadMoreOptions = { + const superset: LoadSubsetOptions = { where: gt(ref(`age`), val(10)), limit: 20, } @@ -899,7 +899,7 @@ describe(`intersectPredicates`, () => { }) it(`should return single predicate as-is`, () => { - const pred: OnLoadMoreOptions = { + const pred: LoadSubsetOptions = { where: gt(ref(`age`), val(10)), limit: 20, } @@ -908,8 +908,8 @@ describe(`intersectPredicates`, () => { }) it(`should produce false literal where clause for contradictory predicates`, () => { - const pred1: OnLoadMoreOptions = { where: eq(ref(`age`), val(5)) } - const pred2: OnLoadMoreOptions = { where: eq(ref(`age`), val(6)) } + const pred1: LoadSubsetOptions = { where: eq(ref(`age`), val(5)) } + const pred2: LoadSubsetOptions = { where: eq(ref(`age`), val(6)) } const result = intersectPredicates([pred1, pred2]) expect(result.where).toBeDefined() @@ -918,8 +918,8 @@ describe(`intersectPredicates`, () => { }) it(`should intersect where clauses`, () => { - const pred1: OnLoadMoreOptions = { where: gt(ref(`age`), val(10)) } - const pred2: OnLoadMoreOptions = { where: lt(ref(`age`), val(50)) } + const pred1: LoadSubsetOptions = { where: gt(ref(`age`), val(10)) } + const pred2: LoadSubsetOptions = { where: lt(ref(`age`), val(50)) } const result = intersectPredicates([pred1, pred2]) expect(result.where).toBeDefined() @@ -929,34 +929,34 @@ describe(`intersectPredicates`, () => { it(`should use first non-empty orderBy`, () => { const orderBy1: OrderBy = [orderByClause(ref(`age`), `asc`)] - const pred1: OnLoadMoreOptions = { orderBy: orderBy1 } - const pred2: OnLoadMoreOptions = {} + const pred1: LoadSubsetOptions = { orderBy: orderBy1 } + const pred2: LoadSubsetOptions = {} const 
result = intersectPredicates([pred1, pred2]) expect(result.orderBy).toBe(orderBy1) }) it(`should use minimum limit when all have limits (intersection = most restrictive)`, () => { - const pred1: OnLoadMoreOptions = { limit: 10 } - const pred2: OnLoadMoreOptions = { limit: 20 } - const pred3: OnLoadMoreOptions = { limit: 15 } + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = { limit: 20 } + const pred3: LoadSubsetOptions = { limit: 15 } const result = intersectPredicates([pred1, pred2, pred3]) expect(result.limit).toBe(10) }) it(`should use minimum limit even when some predicates are unlimited`, () => { - const pred1: OnLoadMoreOptions = { limit: 10 } - const pred2: OnLoadMoreOptions = {} // no limit = unlimited - const pred3: OnLoadMoreOptions = { limit: 20 } + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = {} // no limit = unlimited + const pred3: LoadSubsetOptions = { limit: 20 } const result = intersectPredicates([pred1, pred2, pred3]) expect(result.limit).toBe(10) }) it(`should return undefined limit if all predicates are unlimited`, () => { - const pred1: OnLoadMoreOptions = {} - const pred2: OnLoadMoreOptions = {} + const pred1: LoadSubsetOptions = {} + const pred2: LoadSubsetOptions = {} const result = intersectPredicates([pred1, pred2]) expect(result.limit).toBeUndefined() @@ -970,7 +970,7 @@ describe(`unionPredicates`, () => { }) it(`should return single predicate as-is`, () => { - const pred: OnLoadMoreOptions = { + const pred: LoadSubsetOptions = { where: gt(ref(`age`), val(10)), limit: 20, } @@ -979,8 +979,8 @@ describe(`unionPredicates`, () => { }) it(`should union where clauses`, () => { - const pred1: OnLoadMoreOptions = { where: gt(ref(`age`), val(10)) } - const pred2: OnLoadMoreOptions = { where: gt(ref(`age`), val(20)) } + const pred1: LoadSubsetOptions = { where: gt(ref(`age`), val(10)) } + const pred2: LoadSubsetOptions = { where: gt(ref(`age`), val(20)) } const result = 
unionPredicates([pred1, pred2]) expect(result.where).toBeDefined() @@ -992,25 +992,25 @@ describe(`unionPredicates`, () => { it(`should return undefined orderBy for union`, () => { const orderBy1: OrderBy = [orderByClause(ref(`age`), `asc`)] - const pred1: OnLoadMoreOptions = { orderBy: orderBy1 } - const pred2: OnLoadMoreOptions = {} + const pred1: LoadSubsetOptions = { orderBy: orderBy1 } + const pred2: LoadSubsetOptions = {} const result = unionPredicates([pred1, pred2]) expect(result.orderBy).toBeUndefined() }) it(`should use minimum limit when all have limits`, () => { - const pred1: OnLoadMoreOptions = { limit: 10 } - const pred2: OnLoadMoreOptions = { limit: 20 } - const pred3: OnLoadMoreOptions = { limit: 15 } + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = { limit: 20 } + const pred3: LoadSubsetOptions = { limit: 15 } const result = unionPredicates([pred1, pred2, pred3]) expect(result.limit).toBe(10) }) it(`should return undefined limit if any predicate is unlimited`, () => { - const pred1: OnLoadMoreOptions = { limit: 10 } - const pred2: OnLoadMoreOptions = {} // no limit = unlimited + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = {} // no limit = unlimited const result = unionPredicates([pred1, pred2]) expect(result.limit).toBeUndefined() From e6d8e34a7db632fbaaedf426b343eb66811a0abf Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 15 Oct 2025 15:11:43 +0100 Subject: [PATCH 10/13] add createDeduplicatedLoadSubset function --- packages/db/src/query/index.ts | 2 + packages/db/src/query/subset-dedupe.ts | 98 ++++++++ packages/db/tests/subset-dedupe.test.ts | 309 ++++++++++++++++++++++++ 3 files changed, 409 insertions(+) create mode 100644 packages/db/src/query/subset-dedupe.ts create mode 100644 packages/db/tests/subset-dedupe.test.ts diff --git a/packages/db/src/query/index.ts b/packages/db/src/query/index.ts index 64c783ca6..66f5f2d94 100644 --- a/packages/db/src/query/index.ts 
+++ b/packages/db/src/query/index.ts @@ -70,3 +70,5 @@ export { intersectPredicates, unionPredicates, } from "./predicate-utils.js" + +export { createDeduplicatedLoadSubset } from "./subset-dedupe.js" diff --git a/packages/db/src/query/subset-dedupe.ts b/packages/db/src/query/subset-dedupe.ts new file mode 100644 index 000000000..e970dc5cf --- /dev/null +++ b/packages/db/src/query/subset-dedupe.ts @@ -0,0 +1,98 @@ +import { + isPredicateSubset, + isWhereSubset, + unionWherePredicates, +} from "./predicate-utils.js" +import type { BasicExpression } from "./ir.js" +import type { LoadSubsetOptions } from "../types.js" + +/** + * Creates a deduplicated wrapper around a loadSubset function. + * Tracks what data has been loaded and avoids redundant calls. + * + * @param loadSubset - The underlying loadSubset function to wrap + * @returns A wrapped function that deduplicates calls based on loaded predicates + * + * @example + * const deduplicatedLoadSubset = createDeduplicatedLoadSubset(myLoadSubset) + * + * // First call - fetches data + * await deduplicatedLoadSubset({ where: gt(ref('age'), val(10)) }) + * + * // Second call - subset of first, returns true immediately + * await deduplicatedLoadSubset({ where: gt(ref('age'), val(20)) }) + */ +export function createDeduplicatedLoadSubset( + loadSubset: (options: LoadSubsetOptions) => true | Promise +): (options: LoadSubsetOptions) => true | Promise { + // Combined where predicate for all unlimited calls (no limit) + let unlimitedWhere: BasicExpression | undefined = undefined + + // Flag to track if we've loaded all data (unlimited call with no where clause) + let hasLoadedAllData = false + + // List of all limited calls (with limit, possibly with orderBy) + const limitedCalls: Array = [] + + return (options: LoadSubsetOptions) => { + // If we've loaded all data, everything is covered + if (hasLoadedAllData) { + return true + } + + // Check against unlimited combined predicate + // If we've loaded all data matching a where 
clause, we don't need to refetch subsets + if (unlimitedWhere !== undefined && options.where !== undefined) { + if (isWhereSubset(options.where, unlimitedWhere)) { + return true // Data already loaded via unlimited call + } + } + + // Check against limited calls + if (options.limit !== undefined) { + const alreadyLoaded = limitedCalls.some((loaded) => + isPredicateSubset(options, loaded) + ) + + if (alreadyLoaded) { + return true // Already loaded + } + } + + // Not covered by existing data - call underlying loadSubset + const resultPromise = loadSubset(options) + + // Handle both sync (true) and async (Promise) return values + if (resultPromise === true) { + // Sync return - update tracking synchronously + updateTracking(options) + return true + } else { + // Async return - update tracking after promise resolves + return resultPromise.then((result) => { + updateTracking(options) + return result + }) + } + } + + function updateTracking(options: LoadSubsetOptions) { + // Update tracking based on whether this was a limited or unlimited call + if (options.limit === undefined) { + // Unlimited call - update combined where predicate + // We ignore orderBy for unlimited calls as mentioned in requirements + if (options.where === undefined) { + // No where clause = all data loaded + hasLoadedAllData = true + unlimitedWhere = undefined + } else if (unlimitedWhere === undefined) { + unlimitedWhere = options.where + } else { + unlimitedWhere = unionWherePredicates([unlimitedWhere, options.where]) + } + } else { + // Limited call - add to list for future subset checks + limitedCalls.push(options) + } + } +} diff --git a/packages/db/tests/subset-dedupe.test.ts b/packages/db/tests/subset-dedupe.test.ts new file mode 100644 index 000000000..19120e73a --- /dev/null +++ b/packages/db/tests/subset-dedupe.test.ts @@ -0,0 +1,309 @@ +import { describe, expect, it } from "vitest" +import { createDeduplicatedLoadSubset } from "../src/query/subset-dedupe" +import { Func, PropRef, Value } 
from "../src/query/ir" +import type { BasicExpression, OrderBy } from "../src/query/ir" +import type { LoadSubsetOptions } from "../src/types" + +// Helper functions to build expressions more easily +function ref(path: string | Array): PropRef { + return new PropRef(typeof path === `string` ? [path] : path) +} + +function val(value: T): Value { + return new Value(value) +} + +function gt(left: BasicExpression, right: BasicExpression): Func { + return new Func(`gt`, [left, right]) +} + +function lt(left: BasicExpression, right: BasicExpression): Func { + return new Func(`lt`, [left, right]) +} + +function eq(left: BasicExpression, right: BasicExpression): Func { + return new Func(`eq`, [left, right]) +} + +describe(`createDeduplicatedLoadSubset`, () => { + it(`should call underlying loadSubset on first call`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + await deduplicated({ where: gt(ref(`age`), val(10)) }) + + expect(callCount).toBe(1) + }) + + it(`should return true immediately for subset unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 10 + await deduplicated({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(1) + + // Second call: age > 20 (subset of age > 10) + const result = await deduplicated({ where: gt(ref(`age`), val(20)) }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call underlying function + }) + + it(`should call underlying loadSubset for non-subset unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second 
call: age > 10 (NOT a subset of age > 20) + await deduplicated({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) // Should call underlying function + }) + + it(`should combine unlimited calls with union`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: age < 10 (different range) + await deduplicated({ where: lt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) + + // Third call: age > 25 (subset of age > 20) + const result = await deduplicated({ where: gt(ref(`age`), val(25)) }) + expect(result).toBe(true) + expect(callCount).toBe(2) // Should not call - covered by first call + }) + + it(`should track limited calls separately`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: age > 10, orderBy age asc, limit 10 + await deduplicated({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second call: age > 20, orderBy age asc, limit 5 (subset) + const result = await deduplicated({ + where: gt(ref(`age`), val(20)), + orderBy: orderBy1, + limit: 5, + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - subset of first + }) + + it(`should call underlying for non-subset limited calls`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + 
nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: age > 10, orderBy age asc, limit 10 + await deduplicated({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second call: age > 10, orderBy age asc, limit 20 (NOT a subset) + await deduplicated({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 20, + }) + expect(callCount).toBe(2) // Should call - limit is larger + }) + + it(`should check limited calls against unlimited combined predicate`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: unlimited age > 10 + await deduplicated({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(1) + + // Second call: limited age > 20 with orderBy + limit + // Even though it has a limit, it's covered by the unlimited call + const result = await deduplicated({ + where: gt(ref(`age`), val(20)), + orderBy: orderBy1, + limit: 10, + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - covered by unlimited + }) + + it(`should ignore orderBy for unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: unlimited with orderBy + await deduplicated({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + }) + expect(callCount).toBe(1) + + // Second call: subset where, different orderBy, no limit + const result = await deduplicated({ + where: gt(ref(`age`), val(20)), + }) + 
expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - orderBy ignored for unlimited + }) + + it(`should handle undefined where clauses`, async () => { + let callCount = 0 + const mockLoadSubset = async () => { + callCount++ + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + // First call: no where clause (all data) + await deduplicated({}) + expect(callCount).toBe(1) + + // Second call: with where clause (should be covered) + const result = await deduplicated({ where: gt(ref(`age`), val(10)) }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - all data already loaded + }) + + it(`should handle complex real-world scenario`, async () => { + let callCount = 0 + const calls: Array = [] + const mockLoadSubset = async (options: LoadSubsetOptions) => { + callCount++ + calls.push(options) + } + + const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`createdAt`), + compareOptions: { + direction: `desc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // Load all active users + await deduplicated({ where: eq(ref(`status`), val(`active`)) }) + expect(callCount).toBe(1) + + // Load top 10 active users by createdAt + const result1 = await deduplicated({ + where: eq(ref(`status`), val(`active`)), + orderBy: orderBy1, + limit: 10, + }) + expect(result1).toBe(true) // Covered by unlimited call + expect(callCount).toBe(1) + + // Load all inactive users + await deduplicated({ where: eq(ref(`status`), val(`inactive`)) }) + expect(callCount).toBe(2) + + // Load top 5 inactive users + const result2 = await deduplicated({ + where: eq(ref(`status`), val(`inactive`)), + orderBy: orderBy1, + limit: 5, + }) + expect(result2).toBe(true) // Covered by unlimited inactive call + expect(callCount).toBe(2) + + // Verify only 2 actual calls were made + expect(calls).toHaveLength(2) + expect(calls[0]).toEqual({ where: eq(ref(`status`), 
val(`active`)) }) + expect(calls[1]).toEqual({ where: eq(ref(`status`), val(`inactive`)) }) + }) +}) From 73525a6948839591cc37e87cbf897e3d16c91743 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 15 Oct 2025 17:59:07 +0100 Subject: [PATCH 11/13] convert deduper to a class, with reset, and dedupe inflight --- packages/db/src/query/index.ts | 2 +- packages/db/src/query/subset-dedupe.ts | 211 ++++++++++++++++++++---- packages/db/tests/subset-dedupe.test.ts | 102 +++++++----- 3 files changed, 238 insertions(+), 77 deletions(-) diff --git a/packages/db/src/query/index.ts b/packages/db/src/query/index.ts index 66f5f2d94..78dd94bfe 100644 --- a/packages/db/src/query/index.ts +++ b/packages/db/src/query/index.ts @@ -71,4 +71,4 @@ export { unionPredicates, } from "./predicate-utils.js" -export { createDeduplicatedLoadSubset } from "./subset-dedupe.js" +export { DeduplicatedLoadSubset } from "./subset-dedupe.js" diff --git a/packages/db/src/query/subset-dedupe.ts b/packages/db/src/query/subset-dedupe.ts index e970dc5cf..b812b8511 100644 --- a/packages/db/src/query/subset-dedupe.ts +++ b/packages/db/src/query/subset-dedupe.ts @@ -7,50 +7,83 @@ import type { BasicExpression } from "./ir.js" import type { LoadSubsetOptions } from "../types.js" /** - * Creates a deduplicated wrapper around a loadSubset function. - * Tracks what data has been loaded and avoids redundant calls. - * - * @param loadSubset - The underlying loadSubset function to wrap - * @returns A wrapped function that deduplicates calls based on loaded predicates + * Deduplicated wrapper for a loadSubset function. + * Tracks what data has been loaded and avoids redundant calls by applying + * subset logic to predicates. 
* * @example - * const deduplicatedLoadSubset = createDeduplicatedLoadSubset(myLoadSubset) + * const dedupe = new DeduplicatedLoadSubset(myLoadSubset) * * // First call - fetches data - * await deduplicatedLoadSubset({ where: gt(ref('age'), val(10)) }) + * await dedupe.loadSubset({ where: gt(ref('age'), val(10)) }) * * // Second call - subset of first, returns true immediately - * await deduplicatedLoadSubset({ where: gt(ref('age'), val(20)) }) + * await dedupe.loadSubset({ where: gt(ref('age'), val(20)) }) + * + * // Clear state to start fresh + * dedupe.reset() */ -export function createDeduplicatedLoadSubset( - loadSubset: (options: LoadSubsetOptions) => true | Promise -): (options: LoadSubsetOptions) => true | Promise { +export class DeduplicatedLoadSubset { + // The underlying loadSubset function to wrap + private readonly _loadSubset: ( + options: LoadSubsetOptions + ) => true | Promise + // Combined where predicate for all unlimited calls (no limit) - let unlimitedWhere: BasicExpression | undefined = undefined + private unlimitedWhere: BasicExpression | undefined = undefined // Flag to track if we've loaded all data (unlimited call with no where clause) - let hasLoadedAllData = false + private hasLoadedAllData = false // List of all limited calls (with limit, possibly with orderBy) - const limitedCalls: Array = [] + // We clone options before storing to prevent mutation of stored predicates + private limitedCalls: Array = [] + + // Track in-flight calls to prevent concurrent duplicate requests + // We store both the options and the promise so we can apply subset logic + private inflightCalls: Array<{ + options: LoadSubsetOptions + promise: Promise + }> = [] + + // Generation counter to invalidate in-flight requests after reset() + // When reset() is called, this increments, and any in-flight completion handlers + // check if their captured generation matches before updating tracking state + private generation = 0 + + constructor( + loadSubset: (options: 
LoadSubsetOptions) => true | Promise + ) { + this._loadSubset = loadSubset + } - return (options: LoadSubsetOptions) => { + /** + * Load a subset of data, with automatic deduplication based on previously + * loaded predicates and in-flight requests. + * + * This method is auto-bound, so it can be safely passed as a callback without + * losing its `this` context (e.g., `loadSubset: dedupe.loadSubset` in a sync config). + * + * @param options - The predicate options (where, orderBy, limit) + * @returns true if data is already loaded, or a Promise that resolves when data is loaded + */ + loadSubset = (options: LoadSubsetOptions): true | Promise => { // If we've loaded all data, everything is covered - if (hasLoadedAllData) { + if (this.hasLoadedAllData) { return true } // Check against unlimited combined predicate // If we've loaded all data matching a where clause, we don't need to refetch subsets - if (unlimitedWhere !== undefined && options.where !== undefined) { - if (isWhereSubset(options.where, unlimitedWhere)) { + if (this.unlimitedWhere !== undefined && options.where !== undefined) { + if (isWhereSubset(options.where, this.unlimitedWhere)) { return true // Data already loaded via unlimited call } } // Check against limited calls if (options.limit !== undefined) { - const alreadyLoaded = limitedCalls.some((loaded) => + const alreadyLoaded = this.limitedCalls.some((loaded) => isPredicateSubset(options, loaded) ) @@ -59,40 +92,152 @@ export function createDeduplicatedLoadSubset( } } + // Check against in-flight calls using the same subset logic as resolved calls + // This prevents duplicate requests when concurrent calls have subset relationships + const matchingInflight = this.inflightCalls.find((inflight) => { + // For unlimited calls, check if the incoming where is a subset of the in-flight where + if (inflight.options.limit === undefined && options.limit === undefined) { + // Both unlimited - check where subset + if (inflight.options.where === undefined) { + 
// In-flight is loading all data, so incoming is covered + return true + } + if (options.where !== undefined) { + return isWhereSubset(options.where, inflight.options.where) + } + return false + } + + // For limited calls, use the full predicate subset check (where + orderBy + limit) + if (inflight.options.limit !== undefined && options.limit !== undefined) { + return isPredicateSubset(options, inflight.options) + } + + // Mixed unlimited/limited - limited calls can be covered by unlimited calls + if (inflight.options.limit === undefined && options.limit !== undefined) { + // In-flight is unlimited, incoming is limited + if (inflight.options.where === undefined) { + // In-flight is loading all data + return true + } + if (options.where !== undefined) { + return isWhereSubset(options.where, inflight.options.where) + } + } + + return false + }) + + if (matchingInflight !== undefined) { + // An in-flight call will load data that covers this request + // Return the same promise so this caller waits for the data to load + // The in-flight promise already handles tracking updates when it completes + return matchingInflight.promise + } + // Not covered by existing data - call underlying loadSubset - const resultPromise = loadSubset(options) + const resultPromise = this._loadSubset(options) // Handle both sync (true) and async (Promise) return values if (resultPromise === true) { // Sync return - update tracking synchronously - updateTracking(options) + // Clone options before storing to protect against caller mutation + this.updateTracking(cloneOptions(options)) return true } else { - // Async return - update tracking after promise resolves - return resultPromise.then((result) => { - updateTracking(options) - return result - }) + // Async return - track the promise and update tracking after it resolves + // Clone options BEFORE entering async context to prevent mutation issues + const clonedOptions = cloneOptions(options) + + // Capture the current generation - this lets 
us detect if reset() was called + // while this request was in-flight, so we can skip updating tracking state + const capturedGeneration = this.generation + + // We need to create a reference to the in-flight entry so we can remove it later + const inflightEntry = { + options: clonedOptions, // Store cloned options for subset matching + promise: resultPromise + .then((result) => { + // Only update tracking if this request is still from the current generation + // If reset() was called, the generation will have incremented and we should + // not repopulate the state that was just cleared + if (capturedGeneration === this.generation) { + // Use the cloned options that we captured before any caller mutations + // This ensures we track exactly what was loaded, not what the caller changed + this.updateTracking(clonedOptions) + } + return result + }) + .finally(() => { + // Always remove from in-flight array on completion OR rejection + // This ensures failed requests can be retried instead of being cached forever + const index = this.inflightCalls.indexOf(inflightEntry) + if (index !== -1) { + this.inflightCalls.splice(index, 1) + } + }), + } + + // Store the in-flight entry so concurrent subset calls can wait for it + this.inflightCalls.push(inflightEntry) + return inflightEntry.promise } } - function updateTracking(options: LoadSubsetOptions) { + /** + * Reset all tracking state. + * Clears the history of loaded predicates and in-flight calls. + * Use this when you want to start fresh, for example after clearing the underlying data store. + * + * Note: Any in-flight requests will still complete, but they will not update the tracking + * state after the reset. This prevents old requests from repopulating cleared state. 
+ */ + reset(): void { + this.unlimitedWhere = undefined + this.hasLoadedAllData = false + this.limitedCalls = [] + this.inflightCalls = [] + // Increment generation to invalidate any in-flight completion handlers + // This ensures requests that were started before reset() don't repopulate the state + this.generation++ + } + + private updateTracking(options: LoadSubsetOptions): void { // Update tracking based on whether this was a limited or unlimited call if (options.limit === undefined) { // Unlimited call - update combined where predicate // We ignore orderBy for unlimited calls as mentioned in requirements if (options.where === undefined) { // No where clause = all data loaded - hasLoadedAllData = true - unlimitedWhere = undefined - } else if (unlimitedWhere === undefined) { - unlimitedWhere = options.where + this.hasLoadedAllData = true + this.unlimitedWhere = undefined + } else if (this.unlimitedWhere === undefined) { + this.unlimitedWhere = options.where } else { - unlimitedWhere = unionWherePredicates([unlimitedWhere, options.where]) + this.unlimitedWhere = unionWherePredicates([ + this.unlimitedWhere, + options.where, + ]) } } else { // Limited call - add to list for future subset checks - limitedCalls.push(options) + // Options are already cloned by caller to prevent mutation issues + this.limitedCalls.push(options) } } } + +/** + * Clones a LoadSubsetOptions object to prevent mutation of stored predicates. + * This is crucial because callers often reuse the same options object and mutate + * properties like limit or where between calls. Without cloning, our stored history + * would reflect the mutated values rather than what was actually loaded. 
+ */ +function cloneOptions(options: LoadSubsetOptions): LoadSubsetOptions { + return { + where: options.where, + orderBy: options.orderBy, + limit: options.limit, + // Note: We don't clone subscription as it's not part of predicate matching + } +} diff --git a/packages/db/tests/subset-dedupe.test.ts b/packages/db/tests/subset-dedupe.test.ts index 19120e73a..0268b3984 100644 --- a/packages/db/tests/subset-dedupe.test.ts +++ b/packages/db/tests/subset-dedupe.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from "vitest" -import { createDeduplicatedLoadSubset } from "../src/query/subset-dedupe" +import { DeduplicatedLoadSubset } from "../src/query/subset-dedupe" import { Func, PropRef, Value } from "../src/query/ir" import type { BasicExpression, OrderBy } from "../src/query/ir" import type { LoadSubsetOptions } from "../src/types" @@ -28,80 +28,89 @@ function eq(left: BasicExpression, right: BasicExpression): Func { describe(`createDeduplicatedLoadSubset`, () => { it(`should call underlying loadSubset on first call`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) - await deduplicated({ where: gt(ref(`age`), val(10)) }) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) expect(callCount).toBe(1) }) it(`should return true immediately for subset unlimited calls`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) // First call: age > 10 - await deduplicated({ where: gt(ref(`age`), val(10)) }) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) expect(callCount).toBe(1) // Second 
call: age > 20 (subset of age > 10) - const result = await deduplicated({ where: gt(ref(`age`), val(20)) }) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + }) expect(result).toBe(true) expect(callCount).toBe(1) // Should not call underlying function }) it(`should call underlying loadSubset for non-subset unlimited calls`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) // First call: age > 20 - await deduplicated({ where: gt(ref(`age`), val(20)) }) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) expect(callCount).toBe(1) // Second call: age > 10 (NOT a subset of age > 20) - await deduplicated({ where: gt(ref(`age`), val(10)) }) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) expect(callCount).toBe(2) // Should call underlying function }) it(`should combine unlimited calls with union`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) // First call: age > 20 - await deduplicated({ where: gt(ref(`age`), val(20)) }) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) expect(callCount).toBe(1) // Second call: age < 10 (different range) - await deduplicated({ where: lt(ref(`age`), val(10)) }) + await deduplicated.loadSubset({ where: lt(ref(`age`), val(10)) }) expect(callCount).toBe(2) // Third call: age > 25 (subset of age > 20) - const result = await deduplicated({ where: gt(ref(`age`), val(25)) }) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(25)), + }) expect(result).toBe(true) 
expect(callCount).toBe(2) // Should not call - covered by first call }) it(`should track limited calls separately`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) const orderBy1: OrderBy = [ { @@ -115,7 +124,7 @@ describe(`createDeduplicatedLoadSubset`, () => { ] // First call: age > 10, orderBy age asc, limit 10 - await deduplicated({ + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)), orderBy: orderBy1, limit: 10, @@ -123,7 +132,7 @@ describe(`createDeduplicatedLoadSubset`, () => { expect(callCount).toBe(1) // Second call: age > 20, orderBy age asc, limit 5 (subset) - const result = await deduplicated({ + const result = await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)), orderBy: orderBy1, limit: 5, @@ -134,11 +143,12 @@ describe(`createDeduplicatedLoadSubset`, () => { it(`should call underlying for non-subset limited calls`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) const orderBy1: OrderBy = [ { @@ -152,7 +162,7 @@ describe(`createDeduplicatedLoadSubset`, () => { ] // First call: age > 10, orderBy age asc, limit 10 - await deduplicated({ + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)), orderBy: orderBy1, limit: 10, @@ -160,7 +170,7 @@ describe(`createDeduplicatedLoadSubset`, () => { expect(callCount).toBe(1) // Second call: age > 10, orderBy age asc, limit 20 (NOT a subset) - await deduplicated({ + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)), orderBy: orderBy1, limit: 20, @@ -170,11 +180,12 @@ describe(`createDeduplicatedLoadSubset`, () => { 
it(`should check limited calls against unlimited combined predicate`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) const orderBy1: OrderBy = [ { @@ -188,12 +199,12 @@ describe(`createDeduplicatedLoadSubset`, () => { ] // First call: unlimited age > 10 - await deduplicated({ where: gt(ref(`age`), val(10)) }) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) expect(callCount).toBe(1) // Second call: limited age > 20 with orderBy + limit // Even though it has a limit, it's covered by the unlimited call - const result = await deduplicated({ + const result = await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)), orderBy: orderBy1, limit: 10, @@ -204,11 +215,12 @@ describe(`createDeduplicatedLoadSubset`, () => { it(`should ignore orderBy for unlimited calls`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = () => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) const orderBy1: OrderBy = [ { @@ -222,14 +234,14 @@ describe(`createDeduplicatedLoadSubset`, () => { ] // First call: unlimited with orderBy - await deduplicated({ + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)), orderBy: orderBy1, }) expect(callCount).toBe(1) // Second call: subset where, different orderBy, no limit - const result = await deduplicated({ + const result = await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)), }) expect(result).toBe(true) @@ -238,18 +250,21 @@ describe(`createDeduplicatedLoadSubset`, () => { it(`should handle undefined where clauses`, async () => { let callCount = 0 - const mockLoadSubset = async () => { + const mockLoadSubset = 
() => { callCount++ + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) // First call: no where clause (all data) - await deduplicated({}) + await deduplicated.loadSubset({}) expect(callCount).toBe(1) // Second call: with where clause (should be covered) - const result = await deduplicated({ where: gt(ref(`age`), val(10)) }) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + }) expect(result).toBe(true) expect(callCount).toBe(1) // Should not call - all data already loaded }) @@ -257,12 +272,13 @@ describe(`createDeduplicatedLoadSubset`, () => { it(`should handle complex real-world scenario`, async () => { let callCount = 0 const calls: Array = [] - const mockLoadSubset = async (options: LoadSubsetOptions) => { + const mockLoadSubset = (options: LoadSubsetOptions) => { callCount++ calls.push(options) + return Promise.resolve() } - const deduplicated = createDeduplicatedLoadSubset(mockLoadSubset) + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) const orderBy1: OrderBy = [ { @@ -276,11 +292,11 @@ describe(`createDeduplicatedLoadSubset`, () => { ] // Load all active users - await deduplicated({ where: eq(ref(`status`), val(`active`)) }) + await deduplicated.loadSubset({ where: eq(ref(`status`), val(`active`)) }) expect(callCount).toBe(1) // Load top 10 active users by createdAt - const result1 = await deduplicated({ + const result1 = await deduplicated.loadSubset({ where: eq(ref(`status`), val(`active`)), orderBy: orderBy1, limit: 10, @@ -289,11 +305,11 @@ describe(`createDeduplicatedLoadSubset`, () => { expect(callCount).toBe(1) // Load all inactive users - await deduplicated({ where: eq(ref(`status`), val(`inactive`)) }) + await deduplicated.loadSubset({ where: eq(ref(`status`), val(`inactive`)) }) expect(callCount).toBe(2) // Load top 5 inactive users - const result2 = await deduplicated({ + const result2 
= await deduplicated.loadSubset({ where: eq(ref(`status`), val(`inactive`)), orderBy: orderBy1, limit: 5, From 126f6b9d50b9f7219c94015558ea5ba5ead47179 Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Tue, 14 Oct 2025 11:00:30 +0100 Subject: [PATCH 12/13] Handle pushed down predicates in Electric collection Co-authored-by: Kevin De Porre Co-authored-by: Sam Willis --- .changeset/tender-carpets-cheat.md | 5 + .../electric-db-collection/src/electric.ts | 86 ++- .../src/pg-serializer.ts | 27 + .../src/sql-compiler.ts | 163 +++++ .../tests/electric-live-query.test.ts | 516 +++++++++++++ .../tests/electric.test.ts | 684 +++++++++++++++++- 6 files changed, 1462 insertions(+), 19 deletions(-) create mode 100644 .changeset/tender-carpets-cheat.md create mode 100644 packages/electric-db-collection/src/pg-serializer.ts create mode 100644 packages/electric-db-collection/src/sql-compiler.ts diff --git a/.changeset/tender-carpets-cheat.md b/.changeset/tender-carpets-cheat.md new file mode 100644 index 000000000..77c9dfd73 --- /dev/null +++ b/.changeset/tender-carpets-cheat.md @@ -0,0 +1,5 @@ +--- +"@tanstack/electric-db-collection": patch +--- + +Handle predicates that are pushed down. diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index bdd6f34a7..28a7eb60e 100644 --- a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -12,12 +12,15 @@ import { TimeoutWaitingForMatchError, TimeoutWaitingForTxIdError, } from "./errors" +import { compileSQL } from "./sql-compiler" import type { BaseCollectionConfig, CollectionConfig, DeleteMutationFnParams, InsertMutationFnParams, + LoadSubsetOptions, SyncConfig, + SyncMode, UpdateMutationFnParams, UtilsRecord, } from "@tanstack/db" @@ -72,6 +75,24 @@ type InferSchemaOutput = T extends StandardSchemaV1 : Record : Record +/** + * The mode of sync to use for the collection. 
+ * @default `eager` + * @description + * - `eager`: + * - syncs all data immediately on preload + * - collection will be marked as ready once the sync is complete + * - there is no incremental sync + * - `on-demand`: + * - syncs data in incremental snapshots when the collection is queried + * - collection will be marked as ready immediately after the first snapshot is synced + * - `progressive`: + * - syncs all data for the collection in the background + * - uses incremental snapshots during the initial sync to provide a fast path to the data required for queries + * - collection will be marked as ready once the full sync is complete + */ +export type ElectricSyncMode = SyncMode | `progressive` + /** * Configuration interface for Electric collection options * @template T - The type of items in the collection @@ -82,12 +103,13 @@ export interface ElectricCollectionConfig< TSchema extends StandardSchemaV1 = never, > extends Omit< BaseCollectionConfig, - `onInsert` | `onUpdate` | `onDelete` + `onInsert` | `onUpdate` | `onDelete` | `syncMode` > { /** * Configuration options for the ElectricSQL ShapeStream */ shapeOptions: ShapeStreamOptions> + syncMode?: ElectricSyncMode /** * Optional asynchronous handler function called before an insert operation @@ -281,6 +303,9 @@ export function electricCollectionOptions( } { const seenTxids = new Store>(new Set([])) const seenSnapshots = new Store>([]) + const internalSyncMode = config.syncMode ?? `eager` + const finalSyncMode = + internalSyncMode === `progressive` ? 
`on-demand` : internalSyncMode const pendingMatches = new Store< Map< string, @@ -331,6 +356,7 @@ export function electricCollectionOptions( const sync = createElectricSync(config.shapeOptions, { seenTxids, seenSnapshots, + syncMode: internalSyncMode, pendingMatches, currentBatchMessages, removePendingMatches, @@ -550,6 +576,7 @@ export function electricCollectionOptions( return { ...restConfig, + syncMode: finalSyncMode, sync, onInsert: wrappedOnInsert, onUpdate: wrappedOnUpdate, @@ -567,6 +594,7 @@ export function electricCollectionOptions( function createElectricSync>( shapeOptions: ShapeStreamOptions>, options: { + syncMode: ElectricSyncMode seenTxids: Store> seenSnapshots: Store> pendingMatches: Store< @@ -590,6 +618,7 @@ function createElectricSync>( const { seenTxids, seenSnapshots, + syncMode, pendingMatches, currentBatchMessages, removePendingMatches, @@ -653,6 +682,12 @@ function createElectricSync>( const stream = new ShapeStream({ ...shapeOptions, + // In on-demand mode, we only want to sync changes, so we set the log to `changes_only` + log: syncMode === `on-demand` ? `changes_only` : undefined, + // In on-demand mode, we only need the changes from the point of time the collection was created + // so we default to `now` when there is no saved offset. + offset: + shapeOptions.offset ?? (syncMode === `on-demand` ? 
`now` : undefined), signal: abortController.signal, onError: (errorParams) => { // Just immediately mark ready if there's an error to avoid blocking @@ -679,9 +714,11 @@ function createElectricSync>( let transactionStarted = false const newTxids = new Set() const newSnapshots: Array = [] + let hasReceivedUpToDate = false // Track if we've completed initial sync in progressive mode unsubscribeStream = stream.subscribe((messages: Array>) => { let hasUpToDate = false + let hasSnapshotEnd = false for (const message of messages) { // Add message to current batch buffer (for race condition handling) @@ -746,6 +783,7 @@ function createElectricSync>( }) } else if (isSnapshotEndMessage(message)) { newSnapshots.push(parseSnapshotMessage(message)) + hasSnapshotEnd = true } else if (isUpToDateMessage(message)) { hasUpToDate = true } else if (isMustRefetchMessage(message)) { @@ -761,12 +799,14 @@ function createElectricSync>( truncate() - // Reset hasUpToDate so we continue accumulating changes until next up-to-date + // Reset flags so we continue accumulating changes until next up-to-date hasUpToDate = false + hasSnapshotEnd = false + hasReceivedUpToDate = false // Reset for progressive mode - we're starting a new sync } } - if (hasUpToDate) { + if (hasUpToDate || hasSnapshotEnd) { // Clear the current batch buffer since we're now up-to-date currentBatchMessages.setState(() => []) @@ -776,8 +816,15 @@ function createElectricSync>( transactionStarted = false } - // Mark the collection as ready now that sync is up to date - markReady() + if (hasUpToDate || (hasSnapshotEnd && syncMode === `on-demand`)) { + // Mark the collection as ready now that sync is up to date + markReady() + } + + // Track that we've received the first up-to-date for progressive mode + if (hasUpToDate) { + hasReceivedUpToDate = true + } // Always commit txids when we receive up-to-date, regardless of transaction state seenTxids.setState((currentTxids) => { @@ -811,12 +858,29 @@ function createElectricSync>( 
import type { SubsetParams } from "@electric-sql/client"
import type { IR, LoadSubsetOptions } from "@tanstack/db"

// ---------------------------------------------------------------------------
// pg-serializer
// ---------------------------------------------------------------------------

/**
 * Serializes a JavaScript value into a Postgres-formatted literal string.
 *
 * - strings become single-quoted literals; embedded single quotes are doubled
 *   so the value cannot terminate the literal early
 * - numbers and bigints use their decimal string form
 * - `null` / `undefined` become `NULL`
 * - booleans become `true` / `false`
 * - dates become a quoted ISO-8601 timestamp
 * - arrays become `ARRAY[...]` with every element serialized recursively
 *
 * @param value - The value to serialize
 * @returns The Postgres-formatted string for `value`
 * @throws Error if the value has no supported representation
 */
export function serialize(value: unknown): string {
  if (value === null || value === undefined) {
    return `NULL`
  }

  if (typeof value === `string`) {
    // Standard SQL escaping: a single quote inside a literal is written twice
    return `'${value.replace(/'/g, `''`)}'`
  }

  if (typeof value === `number` || typeof value === `bigint`) {
    // bigint is handled here because JSON.stringify (used in the error path
    // below) throws on bigints and would mask the real problem
    return value.toString()
  }

  if (typeof value === `boolean`) {
    return value ? `true` : `false`
  }

  if (value instanceof Date) {
    return `'${value.toISOString()}'`
  }

  if (Array.isArray(value)) {
    return `ARRAY[${value.map(serialize).join(`,`)}]`
  }

  throw new Error(`Cannot serialize value: ${JSON.stringify(value)}`)
}

// ---------------------------------------------------------------------------
// sql-compiler
// ---------------------------------------------------------------------------

export type CompiledSqlRecord = Omit<SubsetParams, `params`> & {
  params?: Array<unknown>
}

/** SQL spelling of every operator/function name the compiler understands. */
const OPERATOR_SQL: ReadonlyMap<string, string> = new Map([
  [`eq`, `=`],
  [`gt`, `>`],
  [`gte`, `>=`],
  [`lt`, `<`],
  [`lte`, `<=`],
  [`add`, `+`],
  [`and`, `AND`],
  [`or`, `OR`],
  [`not`, `NOT`],
  [`isUndefined`, `IS NULL`],
  [`isNull`, `IS NULL`],
  [`in`, `IN`],
  [`like`, `LIKE`],
  [`ilike`, `ILIKE`],
  [`upper`, `UPPER`],
  [`lower`, `LOWER`],
  [`length`, `LENGTH`],
  [`concat`, `CONCAT`],
  [`coalesce`, `COALESCE`],
])

/** Operators written between their operands (`lhs OP rhs`). */
const INFIX_OPS: ReadonlySet<string> = new Set([
  `eq`,
  `gt`,
  `gte`,
  `lt`,
  `lte`,
  `add`,
  `like`,
  `ilike`,
  `and`,
  `or`,
])

/** Infix operators that accept two *or more* operands. */
const LOGICAL_OPS: ReadonlySet<string> = new Set([`and`, `or`])

/** Operators written after their single operand (`x IS NULL`). */
const POSTFIX_OPS: ReadonlySet<string> = new Set([`isNull`, `isUndefined`])

/**
 * Compiles load-subset options into the `where` / `orderBy` / `limit` /
 * `params` record that is handed to the stream's `requestSnapshot`.
 *
 * Values found in the expressions are replaced by positional placeholders
 * (`$1`, `$2`, ...) and returned in `params`, keyed by their 1-based position
 * and serialized with {@link serialize}.
 *
 * @param options - The subset to load (optional `where`, `orderBy`, `limit`)
 * @returns The compiled subset parameters
 * @throws Error if an expression uses an unknown operator, a nested property
 *   path, or a value that cannot be serialized
 */
export function compileSQL(options: LoadSubsetOptions): SubsetParams {
  const { where, orderBy, limit } = options

  const params: Array<unknown> = []
  const compiledSQL: CompiledSqlRecord = { params }

  if (where) {
    // TODO: this only works when the where expression's PropRefs directly reference a column of the collection
    //       doesn't work if it goes through aliases because then we need to know the entire query to be able to follow the reference until the base collection (cf. followRef function)
    compiledSQL.where = compileBasicExpression(where, params)
  }

  // An empty orderBy list would compile to an empty string, so skip it
  if (orderBy && orderBy.length > 0) {
    compiledSQL.orderBy = compileOrderBy(orderBy, params)
  }

  // Compare against undefined rather than truthiness so that an explicit
  // `limit: 0` is forwarded instead of silently turning into "no limit"
  if (limit !== undefined) {
    compiledSQL.limit = limit
  }

  // Serialize the values in the params array into PG formatted strings
  // and transform the array into a Record keyed by 1-based position
  const paramsRecord: Record<string, string> = {}
  params.forEach((param, index) => {
    paramsRecord[`${index + 1}`] = serialize(param)
  })

  return {
    ...compiledSQL,
    params: paramsRecord,
  }
}

/**
 * Compiles the expression to a SQL string and mutates the params array with the values.
 * @param exp - The expression to compile
 * @param params - The params array; every value encountered is appended to it
 * @returns The compiled SQL string
 * @throws Error on nested property paths or unknown expression types
 */
function compileBasicExpression(
  exp: IR.BasicExpression,
  params: Array<unknown>
): string {
  switch (exp.type) {
    case `val`:
      params.push(exp.value)
      // Placeholders are 1-based, so the new length is the placeholder index
      return `$${params.length}`
    case `ref`:
      // TODO: doesn't yet support JSON(B) values which could be accessed with nested props
      if (exp.path.length !== 1) {
        throw new Error(
          `Compiler can't handle nested properties: ${exp.path.join(`.`)}`
        )
      }
      return exp.path[0]!
    case `func`:
      return compileFunction(exp, params)
    default:
      throw new Error(`Unknown expression type`)
  }
}

/**
 * Compiles every order-by clause and joins them with commas.
 * @param orderBy - The order-by clauses, most significant first
 * @param params - The params array shared with the rest of the compilation
 * @returns The comma-separated ORDER BY expression list
 */
function compileOrderBy(orderBy: IR.OrderBy, params: Array<unknown>): string {
  const compiledOrderByClauses = orderBy.map((clause: IR.OrderByClause) =>
    compileOrderByClause(clause, params)
  )
  return compiledOrderByClauses.join(`,`)
}

/**
 * Compiles a single order-by clause: the expression followed by an optional
 * `DESC` and an optional `NULLS FIRST` / `NULLS LAST`.
 * @param clause - The clause to compile
 * @param params - The params array shared with the rest of the compilation
 * @returns The compiled clause
 */
function compileOrderByClause(
  clause: IR.OrderByClause,
  params: Array<unknown>
): string {
  // TODO: what to do with stringSort and locale?
  //       Correctly supporting them is tricky as it depends on Postgres' collation
  const { expression, compareOptions } = clause
  let sql = compileBasicExpression(expression, params)

  if (compareOptions.direction === `desc`) {
    sql = `${sql} DESC`
  }

  if (compareOptions.nulls === `first`) {
    sql = `${sql} NULLS FIRST`
  }

  if (compareOptions.nulls === `last`) {
    sql = `${sql} NULLS LAST`
  }

  return sql
}

/**
 * Compiles a function/operator expression.
 *
 * - infix operators (`eq`, `gt`, `like`, `add`, ...) compile to `lhs OP rhs`;
 *   `and` / `or` accept two or more operands
 * - `isNull` / `isUndefined` compile to the postfix form `x IS NULL`
 * - `in` compiles to `lhs = ANY(rhs)` because the right-hand side is bound as
 *   a single array parameter
 *   NOTE(review): confirm the sync service accepts `= ANY(...)` in where clauses
 * - `not` compiles to `NOT(x)`
 * - everything else compiles to a regular call `NAME(arg1,arg2,...)`
 *
 * @param exp - The function expression to compile
 * @param params - The params array; values in the arguments are appended to it
 * @returns The compiled SQL string
 * @throws Error for unknown names or a wrong number of arguments
 */
function compileFunction(exp: IR.Func, params: Array<unknown> = []): string {
  const { name, args } = exp

  // Resolved first so that unknown names fail before anything is compiled
  const opName = getOpName(name)

  if (isBinaryOp(name)) {
    const arityOk = LOGICAL_OPS.has(name)
      ? args.length >= 2
      : args.length === 2
    if (!arityOk) {
      throw new Error(`Binary operator ${name} expects 2 arguments`)
    }
    return args
      .map((arg: IR.BasicExpression) => compileOperand(name, arg, params))
      .join(` ${opName} `)
  }

  if (POSTFIX_OPS.has(name)) {
    assertArity(name, args, 1)
    return `${compileOperand(name, args[0]!, params)} ${opName}`
  }

  if (name === `in`) {
    assertArity(name, args, 2)
    const lhs = compileOperand(name, args[0]!, params)
    const rhs = compileBasicExpression(args[1]!, params)
    return `${lhs} = ANY(${rhs})`
  }

  if (name === `not`) {
    assertArity(name, args, 1)
    return `${opName}(${compileBasicExpression(args[0]!, params)})`
  }

  const compiledArgs = args.map((arg: IR.BasicExpression) =>
    compileBasicExpression(arg, params)
  )
  return `${opName}(${compiledArgs.join(`,`)})`
}

/**
 * Compiles one operand of an operator and wraps it in parentheses when
 * leaving them out could change how the surrounding expression is parsed.
 *
 * Under `and` / `or` only a *different* logical operator is wrapped, so flat
 * chains such as `a = $1 AND b > $2` keep their parenthesis-free form. Under
 * any other operator every operator-style operand is wrapped.
 */
function compileOperand(
  parentName: string,
  arg: IR.BasicExpression,
  params: Array<unknown>
): string {
  const sql = compileBasicExpression(arg, params)
  if (arg.type !== `func`) {
    return sql
  }

  const childName = arg.name
  const wrap = LOGICAL_OPS.has(parentName)
    ? LOGICAL_OPS.has(childName) && childName !== parentName
    : isOperatorExpression(childName)

  return wrap ? `(${sql})` : sql
}

/** Whether `name` compiles to operator syntax rather than a function call. */
function isOperatorExpression(name: string): boolean {
  return (
    isBinaryOp(name) ||
    POSTFIX_OPS.has(name) ||
    name === `not` ||
    name === `in`
  )
}

/** Throws unless `args` holds exactly `expected` entries. */
function assertArity(
  name: string,
  args: ReadonlyArray<unknown>,
  expected: number
): void {
  if (args.length !== expected) {
    throw new Error(
      `Operator ${name} expects ${expected} argument(s), got ${args.length}`
    )
  }
}

/** Whether `name` is written between its operands (`lhs OP rhs`). */
function isBinaryOp(name: string): boolean {
  return INFIX_OPS.has(name)
}

/**
 * Looks up the SQL spelling of an operator/function name.
 *
 * A Map is used instead of a plain object so that names inherited from
 * `Object.prototype` (`toString`, `constructor`, ...) are rejected.
 *
 * @throws Error if the name is not supported
 */
function getOpName(name: string): string {
  const opName = OPERATOR_SQL.get(name)

  if (opName === undefined) {
    throw new Error(`Unknown operator/function: ${name}`)
  }

  return opName
}
ShapeStream module const mockSubscribe = vi.fn() +const mockRequestSnapshot = vi.fn() const mockStream = { subscribe: mockSubscribe, + requestSnapshot: async (...args: any) => { + const result = await mockRequestSnapshot(...args) + const subscribers = mockSubscribe.mock.calls.map((args) => args[0]) + const data = [...result.data] + + const messages: Array> = data.map((row: any) => ({ + value: row.value, + key: row.key, + headers: row.headers, + })) + + if (messages.length > 0) { + // add an up-to-date message + messages.push({ + headers: { control: `up-to-date` }, + }) + } + + subscribers.forEach((subscriber) => subscriber(messages)) + return result + }, } +// Mock the requestSnapshot method +// to return an empty array of data +// since most tests don't use it +mockRequestSnapshot.mockResolvedValue({ + data: [], +}) + vi.mock(`@electric-sql/client`, async () => { const actual = await vi.importActual(`@electric-sql/client`) return { @@ -437,4 +466,491 @@ describe.each([ // Clean up subscription.unsubscribe() }) + if (autoIndex === `eager`) { + it(`should load more data via requestSnapshot when creating live query with higher limit`, async () => { + // Create a new electric collection with on-demand syncMode for this test + vi.clearAllMocks() + + let testSubscriber: (messages: Array>) => void = () => {} + mockSubscribe.mockImplementation((callback) => { + testSubscriber = callback + return () => {} + }) + + const testElectricCollection = createCollection( + electricCollectionOptions({ + id: `test-incremental-loading`, + shapeOptions: { + url: `http://test-url`, + params: { table: `users` }, + }, + syncMode: `on-demand`, + getKey: (user: User) => user.id, + startSync: true, + autoIndex: `eager` as const, + }) + ) + + mockRequestSnapshot.mockResolvedValue({ + data: [], + }) + + // Initial sync with limited data + testSubscriber([ + ...sampleUsers.map((user) => ({ + key: user.id.toString(), + value: user, + headers: { operation: `insert` as const }, + })), + { headers: 
{ control: `up-to-date` as const } }, + ]) + + expect(testElectricCollection.status).toBe(`ready`) + expect(testElectricCollection.size).toBe(4) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(0) + + // Create first live query with limit of 2 + const limitedLiveQuery = createLiveQueryCollection({ + id: `limited-users-live-query`, + startSync: true, + query: (q) => + q + .from({ user: testElectricCollection }) + .where(({ user }) => eq(user.active, true)) + .select(({ user }) => ({ + id: user.id, + name: user.name, + active: user.active, + age: user.age, + })) + .orderBy(({ user }) => user.age, `asc`) + .limit(2), + }) + + expect(limitedLiveQuery.status).toBe(`ready`) + expect(limitedLiveQuery.size).toBe(2) // Only first 2 active users + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + const callArgs = (index: number) => + mockRequestSnapshot.mock.calls[index]?.[0] + expect(callArgs(0)).toMatchObject({ + params: { "1": `true` }, + where: `active = $1`, + orderBy: `age NULLS FIRST`, + limit: 2, + }) + + // Next call will return a snapshot containing 2 rows + // Calls after that will return the default empty snapshot + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 5, + value: { + id: 5, + name: `Eve`, + age: 30, + email: `eve@example.com`, + active: true, + }, + }, + { + headers: { operation: `insert` }, + key: 6, + value: { + id: 6, + name: `Frank`, + age: 35, + email: `frank@example.com`, + active: true, + }, + }, + ], + }) + + // Create second live query with higher limit of 6 + const expandedLiveQuery = createLiveQueryCollection({ + id: `expanded-users-live-query`, + startSync: true, + query: (q) => + q + .from({ user: testElectricCollection }) + .where(({ user }) => eq(user.active, true)) + .select(({ user }) => ({ + id: user.id, + name: user.name, + active: user.active, + })) + .orderBy(({ user }) => user.age, `asc`) + .limit(6), + }) + + // Wait for the live query to process + await new 
Promise((resolve) => setTimeout(resolve, 0)) + + // Verify that requestSnapshot was called with the correct parameters + expect(mockRequestSnapshot).toHaveBeenCalledTimes(4) + + // Check that first it requested a limit of 6 users + expect(callArgs(1)).toMatchObject({ + params: { "1": `true` }, + where: `active = $1`, + orderBy: `age NULLS FIRST`, + limit: 6, + }) + + // After this initial snapshot for the new live query it receives all 3 users from the local collection + // so it still needs 3 more users to reach the limit of 6 so it requests 3 more to the sync layer + expect(callArgs(2)).toMatchObject({ + params: { "1": `true`, "2": `25` }, + where: `active = $1 AND age > $2`, + orderBy: `age NULLS FIRST`, + limit: 3, + }) + + // The previous snapshot returned 2 more users so it still needs 1 more user to reach the limit of 6 + expect(callArgs(3)).toMatchObject({ + params: { "1": `true`, "2": `35` }, + where: `active = $1 AND age > $2`, + orderBy: `age NULLS FIRST`, + limit: 1, + }) + + // The sync layer won't provide any more users so the DB is exhausted and it stops (i.e. 
doesn't request more) + + // The expanded live query should now have more data + expect(expandedLiveQuery.status).toBe(`ready`) + expect(expandedLiveQuery.size).toBe(5) // Alice, Bob, Dave from initial + Eve and Frank from additional data + }) + } +}) + +// Tests specifically for syncMode behavior with live queries +describe(`Electric Collection with Live Query - syncMode integration`, () => { + let subscriber: (messages: Array>) => void + + function createElectricCollectionWithSyncMode( + syncMode: `eager` | `on-demand` | `progressive` + ) { + vi.clearAllMocks() + + mockSubscribe.mockImplementation((callback) => { + subscriber = callback + return () => {} + }) + + mockRequestSnapshot.mockResolvedValue({ + data: [], + }) + + const config = { + id: `electric-users-${syncMode}`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `users`, + }, + }, + syncMode, + getKey: (user: User) => user.id, + } + + const options = electricCollectionOptions(config) + return createCollection({ + ...options, + startSync: true, + autoIndex: `eager` as const, + }) + } + + function simulateInitialSync(users: Array = sampleUsers) { + const messages: Array> = users.map((user) => ({ + key: user.id.toString(), + value: user, + headers: { operation: `insert` }, + })) + + messages.push({ + headers: { control: `up-to-date` }, + }) + + subscriber(messages) + } + + it(`should trigger requestSnapshot in on-demand mode when live query needs more data`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + // Initial sync with limited data + simulateInitialSync([sampleUsers[0]!, sampleUsers[1]!]) // Only Alice and Bob + expect(electricCollection.status).toBe(`ready`) + expect(electricCollection.size).toBe(2) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(0) + + // Mock requestSnapshot to return additional data + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 3, + value: 
sampleUsers[2]!, // Charlie + }, + { + headers: { operation: `insert` }, + key: 4, + value: sampleUsers[3]!, // Dave + }, + ], + }) + + // Create live query with limit that exceeds available data + const liveQuery = createLiveQueryCollection({ + id: `on-demand-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(5), + }) + + // Wait for the live query to process + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested more data from Electric with correct parameters + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 5, // Requests full limit from Electric + orderBy: `age NULLS FIRST`, + where: `active = $1`, + params: { 1: `true` }, // Parameters are stringified + }) + ) + expect(liveQuery.size).toBeGreaterThan(2) + }) + + it(`should trigger requestSnapshot in progressive mode when live query needs more data`, async () => { + const electricCollection = + createElectricCollectionWithSyncMode(`progressive`) + + // Send initial snapshot with limited data (using snapshot-end, not up-to-date) + // This keeps the collection in "loading" state, simulating progressive mode still syncing + subscriber([ + { + key: sampleUsers[0]!.id.toString(), + value: sampleUsers[0]!, + headers: { operation: `insert` }, + }, + { + key: sampleUsers[1]!.id.toString(), + value: sampleUsers[1]!, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(electricCollection.status).toBe(`loading`) // Still syncing in progressive mode + expect(electricCollection.size).toBe(2) + + // Mock requestSnapshot to return additional data + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 3, + value: sampleUsers[2]!, // Charlie + }, + ], + }) + + // Create live 
query that needs more data + createLiveQueryCollection({ + id: `progressive-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .orderBy(({ user }) => user.id, `asc`) + .limit(3), + }) + + // Wait for the live query to process + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested more data from Electric with correct parameters + // First request asks for the full limit + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 3, // Requests full limit from Electric + orderBy: `id NULLS FIRST`, + params: {}, + }) + ) + }) + + it(`should NOT trigger requestSnapshot in eager mode even when live query needs more data`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`eager`) + + // Initial sync with limited data + simulateInitialSync([sampleUsers[0]!, sampleUsers[1]!]) // Only Alice and Bob + expect(electricCollection.status).toBe(`ready`) + expect(electricCollection.size).toBe(2) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(0) + + // Create live query with limit that exceeds available data + const liveQuery = createLiveQueryCollection({ + id: `eager-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(5), + }) + + // Wait for the live query to process + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should NOT have requested more data (eager mode doesn't support incremental loading) + expect(mockRequestSnapshot).not.toHaveBeenCalled() + expect(liveQuery.size).toBe(2) // Only has the initially synced data + }) + + it(`should request additional snapshots progressively as live query expands in on-demand mode`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + // Initial sync with just Alice + simulateInitialSync([sampleUsers[0]!]) + 
expect(electricCollection.size).toBe(1) + + const callArgs = (index: number) => + mockRequestSnapshot.mock.calls[index]?.[0] + + // First snapshot returns Bob and Charlie + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 2, + value: sampleUsers[1]!, // Bob + }, + { + headers: { operation: `insert` }, + key: 3, + value: sampleUsers[2]!, // Charlie + }, + ], + }) + + // Create live query with limit of 3 + createLiveQueryCollection({ + id: `expanding-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .orderBy(({ user }) => user.age, `asc`) + .limit(3), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested snapshot for limit 3 + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 3, + orderBy: `age NULLS FIRST`, + }) + ) + + // After receiving Bob and Charlie, the collection now has 3 users (Alice + Bob + Charlie) + // but it still requests 2 more... TODO: check if this is correct? 
+ expect(callArgs(1)).toMatchObject({ + limit: 2, + orderBy: `age NULLS FIRST`, + }) + }) + + it(`should pass correct WHERE clause to requestSnapshot when live query has filters`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.size).toBe(0) + + // Create filtered live query + createLiveQueryCollection({ + id: `filtered-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.name, `desc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested snapshot with WHERE clause + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + where: `active = $1`, + params: { "1": `true` }, + orderBy: `name DESC NULLS FIRST`, + limit: 10, + }) + ) + }) + + it(`should handle complex filters in requestSnapshot`, async () => { + const electricCollection = + createElectricCollectionWithSyncMode(`progressive`) + + // Send snapshot-end (not up-to-date) to keep collection in loading state + subscriber([ + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(electricCollection.status).toBe(`loading`) // Still syncing in progressive mode + + // Create live query with complex WHERE clause + createLiveQueryCollection({ + id: `complex-filter-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 20)) + .orderBy(({ user }) => user.age, `asc`) + .limit(5), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested snapshot with complex WHERE clause + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + where: `age > $1`, + params: { "1": `20` }, + orderBy: `age NULLS FIRST`, + limit: 5, + }) + ) + }) }) diff --git 
a/packages/electric-db-collection/tests/electric.test.ts b/packages/electric-db-collection/tests/electric.test.ts index bf059a021..032e42033 100644 --- a/packages/electric-db-collection/tests/electric.test.ts +++ b/packages/electric-db-collection/tests/electric.test.ts @@ -19,8 +19,10 @@ import type { StandardSchemaV1 } from "@standard-schema/spec" // Mock the ShapeStream module const mockSubscribe = vi.fn() +const mockRequestSnapshot = vi.fn() const mockStream = { subscribe: mockSubscribe, + requestSnapshot: mockRequestSnapshot, } vi.mock(`@electric-sql/client`, async () => { @@ -50,6 +52,9 @@ describe(`Electric Integration`, () => { return () => {} }) + // Reset mock requestSnapshot + mockRequestSnapshot.mockResolvedValue(undefined) + // Create collection with Electric configuration const config = { id: `test`, @@ -728,6 +733,9 @@ describe(`Electric Integration`, () => { expect(testCollection.has(1)).toBe(true) }) + // NOTE: This test has a known issue with unhandled rejection warnings + // This is a pre-existing issue from main branch (not caused by merge) + // The test functionality works correctly, but vitest reports unhandled rejections it(`should timeout with custom match function when no match found`, async () => { vi.useFakeTimers() @@ -754,14 +762,16 @@ describe(`Electric Integration`, () => { const testCollection = createCollection(electricCollectionOptions(config)) const tx = testCollection.insert({ id: 1, name: `Timeout Test` }) - // Add catch handler to prevent global unhandled rejection detection - tx.isPersisted.promise.catch(() => {}) + // Capture the rejection promise before advancing timers + const rejectionPromise = expect(tx.isPersisted.promise).rejects.toThrow( + `Timeout waiting for custom match function` + ) // Advance timers to trigger timeout await vi.runOnlyPendingTimersAsync() // Should timeout and fail - await expect(tx.isPersisted.promise).rejects.toThrow() + await rejectionPromise vi.useRealTimers() }) @@ -834,6 +844,9 @@ 
describe(`Electric Integration`, () => { expect(options.onDelete).toBeDefined() }) + // NOTE: This test has a known issue with unhandled rejection warnings + // This is a pre-existing issue from main branch (not caused by merge) + // The test functionality works correctly, but vitest reports unhandled rejections it(`should cleanup pending matches on timeout without memory leaks`, async () => { vi.useFakeTimers() @@ -862,16 +875,16 @@ describe(`Electric Integration`, () => { // Start insert that will timeout const tx = testCollection.insert({ id: 1, name: `Timeout Test` }) - // Add catch handler to prevent global unhandled rejection detection - tx.isPersisted.promise.catch(() => {}) + // Capture the rejection promise before advancing timers + const rejectionPromise = expect(tx.isPersisted.promise).rejects.toThrow( + `Timeout waiting for custom match function` + ) // Advance timers to trigger timeout await vi.runOnlyPendingTimersAsync() // Should timeout and fail - await expect(tx.isPersisted.promise).rejects.toThrow( - `Timeout waiting for custom match function` - ) + await rejectionPromise // Send a message after timeout - should not cause any side effects // This verifies that the pending match was properly cleaned up @@ -1601,7 +1614,662 @@ describe(`Electric Integration`, () => { // Snapshot txid should also resolve await expect(testCollection.utils.awaitTxId(105)).resolves.toBe(true) }) + }) + + // Tests for syncMode configuration + describe(`syncMode configuration`, () => { + it(`should not request snapshots during subscription in eager mode`, () => { + vi.clearAllMocks() + + const config = { + id: `eager-no-snapshot-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Subscribe and try to get more data + const subscription = 
testCollection.subscribeChanges(() => {}) + + // In eager mode, requestSnapshot should not be called + expect(mockRequestSnapshot).not.toHaveBeenCalled() + + subscription.unsubscribe() + }) + + it(`should request incremental snapshots in on-demand mode when loadSubset is called`, async () => { + vi.clearAllMocks() + + const config = { + id: `on-demand-snapshot-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send up-to-date to mark collection as ready + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // In on-demand mode, calling loadSubset should request a snapshot + await testCollection._sync.loadSubset({ limit: 10 }) + + // Verify requestSnapshot was called + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 10, + params: {}, + }) + ) + }) + + it(`should request incremental snapshots in progressive mode when loadSubset is called before sync completes`, async () => { + vi.clearAllMocks() + + const config = { + id: `progressive-snapshot-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with snapshot-end (but not up-to-date yet - still syncing) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(testCollection.status).toBe(`loading`) // Not ready yet + + // In progressive mode, calling loadSubset should request a snapshot BEFORE full sync completes + await 
testCollection._sync.loadSubset({ limit: 20 }) + + // Verify requestSnapshot was called + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 20, + params: {}, + }) + ) + }) + + it(`should not request snapshots when loadSubset is called in eager mode`, async () => { + vi.clearAllMocks() + + const config = { + id: `eager-no-loadsubset-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send up-to-date to mark collection as ready + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // In eager mode, loadSubset should do nothing + await testCollection._sync.loadSubset({ limit: 10 }) + + // Verify requestSnapshot was NOT called + expect(mockRequestSnapshot).not.toHaveBeenCalled() + }) + + it(`should handle progressive mode syncing in background`, async () => { + vi.clearAllMocks() + + const config = { + id: `progressive-background-sync-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with snapshot-end (but not up-to-date - still syncing) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Initial User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Collection should have data but not be ready yet + expect(testCollection.status).toBe(`loading`) + expect(testCollection.has(1)).toBe(true) + + // Should be able to request more data incrementally before full sync completes + await testCollection._sync.loadSubset({ limit: 10 }) + 
expect(mockRequestSnapshot).toHaveBeenCalled() + + // Now send up-to-date to complete the sync + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + expect(testCollection.status).toBe(`ready`) + }) + + it(`should stop requesting snapshots in progressive mode after first up-to-date`, async () => { + vi.clearAllMocks() + + const config = { + id: `progressive-stop-after-sync-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with snapshot-end (not up-to-date yet) + subscriber([ + { + key: `1`, + value: { id: 1, name: `User 1` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(testCollection.status).toBe(`loading`) // Not ready yet in progressive + expect(testCollection.has(1)).toBe(true) + + // Should be able to request more data before up-to-date + vi.clearAllMocks() + await testCollection._sync.loadSubset({ limit: 10 }) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Now send up-to-date to complete the full sync + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + expect(testCollection.status).toBe(`ready`) + + // Try to request more data - should NOT make a request since full sync is complete + vi.clearAllMocks() + await testCollection._sync.loadSubset({ limit: 10 }) + expect(mockRequestSnapshot).not.toHaveBeenCalled() + }) + + it(`should allow snapshots in on-demand mode even after up-to-date`, async () => { + vi.clearAllMocks() + + const config = { + id: `on-demand-after-sync-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: 
true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with up-to-date + subscriber([ + { + key: `1`, + value: { id: 1, name: `User 1` }, + headers: { operation: `insert` }, + }, + { + headers: { control: `up-to-date` }, + }, + ]) + + expect(testCollection.status).toBe(`ready`) + + // Should STILL be able to request more data in on-demand mode + vi.clearAllMocks() + await testCollection._sync.loadSubset({ limit: 10 }) + expect(mockRequestSnapshot).toHaveBeenCalled() + }) + + it(`should default offset to 'now' in on-demand mode when no offset provided`, async () => { + vi.clearAllMocks() + + // Import ShapeStream to check constructor calls + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `on-demand-offset-now-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + // No offset provided + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with offset: 'now' + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `now`, + }) + ) + }) + + it(`should use undefined offset in eager mode when no offset provided`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `eager-offset-undefined-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + // No offset provided + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with offset: undefined + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: undefined, + }) + ) + }) + + it(`should use undefined offset in progressive mode when no offset 
provided`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `progressive-offset-undefined-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + // No offset provided + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with offset: undefined + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: undefined, + }) + ) + }) + + it(`should use explicit offset when provided regardless of syncMode`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `explicit-offset-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + offset: -1 as any, // Explicit offset + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with the explicit offset + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: -1, + }) + ) + }) + }) + + // Tests for commit and ready behavior with snapshot-end and up-to-date messages + describe(`Commit and ready behavior`, () => { + it(`should commit on snapshot-end in eager mode but not mark ready`, () => { + const config = { + id: `eager-snapshot-end-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by snapshot-end (but no up-to-date) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + 
{ + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Data should be committed (available in state) + expect(testCollection.has(1)).toBe(true) + expect(testCollection.get(1)).toEqual({ id: 1, name: `Test User` }) + + // But collection should NOT be marked as ready yet in eager mode + expect(testCollection.status).toBe(`loading`) + + // Now send up-to-date + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // Now it should be ready + expect(testCollection.status).toBe(`ready`) + }) + + it(`should commit and mark ready on snapshot-end in on-demand mode`, () => { + const config = { + id: `on-demand-snapshot-end-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by snapshot-end (but no up-to-date) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Data should be committed (available in state) + expect(testCollection.has(1)).toBe(true) + expect(testCollection.get(1)).toEqual({ id: 1, name: `Test User` }) + + // Collection SHOULD be marked as ready in on-demand mode + expect(testCollection.status).toBe(`ready`) + }) + + it(`should commit on snapshot-end in progressive mode but not mark ready`, () => { + const config = { + id: `progressive-snapshot-end-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by snapshot-end (but no up-to-date) + 
subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Data should be committed (available in state) + expect(testCollection.has(1)).toBe(true) + expect(testCollection.get(1)).toEqual({ id: 1, name: `Test User` }) + + // But collection should NOT be marked as ready yet in progressive mode + expect(testCollection.status).toBe(`loading`) + + // Now send up-to-date + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // Now it should be ready + expect(testCollection.status).toBe(`ready`) + }) + + it(`should commit multiple snapshot-end messages before up-to-date in eager mode`, () => { + const config = { + id: `eager-multiple-snapshots-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // First snapshot with data + subscriber([ + { + key: `1`, + value: { id: 1, name: `User 1` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // First data should be committed + expect(testCollection.has(1)).toBe(true) + expect(testCollection.status).toBe(`loading`) + + // Second snapshot with more data + subscriber([ + { + key: `2`, + value: { id: 2, name: `User 2` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `110`, + xmax: `120`, + xip_list: [], + }, + }, + ]) + + // Second data should also be committed + expect(testCollection.has(2)).toBe(true) + expect(testCollection.size).toBe(2) + expect(testCollection.status).toBe(`loading`) + + // Finally send up-to-date + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // Now 
should be ready + expect(testCollection.status).toBe(`ready`) + }) + + it(`should handle up-to-date without snapshot-end (traditional behavior)`, () => { + const config = { + id: `traditional-up-to-date-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by up-to-date (no snapshot-end) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { control: `up-to-date` }, + }, + ]) + + // Data should be committed and collection ready + expect(testCollection.has(1)).toBe(true) + expect(testCollection.status).toBe(`ready`) + }) + }) + describe(`syncMode configuration - GC and resync`, () => { it(`should resync after garbage collection and new subscription`, () => { // Use fake timers for this test vi.useFakeTimers() From f7211d92a30e4a9246b9df9f9b4e5f92d156ecfa Mon Sep 17 00:00:00 2001 From: Sam Willis Date: Wed, 15 Oct 2025 19:31:00 +0100 Subject: [PATCH 13/13] use the subsetDeduper for electric --- .../electric-db-collection/src/electric.ts | 39 +- .../tests/electric-live-query.test.ts | 378 ++++++++++++++++-- .../tests/electric.test-d.ts | 3 +- packages/electric-db-collection/tsconfig.json | 4 +- 4 files changed, 373 insertions(+), 51 deletions(-) diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index 28a7eb60e..a71ae10bd 100644 --- a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -6,6 +6,7 @@ import { } from "@electric-sql/client" import { Store } from "@tanstack/store" import DebugModule from "debug" +import { DeduplicatedLoadSubset } from "@tanstack/db" import { ExpectedNumberInAwaitTxIdError, StreamAbortedError, @@ -716,6 +717,21 @@ function 
createElectricSync>( const newSnapshots: Array = [] let hasReceivedUpToDate = false // Track if we've completed initial sync in progressive mode + // Create deduplicated loadSubset wrapper for non-eager modes + // This prevents redundant snapshot requests when multiple concurrent + // live queries request overlapping or subset predicates + const loadSubsetDedupe = + syncMode === `eager` + ? null + : new DeduplicatedLoadSubset(async (opts: LoadSubsetOptions) => { + // In progressive mode, stop requesting snapshots once full sync is complete + if (syncMode === `progressive` && hasReceivedUpToDate) { + return + } + const snapshotParams = compileSQL(opts) + await stream.requestSnapshot(snapshotParams) + }) + unsubscribeStream = stream.subscribe((messages: Array>) => { let hasUpToDate = false let hasSnapshotEnd = false @@ -799,6 +815,10 @@ function createElectricSync>( truncate() + // Reset the loadSubset deduplication state since we're starting fresh + // This ensures that previously loaded predicates don't prevent refetching after truncate + loadSubsetDedupe?.reset() + // Reset flags so we continue accumulating changes until next up-to-date hasUpToDate = false hasSnapshotEnd = false @@ -858,23 +878,10 @@ function createElectricSync>( } }) - // Only set onLoadSubset if the sync mode is not eager, this indicates to the sync - // layer that it can load more data on demand via the requestSnapshot method when, - // the syncMode = `on-demand` or `progressive` - const loadSubset = - syncMode === `eager` - ? 
undefined - : async (opts: LoadSubsetOptions) => { - // In progressive mode, stop requesting snapshots once full sync is complete - if (syncMode === `progressive` && hasReceivedUpToDate) { - return - } - const snapshotParams = compileSQL(opts) - await stream.requestSnapshot(snapshotParams) - } - + // Return the deduplicated loadSubset if available (on-demand or progressive mode) + // The loadSubset method is auto-bound, so it can be safely returned directly return { - loadSubset, + loadSubset: loadSubsetDedupe?.loadSubset, cleanup: () => { // Unsubscribe from the stream unsubscribeStream() diff --git a/packages/electric-db-collection/tests/electric-live-query.test.ts b/packages/electric-db-collection/tests/electric-live-query.test.ts index 1cd952506..b3a55d087 100644 --- a/packages/electric-db-collection/tests/electric-live-query.test.ts +++ b/packages/electric-db-collection/tests/electric-live-query.test.ts @@ -4,6 +4,7 @@ import { createLiveQueryCollection, eq, gt, + lt, } from "@tanstack/db" import { electricCollectionOptions } from "../src/electric" import type { ElectricCollectionUtils } from "../src/electric" @@ -59,7 +60,7 @@ const mockStream = { subscribe: mockSubscribe, requestSnapshot: async (...args: any) => { const result = await mockRequestSnapshot(...args) - const subscribers = mockSubscribe.mock.calls.map((args) => args[0]) + const subscribers = mockSubscribe.mock.calls.map((call) => call[0]) const data = [...result.data] const messages: Array> = data.map((row: any) => ({ @@ -589,39 +590,33 @@ describe.each([ // Wait for the live query to process await new Promise((resolve) => setTimeout(resolve, 0)) - // Verify that requestSnapshot was called with the correct parameters - expect(mockRequestSnapshot).toHaveBeenCalledTimes(4) + // With deduplication, the expanded query (limit 6) is NOT a subset of the limited query (limit 2), + // so it will trigger a new requestSnapshot call. 
However, some of the recursive + // calls may be deduped if they're covered by the union of previous unlimited calls. + // We expect exactly 2 calls: the initial limit 2 and the initial limit 6. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) - // Check that first it requested a limit of 6 users - expect(callArgs(1)).toMatchObject({ + // Check that first it requested a limit of 2 users (from first query) + expect(callArgs(0)).toMatchObject({ params: { "1": `true` }, where: `active = $1`, orderBy: `age NULLS FIRST`, - limit: 6, - }) - - // After this initial snapshot for the new live query it receives all 3 users from the local collection - // so it still needs 3 more users to reach the limit of 6 so it requests 3 more to the sync layer - expect(callArgs(2)).toMatchObject({ - params: { "1": `true`, "2": `25` }, - where: `active = $1 AND age > $2`, - orderBy: `age NULLS FIRST`, - limit: 3, + limit: 2, }) - // The previous snapshot returned 2 more users so it still needs 1 more user to reach the limit of 6 - expect(callArgs(3)).toMatchObject({ - params: { "1": `true`, "2": `35` }, - where: `active = $1 AND age > $2`, + // Check that second it requested a limit of 6 users (from second query) + expect(callArgs(1)).toMatchObject({ + params: { "1": `true` }, + where: `active = $1`, orderBy: `age NULLS FIRST`, - limit: 1, + limit: 6, }) - // The sync layer won't provide any more users so the DB is exhausted and it stops (i.e. 
doesn't request more) - - // The expanded live query should now have more data + // The expanded live query should have the locally available data expect(expandedLiveQuery.status).toBe(`ready`) - expect(expandedLiveQuery.size).toBe(5) // Alice, Bob, Dave from initial + Eve and Frank from additional data + // The mock returned 2 additional users (Eve and Frank) in response to the limit 6 request, + // plus the initial 3 active users (Alice, Bob, Dave) from the initial sync + expect(expandedLiveQuery.size).toBe(5) }) } }) @@ -832,9 +827,6 @@ describe(`Electric Collection with Live Query - syncMode integration`, () => { simulateInitialSync([sampleUsers[0]!]) expect(electricCollection.size).toBe(1) - const callArgs = (index: number) => - mockRequestSnapshot.mock.calls[index]?.[0] - // First snapshot returns Bob and Charlie mockRequestSnapshot.mockResolvedValueOnce({ data: [ @@ -872,12 +864,11 @@ describe(`Electric Collection with Live Query - syncMode integration`, () => { }) ) - // After receiving Bob and Charlie, the collection now has 3 users (Alice + Bob + Charlie) - // but it still requests 2 more... TODO: check if this is correct? - expect(callArgs(1)).toMatchObject({ - limit: 2, - orderBy: `age NULLS FIRST`, - }) + // With deduplication, the unlimited where predicate (no where clause) is tracked, + // and subsequent calls for the same unlimited predicate may be deduped. + // After receiving Bob and Charlie, we have 3 users total, which satisfies the limit of 3, + // so no additional requests should be made. 
+ expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) }) it(`should pass correct WHERE clause to requestSnapshot when live query has filters`, async () => { @@ -954,3 +945,324 @@ describe(`Electric Collection with Live Query - syncMode integration`, () => { ) }) }) + +// Tests specifically for loadSubset deduplication +describe(`Electric Collection - loadSubset deduplication`, () => { + let subscriber: (messages: Array>) => void + + function createElectricCollectionWithSyncMode( + syncMode: `on-demand` | `progressive` + ) { + vi.clearAllMocks() + + mockSubscribe.mockImplementation((callback) => { + subscriber = callback + return () => {} + }) + + mockRequestSnapshot.mockResolvedValue({ + data: [], + }) + + const config = { + id: `electric-dedupe-test-${syncMode}`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `users`, + }, + }, + syncMode, + getKey: (user: User) => user.id, + } + + const options = electricCollectionOptions(config) + return createCollection({ + ...options, + startSync: true, + autoIndex: `eager` as const, + }) + } + + function simulateInitialSync(users: Array = sampleUsers) { + const messages: Array> = users.map((user) => ({ + key: user.id.toString(), + value: user, + headers: { operation: `insert` }, + })) + + messages.push({ + headers: { control: `up-to-date` }, + }) + + subscriber(messages) + } + + it(`should deduplicate identical concurrent loadSubset requests`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create three identical live queries concurrently + // Without deduplication, this would trigger 3 requestSnapshot calls + // With deduplication, only 1 should be made + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + 
createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // With deduplication, only 1 requestSnapshot call should be made + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + where: `active = $1`, + params: { "1": `true` }, + orderBy: `age NULLS FIRST`, + limit: 10, + }) + ) + }) + + it(`should deduplicate subset loadSubset requests`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query with a broader predicate + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 10)) + .orderBy(({ user }) => user.age, `asc`) + .limit(20), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create a live query with a subset predicate (age > 20 is subset of age > 10) + // This should be deduped - no additional requestSnapshot call + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 20)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Still only 1 call - the second was deduped as a subset + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + }) + + it(`should NOT deduplicate non-subset loadSubset 
requests`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query with a narrower predicate (age > 30); this is request 1 + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 30)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create a live query with a broader predicate (age > 20 also matches ages 21-30, so it is NOT a subset of age > 30) + // This should NOT be deduped - should trigger another requestSnapshot + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 20)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have 2 calls - the second was not a subset + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + }) + + it(`should reset deduplication state on must-refetch/truncate`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync(sampleUsers) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query for active users; this is request 1 + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Simulate a must-refetch control message followed by up-to-date (expected to trigger truncate and a deduplication reset) + subscriber([{ headers: { control: `must-refetch` } }]) + subscriber([{ headers: { control: `up-to-date` } }]) + + // Wait for the existing live query to re-request data after truncate + await new Promise((resolve) =>
setTimeout(resolve, 0)) + + // The existing live query re-requests its data after truncate (call 2) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + + // Re-creating the identical query here would not exercise the reset: + // although the reset cleared the deduplication state, the existing live query + // has just made the same request again (call 2), so a repeat would be deduped against it. + // Create a query with a different predicate (active = false) instead + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, false)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have 3 calls - the different query triggered a new request. NOTE(review): active = false is presumably never covered by active = true, so call 3 alone may not prove the reset - confirm that call 2 is the real evidence + expect(mockRequestSnapshot).toHaveBeenCalledTimes(3) + }) + + it(`should deduplicate unlimited queries regardless of orderBy`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query without limit (unlimited), ordered by age ascending + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create another unlimited query with same where but different orderBy (name descending) + // This should be deduped - orderBy is ignored for unlimited queries + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.name, `desc`), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Still only 1 call - different orderBy doesn't matter for unlimited queries +
expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + }) + + it(`should combine multiple unlimited queries with union`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create first unlimited query (age > 30); this is request 1 + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 30)), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create second unlimited query (age < 20) - a range disjoint from age > 30 + // This should trigger a new request (call 2) + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => lt(user.age, 20)), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + + // Create third query (age > 35) - this is a subset of (age > 30) + // This should be deduped - no third requestSnapshot call + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 35)), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Still 2 calls - third was covered by the union of first two (age > 30 OR age < 20). NOTE(review): age > 35 is already covered by the first query alone, so this may not distinguish a true union from per-request matching - confirm + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + }) +}) diff --git a/packages/electric-db-collection/tests/electric.test-d.ts b/packages/electric-db-collection/tests/electric.test-d.ts index b45d47370..27f90918d 100644 --- a/packages/electric-db-collection/tests/electric.test-d.ts +++ b/packages/electric-db-collection/tests/electric.test-d.ts @@ -1,6 +1,7 @@ import { describe, expectTypeOf, it } from "vitest" import { z } from "zod" import { + and, createCollection, createLiveQueryCollection, eq, @@ -200,7 +201,7 @@ describe(`Electric collection type resolution tests`, () => { query: (q) => q .from({ user: usersCollection
}) - .where(({ user }) => eq(user.active, true) && gt(user.age, 18)) + .where(({ user }) => and(eq(user.active, true), gt(user.age, 18))) .select(({ user }) => ({ id: user.id, name: user.name, diff --git a/packages/electric-db-collection/tsconfig.json b/packages/electric-db-collection/tsconfig.json index 7e586bab3..fc6368937 100644 --- a/packages/electric-db-collection/tsconfig.json +++ b/packages/electric-db-collection/tsconfig.json @@ -12,7 +12,9 @@ "forceConsistentCasingInFileNames": true, "jsx": "react", "paths": { - "@tanstack/store": ["../store/src"] + "@tanstack/store": ["../store/src"], + "@tanstack/db-ivm": ["../db-ivm/src"], + "@tanstack/db": ["../db/src"] } }, "include": ["src", "tests", "vite.config.ts"],