diff --git a/.changeset/light-phones-flash.md b/.changeset/light-phones-flash.md new file mode 100644 index 000000000..95a030b32 --- /dev/null +++ b/.changeset/light-phones-flash.md @@ -0,0 +1,5 @@ +--- +"@tanstack/db": patch +--- + +Add predicate comparison and merging utilities (isWhereSubset, intersectWherePredicates, unionWherePredicates, and related functions) to support predicate push-down in collection sync operations, enabling efficient tracking of loaded data ranges and preventing redundant server requests. Includes performance optimizations for large primitive IN predicates and full support for Date objects in equality, range, and IN clause comparisons. diff --git a/.changeset/tender-carpets-cheat.md b/.changeset/tender-carpets-cheat.md new file mode 100644 index 000000000..77c9dfd73 --- /dev/null +++ b/.changeset/tender-carpets-cheat.md @@ -0,0 +1,5 @@ +--- +"@tanstack/electric-db-collection": patch +--- + +Handle predicates that are pushed down. diff --git a/packages/db/src/query/index.ts b/packages/db/src/query/index.ts index 17f4dd8e7..78dd94bfe 100644 --- a/packages/db/src/query/index.ts +++ b/packages/db/src/query/index.ts @@ -57,3 +57,18 @@ export { export { type LiveQueryCollectionConfig } from "./live/types.js" export { type LiveQueryCollectionUtils } from "./live/collection-config-builder.js" + +// Predicate utilities for predicate push-down +export { + isWhereSubset, + intersectWherePredicates, + unionWherePredicates, + minusWherePredicates, + isOrderBySubset, + isLimitSubset, + isPredicateSubset, + intersectPredicates, + unionPredicates, +} from "./predicate-utils.js" + +export { DeduplicatedLoadSubset } from "./subset-dedupe.js" diff --git a/packages/db/src/query/predicate-utils.ts b/packages/db/src/query/predicate-utils.ts new file mode 100644 index 000000000..b979a5550 --- /dev/null +++ b/packages/db/src/query/predicate-utils.ts @@ -0,0 +1,1543 @@ +import type { BasicExpression, Func, OrderBy, PropRef } from "./ir.js" +import type { LoadSubsetOptions } from "../types.js" + +/** + * Check if one where clause is a logical subset of another. + * Returns true if the subset predicate is more restrictive than (or equal to) the superset predicate. + * + * @example + * // age > 20 is subset of age > 10 (more restrictive) + * isWhereSubset(gt(ref('age'), val(20)), gt(ref('age'), val(10))) // true + * + * @example + * // age > 10 AND name = 'X' is subset of age > 10 (more conditions) + * isWhereSubset(and(gt(ref('age'), val(10)), eq(ref('name'), val('X'))), gt(ref('age'), val(10))) // true + * + * @param subset - The potentially more restrictive predicate + * @param superset - The potentially less restrictive predicate + * @returns true if subset logically implies superset + */ +export function isWhereSubset( + subset: BasicExpression | undefined, + superset: BasicExpression | undefined +): boolean { + // undefined/missing where clause means "no filter" (all data) + // Both undefined means subset relationship holds (all data ⊆ all data) + if (subset === undefined && superset === undefined) { + return true + } + + // If subset is undefined but superset is not, we're requesting ALL data + // but have only loaded SOME data - subset relationship does NOT hold + if (subset === undefined && superset !== undefined) { + return false + } + + // If superset is undefined (no filter = all data loaded), + // then any constrained subset is contained + if (superset === undefined && subset !== undefined) { + return true + } + + return isWhereSubsetInternal(subset!, superset!) +} + +function isWhereSubsetInternal( + subset: BasicExpression, + superset: BasicExpression +): boolean { + // If expressions are structurally equal, subset relationship holds + if (areExpressionsEqual(subset, superset)) { + return true + } + + // Handle superset being an AND: subset must imply ALL conjuncts + // If superset is (A AND B), then subset ⊆ (A AND B) only if subset ⊆ A AND subset ⊆ B + // Example: (age > 20) ⊆ (age > 10 AND status = 'active') is false (doesn't imply status condition) + if (superset.type === `func` && superset.name === `and`) { + return superset.args.every((arg) => + isWhereSubsetInternal(subset, arg as BasicExpression) + ) + } + + // Handle subset being an AND: (A AND B) implies both A and B + if (subset.type === `func` && subset.name === `and`) { + // For (A AND B) ⊆ C, since (A AND B) implies A, we check if any conjunct implies C + return subset.args.some((arg) => + isWhereSubsetInternal(arg as BasicExpression, superset) + ) + } + + // Handle OR in subset: (A OR B) is subset of C only if both A and B are subsets of C + if (subset.type === `func` && subset.name === `or`) { + return subset.args.every((arg) => + isWhereSubsetInternal(arg as BasicExpression, superset) + ) + } + + // Handle OR in superset: subset ⊆ (A OR B) if subset ⊆ A or subset ⊆ B + // (A OR B) as superset means data can satisfy A or B + // If subset is contained in any disjunct, it's contained in the union + if (superset.type === `func` && superset.name === `or`) { + return superset.args.some((arg) => + isWhereSubsetInternal(subset, arg as BasicExpression) + ) + } + + // Handle comparison operators on the same field + if (subset.type === `func` && superset.type === `func`) { + const subsetFunc = subset as Func + const supersetFunc = superset as Func + + // Check if both are comparisons on the same field + const subsetField = extractComparisonField(subsetFunc) + const supersetField = extractComparisonField(supersetFunc) + + if ( + subsetField && + supersetField && + areRefsEqual(subsetField.ref, supersetField.ref) + ) { + return isComparisonSubset( + subsetFunc, + subsetField.value, + supersetFunc, + supersetField.value + ) + } + + // Handle eq vs in + if (subsetFunc.name === `eq` && supersetFunc.name === `in`) { + const subsetFieldEq = extractEqualityField(subsetFunc) + const supersetFieldIn = extractInField(supersetFunc) + if ( + subsetFieldEq && + supersetFieldIn && + areRefsEqual(subsetFieldEq.ref, supersetFieldIn.ref) + ) { + // field = X is subset of field IN [X, Y, Z] if X is in the array + // Use cached primitive set and metadata from extraction + return arrayIncludesWithSet( + supersetFieldIn.values, + subsetFieldEq.value, + supersetFieldIn.primitiveSet ?? null, + supersetFieldIn.areAllPrimitives + ) + } + } + + // Handle in vs in + if (subsetFunc.name === `in` && supersetFunc.name === `in`) { + const subsetFieldIn = extractInField(subsetFunc) + const supersetFieldIn = extractInField(supersetFunc) + if ( + subsetFieldIn && + supersetFieldIn && + areRefsEqual(subsetFieldIn.ref, supersetFieldIn.ref) + ) { + // field IN [A, B] is subset of field IN [A, B, C] if all values in subset are in superset + // Use cached primitive set and metadata from extraction + return subsetFieldIn.values.every((subVal) => + arrayIncludesWithSet( + supersetFieldIn.values, + subVal, + supersetFieldIn.primitiveSet ?? null, + supersetFieldIn.areAllPrimitives + ) + ) + } + } + } + + // Conservative: if we can't determine, return false + return false +} + +/** + * Helper to combine where predicates with common logic for AND/OR operations + */ +function combineWherePredicates( + predicates: Array>, + operation: `and` | `or`, + simplifyFn: ( + preds: Array> + ) => BasicExpression | null +): BasicExpression { + const emptyValue = operation === `and` ? true : false + const identityValue = operation === `and` ? true : false + + if (predicates.length === 0) { + return { type: `val`, value: emptyValue } as BasicExpression + } + + if (predicates.length === 1) { + return predicates[0]! + } + + // Flatten nested expressions of the same operation + const flatPredicates: Array> = [] + for (const pred of predicates) { + if (pred.type === `func` && pred.name === operation) { + flatPredicates.push(...(pred.args as Array>)) + } else { + flatPredicates.push(pred) + } + } + + // Group predicates by field for simplification + const grouped = groupPredicatesByField(flatPredicates) + + // Simplify each group + const simplified: Array> = [] + for (const [field, preds] of grouped.entries()) { + if (field === null) { + // Complex predicates that we can't group by field + simplified.push(...preds) + } else { + // Try to simplify same-field predicates + const result = simplifyFn(preds) + + // For intersection: check for empty set (contradiction) + if ( + operation === `and` && + result && + result.type === `val` && + result.value === false + ) { + // Intersection is empty (conflicting constraints) - entire AND is false + return { type: `val`, value: false } as BasicExpression + } + + // For union: result may be null if simplification failed + if (result) { + simplified.push(result) + } + } + } + + if (simplified.length === 0) { + return { type: `val`, value: identityValue } as BasicExpression + } + + if (simplified.length === 1) { + return simplified[0]! + } + + // Return combined predicate + return { + type: `func`, + name: operation, + args: simplified, + } as BasicExpression +} + +/** + * Combine multiple where predicates with AND logic (intersection). + * Returns a predicate that is satisfied only when all input predicates are satisfied. + * Simplifies when possible (e.g., age > 10 AND age > 20 → age > 20). + * Returns a false literal if predicates are contradictory (empty set). + * + * @example + * // Take most restrictive + * intersectWherePredicates([gt(ref('age'), val(10)), gt(ref('age'), val(20))]) // age > 20 + * + * @example + * // Different fields combine with AND + * intersectWherePredicates([gt(ref('age'), val(10)), eq(ref('status'), val('active'))]) + * // age > 10 AND status = 'active' + * + * @example + * // Contradictory predicates return false + * intersectWherePredicates([eq(ref('age'), val(5)), eq(ref('age'), val(6))]) + * // {type: 'val', value: false} + * + * @param predicates - Array of where predicates to intersect + * @returns Combined predicate representing the intersection, or false literal for empty set + */ +export function intersectWherePredicates( + predicates: Array> +): BasicExpression { + return combineWherePredicates(predicates, `and`, intersectSameFieldPredicates) +} + +/** + * Combine multiple where predicates with OR logic (union). + * Returns a predicate that is satisfied when any input predicate is satisfied. + * Simplifies when possible (e.g., age > 10 OR age > 20 → age > 10). + * + * @example + * // Take least restrictive + * unionWherePredicates([gt(ref('age'), val(10)), gt(ref('age'), val(20))]) // age > 10 + * + * @example + * // Combine equals into IN + * unionWherePredicates([eq(ref('age'), val(5)), eq(ref('age'), val(10))]) // age IN [5, 10] + * + * @param predicates - Array of where predicates to union + * @returns Combined predicate representing the union + */ +export function unionWherePredicates( + predicates: Array> +): BasicExpression { + return combineWherePredicates(predicates, `or`, unionSameFieldPredicates) +} + +/** + * Compute the difference between two where predicates: `fromPredicate AND NOT(subtractPredicate)`. + * Returns the simplified predicate, or null if the difference cannot be simplified + * (in which case the caller should fetch the full fromPredicate). + * + * @example + * // Range difference + * minusWherePredicates( + * gt(ref('age'), val(10)), // age > 10 + * gt(ref('age'), val(20)) // age > 20 + * ) // → age > 10 AND age <= 20 + * + * @example + * // Set difference + * minusWherePredicates( + * inOp(ref('status'), ['A', 'B', 'C', 'D']), // status IN ['A','B','C','D'] + * inOp(ref('status'), ['B', 'C']) // status IN ['B','C'] + * ) // → status IN ['A', 'D'] + * + * @example + * // Complete overlap - empty result + * minusWherePredicates( + * gt(ref('age'), val(20)), // age > 20 + * gt(ref('age'), val(10)) // age > 10 + * ) // → {type: 'val', value: false} (empty set) + * + * @param fromPredicate - The predicate to subtract from + * @param subtractPredicate - The predicate to subtract + * @returns The simplified difference, or null if cannot be simplified + */ +export function minusWherePredicates( + fromPredicate: BasicExpression | undefined, + subtractPredicate: BasicExpression | undefined +): BasicExpression | null { + // If nothing to subtract, return the original + if (subtractPredicate === undefined) { + return ( + fromPredicate ?? + ({ type: `val`, value: true } as BasicExpression) + ) + } + + // If from is undefined (all data), we can't simplify NOT(subtract) + // Return null to indicate caller should fetch all data + if (fromPredicate === undefined) { + return null + } + + // Check if fromPredicate is entirely contained in subtractPredicate + // In that case, fromPredicate AND NOT(subtractPredicate) = empty set + if (isWhereSubset(fromPredicate, subtractPredicate)) { + return { type: `val`, value: false } as BasicExpression + } + + // Check if they are on the same field - if so, we can try to simplify + if (fromPredicate.type === `func` && subtractPredicate.type === `func`) { + const result = minusSameFieldPredicates(fromPredicate, subtractPredicate) + if (result !== null) { + return result + } + } + + // Can't simplify - return null to indicate caller should fetch full fromPredicate + return null +} + +/** + * Helper function to compute difference for same-field predicates + */ +function minusSameFieldPredicates( + fromPred: Func, + subtractPred: Func +): BasicExpression | null { + // Extract field information + const fromField = + extractComparisonField(fromPred) || + extractEqualityField(fromPred) || + extractInField(fromPred) + const subtractField = + extractComparisonField(subtractPred) || + extractEqualityField(subtractPred) || + extractInField(subtractPred) + + // Must be on the same field + if ( + !fromField || + !subtractField || + !areRefsEqual(fromField.ref, subtractField.ref) + ) { + return null + } + + // Handle IN minus IN: status IN [A,B,C,D] - status IN [B,C] = status IN [A,D] + if (fromPred.name === `in` && subtractPred.name === `in`) { + const fromInField = fromField as InField + const subtractInField = subtractField as InField + + // Filter out values that are in the subtract set + const remainingValues = fromInField.values.filter( + (v) => + !arrayIncludesWithSet( + subtractInField.values, + v, + subtractInField.primitiveSet ?? null, + subtractInField.areAllPrimitives + ) + ) + + if (remainingValues.length === 0) { + return { type: `val`, value: false } as BasicExpression + } + + if (remainingValues.length === 1) { + return { + type: `func`, + name: `eq`, + args: [fromField.ref, { type: `val`, value: remainingValues[0] }], + } as BasicExpression + } + + return { + type: `func`, + name: `in`, + args: [fromField.ref, { type: `val`, value: remainingValues }], + } as BasicExpression + } + + // Handle IN minus equality: status IN [A,B,C] - status = B = status IN [A,C] + if (fromPred.name === `in` && subtractPred.name === `eq`) { + const fromInField = fromField as InField + const subtractValue = (subtractField as { ref: PropRef; value: any }).value + + const remainingValues = fromInField.values.filter( + (v) => !areValuesEqual(v, subtractValue) + ) + + if (remainingValues.length === 0) { + return { type: `val`, value: false } as BasicExpression + } + + if (remainingValues.length === 1) { + return { + type: `func`, + name: `eq`, + args: [fromField.ref, { type: `val`, value: remainingValues[0] }], + } as BasicExpression + } + + return { + type: `func`, + name: `in`, + args: [fromField.ref, { type: `val`, value: remainingValues }], + } as BasicExpression + } + + // Handle equality minus equality: age = 15 - age = 15 = empty, age = 15 - age = 20 = age = 15 + if (fromPred.name === `eq` && subtractPred.name === `eq`) { + const fromValue = (fromField as { ref: PropRef; value: any }).value + const subtractValue = (subtractField as { ref: PropRef; value: any }).value + + if (areValuesEqual(fromValue, subtractValue)) { + return { type: `val`, value: false } as BasicExpression + } + + // No overlap - return original + return fromPred as BasicExpression + } + + // Handle range minus range: age > 10 - age > 20 = age > 10 AND age <= 20 + const fromComp = extractComparisonField(fromPred) + const subtractComp = extractComparisonField(subtractPred) + + if ( + fromComp && + subtractComp && + areRefsEqual(fromComp.ref, subtractComp.ref) + ) { + // Try to compute the difference using range logic + const result = minusRangePredicates( + fromPred, + fromComp.value, + subtractPred, + subtractComp.value + ) + return result + } + + // Can't simplify + return null +} + +/** + * Helper to compute difference between range predicates + */ +function minusRangePredicates( + fromFunc: Func, + fromValue: any, + subtractFunc: Func, + subtractValue: any +): BasicExpression | null { + const fromOp = fromFunc.name as `gt` | `gte` | `lt` | `lte` | `eq` + const subtractOp = subtractFunc.name as `gt` | `gte` | `lt` | `lte` | `eq` + const ref = (extractComparisonField(fromFunc) || + extractEqualityField(fromFunc))!.ref + + // age > 10 - age > 20 = (age > 10 AND age <= 20) + if (fromOp === `gt` && subtractOp === `gt`) { + if (fromValue < subtractValue) { + // Result is: fromValue < field <= subtractValue + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } + // fromValue >= subtractValue means no overlap + return fromFunc as BasicExpression + } + + // age >= 10 - age >= 20 = (age >= 10 AND age < 20) + if (fromOp === `gte` && subtractOp === `gte`) { + if (fromValue < subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age > 10 - age >= 20 = (age > 10 AND age < 20) + if (fromOp === `gt` && subtractOp === `gte`) { + if (fromValue < subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age >= 10 - age > 20 = (age >= 10 AND age <= 20) + if (fromOp === `gte` && subtractOp === `gt`) { + if (fromValue <= subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + fromFunc as BasicExpression, + { + type: `func`, + name: `lte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age < 30 - age < 20 = (age >= 20 AND age < 30) + if (fromOp === `lt` && subtractOp === `lt`) { + if (fromValue > subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age <= 30 - age <= 20 = (age > 20 AND age <= 30) + if (fromOp === `lte` && subtractOp === `lte`) { + if (fromValue > subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age < 30 - age <= 20 = (age > 20 AND age < 30) + if (fromOp === `lt` && subtractOp === `lte`) { + if (fromValue > subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gt`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // age <= 30 - age < 20 = (age >= 20 AND age <= 30) + if (fromOp === `lte` && subtractOp === `lt`) { + if (fromValue >= subtractValue) { + return { + type: `func`, + name: `and`, + args: [ + { + type: `func`, + name: `gte`, + args: [ref, { type: `val`, value: subtractValue }], + } as BasicExpression, + fromFunc as BasicExpression, + ], + } as BasicExpression + } + return fromFunc as BasicExpression + } + + // Can't simplify other combinations + return null +} + +/** + * Check if one orderBy clause is a subset of another. + * Returns true if the subset ordering requirements are satisfied by the superset ordering. + * + * @example + * // Subset is prefix of superset + * isOrderBySubset([{expr: age, asc}], [{expr: age, asc}, {expr: name, desc}]) // true + * + * @param subset - The ordering requirements to check + * @param superset - The ordering that might satisfy the requirements + * @returns true if subset is satisfied by superset + */ +export function isOrderBySubset( + subset: OrderBy | undefined, + superset: OrderBy | undefined +): boolean { + // No ordering requirement is always satisfied + if (!subset || subset.length === 0) { + return true + } + + // If there's no superset ordering but subset requires ordering, not satisfied + if (!superset || superset.length === 0) { + return false + } + + // Check if subset is a prefix of superset with matching expressions and compare options + if (subset.length > superset.length) { + return false + } + + for (let i = 0; i < subset.length; i++) { + const subClause = subset[i]! + const superClause = superset[i]! + + // Check if expressions match + if (!areExpressionsEqual(subClause.expression, superClause.expression)) { + return false + } + + // Check if compare options match + if ( + !areCompareOptionsEqual( + subClause.compareOptions, + superClause.compareOptions + ) + ) { + return false + } + } + + return true +} + +/** + * Check if one limit is a subset of another. + * Returns true if the subset limit requirements are satisfied by the superset limit. + * + * @example + * isLimitSubset(10, 20) // true (requesting 10 items when 20 are available) + * isLimitSubset(20, 10) // false (requesting 20 items when only 10 are available) + * isLimitSubset(10, undefined) // true (requesting 10 items when unlimited are available) + * + * @param subset - The limit requirement to check + * @param superset - The limit that might satisfy the requirement + * @returns true if subset is satisfied by superset + */ +export function isLimitSubset( + subset: number | undefined, + superset: number | undefined +): boolean { + // No limit requirement is always satisfied + if (subset === undefined) { + return true + } + + // Unlimited superset satisfies any limit requirement + if (superset === undefined) { + return true + } + + // Otherwise, subset must be less than or equal to superset + return subset <= superset +} + +/** + * Check if one predicate (where + orderBy + limit) is a subset of another. + * Returns true if all aspects of the subset predicate are satisfied by the superset. + * + * @example + * isPredicateSubset( + * { where: gt(ref('age'), val(20)), limit: 10 }, + * { where: gt(ref('age'), val(10)), limit: 20 } + * ) // true + * + * @param subset - The predicate requirements to check + * @param superset - The predicate that might satisfy the requirements + * @returns true if subset is satisfied by superset + */ +export function isPredicateSubset( + subset: LoadSubsetOptions, + superset: LoadSubsetOptions +): boolean { + return ( + isWhereSubset(subset.where, superset.where) && + isOrderBySubset(subset.orderBy, superset.orderBy) && + isLimitSubset(subset.limit, superset.limit) + ) +} + +/** + * Helper to combine predicates (where + orderBy + limit) + */ +function combinePredicates( + predicates: Array, + operation: `intersect` | `union`, + whereFn: ( + clauses: Array> + ) => BasicExpression +): LoadSubsetOptions { + if (predicates.length === 0) { + return {} + } + + if (predicates.length === 1) { + return predicates[0]! + } + + // Combine where clauses + const whereClauses = predicates + .map((p) => p.where) + .filter((w): w is BasicExpression => w !== undefined) + + const mergedWhere = + whereClauses.length > 0 ? whereFn(whereClauses) : undefined + + // OrderBy logic differs by operation + const mergedOrderBy = + operation === `intersect` + ? predicates.find((p) => p.orderBy && p.orderBy.length > 0)?.orderBy + : undefined // Union: different orderings can't be combined + + // Limit logic + const limits = predicates + .map((p) => p.limit) + .filter((l): l is number => l !== undefined) + + const mergedLimit = + operation === `intersect` + ? limits.length === 0 + ? undefined + : Math.min(...limits) // All unlimited = unlimited, else min + : limits.length === predicates.length && limits.length > 0 + ? Math.min(...limits) + : undefined // Min only if all have limits + + return { + where: mergedWhere, + orderBy: mergedOrderBy, + limit: mergedLimit, + } +} + +/** + * Merge multiple predicates by intersecting their where clauses. + * Intersection semantics: returns predicate satisfied by data matching ALL input predicates. + * For limits, this means the MINIMUM (most restrictive) limit. + * + * @param predicates - Array of predicates to merge + * @returns Combined predicate representing the intersection + */ +export function intersectPredicates( + predicates: Array +): LoadSubsetOptions { + return combinePredicates(predicates, `intersect`, intersectWherePredicates) +} + +/** + * Merge multiple predicates by unioning their where clauses. + * + * @param predicates - Array of predicates to merge + * @returns Combined predicate + */ +export function unionPredicates( + predicates: Array +): LoadSubsetOptions { + return combinePredicates(predicates, `union`, unionWherePredicates) +} + +// ============================================================================ +// Helper functions +// ============================================================================ + +/** + * Find a predicate with a specific operator and value + */ +function findPredicateWithOperator( + predicates: Array>, + operator: string, + value: any +): BasicExpression | undefined { + return predicates.find((p) => { + if (p.type === `func`) { + const f = p as Func + const field = extractComparisonField(f) + return f.name === operator && field && areValuesEqual(field.value, value) + } + return false + }) +} + +function areExpressionsEqual(a: BasicExpression, b: BasicExpression): boolean { + if (a.type !== b.type) { + return false + } + + if (a.type === `val` && b.type === `val`) { + return areValuesEqual(a.value, b.value) + } + + if (a.type === `ref` && b.type === `ref`) { + return areRefsEqual(a, b) + } + + if (a.type === `func` && b.type === `func`) { + const aFunc = a + const bFunc = b + if (aFunc.name !== bFunc.name) { + return false + } + if (aFunc.args.length !== bFunc.args.length) { + return false + } + return aFunc.args.every((arg, i) => + areExpressionsEqual(arg, bFunc.args[i]!) + ) + } + + return false +} + +function areValuesEqual(a: any, b: any): boolean { + // Simple equality check - could be enhanced for deep object comparison + if (a === b) { + return true + } + + // Handle NaN + if (typeof a === `number` && typeof b === `number` && isNaN(a) && isNaN(b)) { + return true + } + + // Handle Date objects + if (a instanceof Date && b instanceof Date) { + return a.getTime() === b.getTime() + } + + // For arrays and objects, use reference equality + // (In practice, we don't need deep equality for these cases - + // same object reference means same value for our use case) + if ( + typeof a === `object` && + typeof b === `object` && + a !== null && + b !== null + ) { + return a === b + } + + return false +} + +function areRefsEqual(a: PropRef, b: PropRef): boolean { + if (a.path.length !== b.path.length) { + return false + } + return a.path.every((segment, i) => segment === b.path[i]) +} + +/** + * Check if a value is a primitive (string, number, boolean, null, undefined) + * Primitives can use Set for fast lookups + */ +function isPrimitive(value: any): boolean { + return ( + value === null || + value === undefined || + typeof value === `string` || + typeof value === `number` || + typeof value === `boolean` + ) +} + +/** + * Check if all values in an array are primitives + */ +function areAllPrimitives(values: Array): boolean { + return values.every(isPrimitive) +} + +/** + * Check if a value is in an array, with optional pre-built Set for optimization. + * The primitiveSet is cached in InField during extraction and reused for all lookups. + */ +function arrayIncludesWithSet( + array: Array, + value: any, + primitiveSet: Set | null, + arrayIsAllPrimitives?: boolean +): boolean { + // Fast path: use pre-built Set for O(1) lookup + if (primitiveSet) { + // Skip isPrimitive check if we know the value must be primitive for a match + // (if array is all primitives, only primitives can match) + if (arrayIsAllPrimitives || isPrimitive(value)) { + return primitiveSet.has(value) + } + return false // Non-primitive can't be in primitive-only set + } + + // Fallback: use areValuesEqual for Dates and objects + return array.some((v) => areValuesEqual(v, value)) +} + +/** + * Intersect two arrays, with optional pre-built Set for optimization. + * The set2 is cached in InField during extraction and reused for all operations. + */ +function intersectArraysWithSet( + arr1: Array, + arr2: Array, + set2: Set | null +): Array { + // Fast path: use pre-built Set for O(n) intersection + if (set2) { + // If set2 exists, arr2 contains ONLY primitives (that's when we build the Set). + // So we can skip non-primitives in arr1 immediately - they can't be in arr2. + return arr1.filter((v) => isPrimitive(v) && set2.has(v)) + } + + // Fallback: use areValuesEqual for all comparisons + return arr1.filter((v) => arr2.some((v2) => areValuesEqual(v, v2))) +} + +/** + * Get the maximum of two values, handling both numbers and Dates + */ +function maxValue(a: any, b: any): any { + if (a instanceof Date && b instanceof Date) { + return a.getTime() > b.getTime() ? a : b + } + return Math.max(a, b) +} + +/** + * Get the minimum of two values, handling both numbers and Dates + */ +function minValue(a: any, b: any): any { + if (a instanceof Date && b instanceof Date) { + return a.getTime() < b.getTime() ? a : b + } + return Math.min(a, b) +} + +function areCompareOptionsEqual( + a: { direction?: `asc` | `desc`; [key: string]: any }, + b: { direction?: `asc` | `desc`; [key: string]: any } +): boolean { + // For now, just compare direction - could be enhanced for other options + return a.direction === b.direction +} + +interface ComparisonField { + ref: PropRef + value: any +} + +function extractComparisonField(func: Func): ComparisonField | null { + // Handle comparison operators: eq, gt, gte, lt, lte + if ([`eq`, `gt`, `gte`, `lt`, `lte`].includes(func.name)) { + // Assume first arg is ref, second is value + const firstArg = func.args[0] + const secondArg = func.args[1] + + if (firstArg?.type === `ref` && secondArg?.type === `val`) { + return { + ref: firstArg, + value: secondArg.value, + } + } + } + + return null +} + +function extractEqualityField(func: Func): ComparisonField | null { + if (func.name === `eq`) { + const firstArg = func.args[0] + const secondArg = func.args[1] + + if (firstArg?.type === `ref` && secondArg?.type === `val`) { + return { + ref: firstArg, + value: secondArg.value, + } + } + } + return null +} + +interface InField { + ref: PropRef + values: Array + // Cached optimization data (computed once, reused many times) + areAllPrimitives?: boolean + primitiveSet?: Set | null +} + +function extractInField(func: Func): InField | null { + if (func.name === `in`) { + const firstArg = func.args[0] + const secondArg = func.args[1] + + if ( + firstArg?.type === `ref` && + secondArg?.type === `val` && + Array.isArray(secondArg.value) + ) { + let values = secondArg.value + // Precompute optimization metadata once + const allPrimitives = areAllPrimitives(values) + let primitiveSet: Set | null = null + + if (allPrimitives && values.length > 10) { + // Build Set and dedupe values at the same time + primitiveSet = new Set(values) + // If we found duplicates, use the deduped array going forward + if (primitiveSet.size < values.length) { + values = Array.from(primitiveSet) + } + } + + return { + ref: firstArg, + values, + areAllPrimitives: allPrimitives, + primitiveSet, + } + } + } + return null +} + +function isComparisonSubset( + subsetFunc: Func, + subsetValue: any, + supersetFunc: Func, + supersetValue: any +): boolean { + const subOp = subsetFunc.name + const superOp = supersetFunc.name + + // Handle same operator + if (subOp === superOp) { + if (subOp === `eq`) { + // field = X is subset of field = X only + // Fast path: primitives can use strict equality + if (isPrimitive(subsetValue) && isPrimitive(supersetValue)) { + return subsetValue === supersetValue + } + return areValuesEqual(subsetValue, supersetValue) + } else if (subOp === `gt`) { + // field > 20 is subset of field > 10 if 20 > 10 + return subsetValue >= supersetValue + } else if (subOp === `gte`) { + // field >= 20 is subset of field >= 10 if 20 >= 10 + return subsetValue >= supersetValue + } else if (subOp === `lt`) { + // field < 10 is subset of field < 20 if 10 <= 20 + return subsetValue <= supersetValue + } else if (subOp === `lte`) { + // field <= 10 is subset of field <= 20 if 10 <= 20 + return subsetValue <= supersetValue + } + } + + // Handle different operators on same field + // eq vs gt/gte: field = 15 is subset of field > 10 if 15 > 10 + if (subOp === `eq` && superOp === `gt`) { + return subsetValue > supersetValue + } + if (subOp === `eq` && superOp === `gte`) { + return subsetValue >= supersetValue + } + if (subOp === `eq` && superOp === `lt`) { + return subsetValue < supersetValue + } + if (subOp === `eq` && superOp === `lte`) { + return subsetValue <= supersetValue + } + + // gt/gte vs gte/gt + if (subOp === `gt` && superOp === `gte`) { + // field > 10 is subset of field >= 10 if 10 >= 10 (always true for same value) + return subsetValue >= supersetValue + } + if (subOp === `gte` && superOp === `gt`) { + // field >= 11 is subset of field > 10 if 11 > 10 + return subsetValue > supersetValue + } + + // lt/lte vs lte/lt + if (subOp === `lt` && superOp === `lte`) { + // field < 10 is subset of field <= 10 if 10 <= 10 + return subsetValue <= supersetValue + } + if (subOp === `lte` && superOp === `lt`) { + // field <= 9 is subset of field < 10 if 9 < 10 + return subsetValue < supersetValue + } + + return false +} + +function groupPredicatesByField( + predicates: Array> +): Map>> { + const groups = new Map>>() + + for (const pred of predicates) { + let fieldKey: string | null = null + + if (pred.type === `func`) { + const func = pred as Func + const field = + extractComparisonField(func) || + extractEqualityField(func) || + extractInField(func) + if (field) { + fieldKey = field.ref.path.join(`.`) + } + } + + const group = groups.get(fieldKey) || [] + group.push(pred) + groups.set(fieldKey, group) + } + + return groups +} + +function intersectSameFieldPredicates( + predicates: Array> +): BasicExpression { + if (predicates.length === 1) { + return predicates[0]! + } + + // Try to extract range constraints + let minGt: number | null = null + let minGte: number | null = null + let maxLt: number | null = null + let maxLte: number | null = null + const eqValues: Set = new Set() + const inFields: Array = [] // Store full InField objects to access cached data + const otherPredicates: Array> = [] + + for (const pred of predicates) { + if (pred.type === `func`) { + const func = pred as Func + const field = extractComparisonField(func) + + if (field) { + const value = field.value + if (func.name === `gt`) { + minGt = minGt === null ? value : maxValue(minGt, value) + } else if (func.name === `gte`) { + minGte = minGte === null ? value : maxValue(minGte, value) + } else if (func.name === `lt`) { + maxLt = maxLt === null ? value : minValue(maxLt, value) + } else if (func.name === `lte`) { + maxLte = maxLte === null ? value : minValue(maxLte, value) + } else if (func.name === `eq`) { + eqValues.add(value) + } else { + otherPredicates.push(pred) + } + } else { + const inField = extractInField(func) + if (inField) { + inFields.push(inField) // Store full InField with cached primitiveSet + } else { + otherPredicates.push(pred) + } + } + } else { + otherPredicates.push(pred) + } + } + + // Check for conflicting equality values (field = 5 AND field = 6 → empty set) + // Need to use areValuesEqual for proper Date/object comparison + const uniqueEqValues: Array = [] + for (const value of eqValues) { + if (!uniqueEqValues.some((v) => areValuesEqual(v, value))) { + uniqueEqValues.push(value) + } + } + if (uniqueEqValues.length > 1) { + return { type: `val`, value: false } as BasicExpression + } + + // If we have an equality, that's the most restrictive + const eqValue = uniqueEqValues.length === 1 ? uniqueEqValues[0] : null + if (eqValue !== null) { + // Check if it satisfies the range constraints + if (minGt !== null && !(eqValue > minGt)) { + return { type: `val`, value: false } as BasicExpression + } + if (minGte !== null && !(eqValue >= minGte)) { + return { type: `val`, value: false } as BasicExpression + } + if (maxLt !== null && !(eqValue < maxLt)) { + return { type: `val`, value: false } as BasicExpression + } + if (maxLte !== null && !(eqValue <= maxLte)) { + return { type: `val`, value: false } as BasicExpression + } + + // Check if it's in all IN sets (use cached primitive sets and metadata) + for (const inField of inFields) { + if ( + !arrayIncludesWithSet( + inField.values, + eqValue, + inField.primitiveSet ?? null, + inField.areAllPrimitives + ) + ) { + return { type: `val`, value: false } as BasicExpression + } + } + + // Return just the equality (use areValuesEqual for Date support) + return predicates.find((p) => { + if (p.type === `func`) { + const f = p as Func + const field = extractComparisonField(f) + return f.name === `eq` && field && areValuesEqual(field.value, eqValue) + } + return false + })! + } + + // Handle intersection of multiple IN clauses (use cached primitive sets) + let intersectedInValues: Array | null = null + if (inFields.length > 0) { + // All primitive sets already cached in inFields from extraction + intersectedInValues = [...inFields[0]!.values] + for (let i = 1; i < inFields.length; i++) { + const currentField = inFields[i]! + intersectedInValues = intersectArraysWithSet( + intersectedInValues, + currentField.values, + currentField.primitiveSet ?? null + ) + // Early exit if intersection becomes empty + if (intersectedInValues.length === 0) { + return { type: `val`, value: false } as BasicExpression + } + } + } + + // Build the most restrictive range + const result: Array> = [] + + // Choose the most restrictive lower bound + if (minGt !== null && minGte !== null) { + // If we have both > and >=, use > if it's more restrictive + const pred = + minGt >= minGte + ? findPredicateWithOperator(predicates, `gt`, minGt) + : findPredicateWithOperator(predicates, `gte`, minGte) + if (pred) result.push(pred) + } else if (minGt !== null) { + const pred = findPredicateWithOperator(predicates, `gt`, minGt) + if (pred) result.push(pred) + } else if (minGte !== null) { + const pred = findPredicateWithOperator(predicates, `gte`, minGte) + if (pred) result.push(pred) + } + + // Choose the most restrictive upper bound + if (maxLt !== null && maxLte !== null) { + const pred = + maxLt <= maxLte + ? findPredicateWithOperator(predicates, `lt`, maxLt) + : findPredicateWithOperator(predicates, `lte`, maxLte) + if (pred) result.push(pred) + } else if (maxLt !== null) { + const pred = findPredicateWithOperator(predicates, `lt`, maxLt) + if (pred) result.push(pred) + } else if (maxLte !== null) { + const pred = findPredicateWithOperator(predicates, `lte`, maxLte) + if (pred) result.push(pred) + } + + // Add intersected IN values if present + if (intersectedInValues !== null && intersectedInValues.length > 0) { + // Get the ref from one of the original IN predicates + const firstInPred = predicates.find((p) => { + if (p.type === `func`) { + return (p as Func).name === `in` + } + return false + }) + + if (firstInPred && firstInPred.type === `func`) { + const ref = (firstInPred as Func).args[0] + result.push({ + type: `func`, + name: `in`, + args: [ + ref, + { type: `val`, value: intersectedInValues } as BasicExpression, + ], + } as BasicExpression) + } + } + + // Add other predicates + result.push(...otherPredicates) + + if (result.length === 0) { + return { type: `val`, value: true } as BasicExpression + } + + if (result.length === 1) { + return result[0]! + } + + return { + type: `func`, + name: `and`, + args: result, + } as BasicExpression +} + +function unionSameFieldPredicates( + predicates: Array> +): BasicExpression | null { + if (predicates.length === 1) { + return predicates[0]! + } + + // Try to extract range constraints + let maxGt: number | null = null + let maxGte: number | null = null + let minLt: number | null = null + let minLte: number | null = null + const eqValues: Set = new Set() + const inValues: Set = new Set() + const otherPredicates: Array> = [] + + for (const pred of predicates) { + if (pred.type === `func`) { + const func = pred as Func + const field = extractComparisonField(func) + + if (field) { + const value = field.value + if (func.name === `gt`) { + maxGt = maxGt === null ? value : minValue(maxGt, value) + } else if (func.name === `gte`) { + maxGte = maxGte === null ? value : minValue(maxGte, value) + } else if (func.name === `lt`) { + minLt = minLt === null ? value : maxValue(minLt, value) + } else if (func.name === `lte`) { + minLte = minLte === null ? value : maxValue(minLte, value) + } else if (func.name === `eq`) { + eqValues.add(value) + } else { + otherPredicates.push(pred) + } + } else { + const inField = extractInField(func) + if (inField) { + for (const val of inField.values) { + inValues.add(val) + } + } else { + otherPredicates.push(pred) + } + } + } else { + otherPredicates.push(pred) + } + } + + // If we have multiple equality values, combine into IN + if (eqValues.size > 1 || (eqValues.size > 0 && inValues.size > 0)) { + const allValues = [...eqValues, ...inValues] + const ref = predicates.find((p) => { + if (p.type === `func`) { + const field = + extractComparisonField(p as Func) || extractInField(p as Func) + return field !== null + } + return false + }) + + if (ref && ref.type === `func`) { + const field = + extractComparisonField(ref as Func) || extractInField(ref as Func) + if (field) { + return { + type: `func`, + name: `in`, + args: [ + field.ref, + { type: `val`, value: allValues } as BasicExpression, + ], + } as BasicExpression + } + } + } + + // Build the least restrictive range + const result: Array> = [] + + // Choose the least restrictive lower bound + if (maxGt !== null && maxGte !== null) { + // Take the smaller one (less restrictive) + const pred = + maxGte <= maxGt + ? findPredicateWithOperator(predicates, `gte`, maxGte) + : findPredicateWithOperator(predicates, `gt`, maxGt) + if (pred) result.push(pred) + } else if (maxGt !== null) { + const pred = findPredicateWithOperator(predicates, `gt`, maxGt) + if (pred) result.push(pred) + } else if (maxGte !== null) { + const pred = findPredicateWithOperator(predicates, `gte`, maxGte) + if (pred) result.push(pred) + } + + // Choose the least restrictive upper bound + if (minLt !== null && minLte !== null) { + const pred = + minLte >= minLt + ? findPredicateWithOperator(predicates, `lte`, minLte) + : findPredicateWithOperator(predicates, `lt`, minLt) + if (pred) result.push(pred) + } else if (minLt !== null) { + const pred = findPredicateWithOperator(predicates, `lt`, minLt) + if (pred) result.push(pred) + } else if (minLte !== null) { + const pred = findPredicateWithOperator(predicates, `lte`, minLte) + if (pred) result.push(pred) + } + + // Add single eq value + if (eqValues.size === 1 && inValues.size === 0) { + const pred = findPredicateWithOperator(predicates, `eq`, [...eqValues][0]) + if (pred) result.push(pred) + } + + // Add IN if only IN values + if (eqValues.size === 0 && inValues.size > 0) { + result.push( + predicates.find((p) => { + if (p.type === `func`) { + return (p as Func).name === `in` + } + return false + })! + ) + } + + // Add other predicates + result.push(...otherPredicates) + + if (result.length === 0) { + return { type: `val`, value: true } as BasicExpression + } + + if (result.length === 1) { + return result[0]! + } + + return { + type: `func`, + name: `or`, + args: result, + } as BasicExpression +} diff --git a/packages/db/src/query/subset-dedupe.ts b/packages/db/src/query/subset-dedupe.ts new file mode 100644 index 000000000..b812b8511 --- /dev/null +++ b/packages/db/src/query/subset-dedupe.ts @@ -0,0 +1,243 @@ +import { + isPredicateSubset, + isWhereSubset, + unionWherePredicates, +} from "./predicate-utils.js" +import type { BasicExpression } from "./ir.js" +import type { LoadSubsetOptions } from "../types.js" + +/** + * Deduplicated wrapper for a loadSubset function. + * Tracks what data has been loaded and avoids redundant calls by applying + * subset logic to predicates. + * + * @example + * const dedupe = new DeduplicatedLoadSubset(myLoadSubset) + * + * // First call - fetches data + * await dedupe.loadSubset({ where: gt(ref('age'), val(10)) }) + * + * // Second call - subset of first, returns true immediately + * await dedupe.loadSubset({ where: gt(ref('age'), val(20)) }) + * + * // Clear state to start fresh + * dedupe.reset() + */ +export class DeduplicatedLoadSubset { + // The underlying loadSubset function to wrap + private readonly _loadSubset: ( + options: LoadSubsetOptions + ) => true | Promise + + // Combined where predicate for all unlimited calls (no limit) + private unlimitedWhere: BasicExpression | undefined = undefined + + // Flag to track if we've loaded all data (unlimited call with no where clause) + private hasLoadedAllData = false + + // List of all limited calls (with limit, possibly with orderBy) + // We clone options before storing to prevent mutation of stored predicates + private limitedCalls: Array = [] + + // Track in-flight calls to prevent concurrent duplicate requests + // We store both the options and the promise so we can apply subset logic + private inflightCalls: Array<{ + options: LoadSubsetOptions + promise: Promise + }> = [] + + // Generation counter to invalidate in-flight requests after reset() + // When reset() is called, this increments, and any in-flight completion handlers + // check if their captured generation matches before updating tracking state + private generation = 0 + + constructor( + loadSubset: (options: LoadSubsetOptions) => true | Promise + ) { + this._loadSubset = loadSubset + } + + /** + * Load a subset of data, with automatic deduplication based on previously + * loaded predicates and in-flight requests. + * + * This method is auto-bound, so it can be safely passed as a callback without + * losing its `this` context (e.g., `loadSubset: dedupe.loadSubset` in a sync config). + * + * @param options - The predicate options (where, orderBy, limit) + * @returns true if data is already loaded, or a Promise that resolves when data is loaded + */ + loadSubset = (options: LoadSubsetOptions): true | Promise => { + // If we've loaded all data, everything is covered + if (this.hasLoadedAllData) { + return true + } + + // Check against unlimited combined predicate + // If we've loaded all data matching a where clause, we don't need to refetch subsets + if (this.unlimitedWhere !== undefined && options.where !== undefined) { + if (isWhereSubset(options.where, this.unlimitedWhere)) { + return true // Data already loaded via unlimited call + } + } + + // Check against limited calls + if (options.limit !== undefined) { + const alreadyLoaded = this.limitedCalls.some((loaded) => + isPredicateSubset(options, loaded) + ) + + if (alreadyLoaded) { + return true // Already loaded + } + } + + // Check against in-flight calls using the same subset logic as resolved calls + // This prevents duplicate requests when concurrent calls have subset relationships + const matchingInflight = this.inflightCalls.find((inflight) => { + // For unlimited calls, check if the incoming where is a subset of the in-flight where + if (inflight.options.limit === undefined && options.limit === undefined) { + // Both unlimited - check where subset + if (inflight.options.where === undefined) { + // In-flight is loading all data, so incoming is covered + return true + } + if (options.where !== undefined) { + return isWhereSubset(options.where, inflight.options.where) + } + return false + } + + // For limited calls, use the full predicate subset check (where + orderBy + limit) + if (inflight.options.limit !== undefined && options.limit !== undefined) { + return isPredicateSubset(options, inflight.options) + } + + // Mixed unlimited/limited - limited calls can be covered by unlimited calls + if (inflight.options.limit === undefined && options.limit !== undefined) { + // In-flight is unlimited, incoming is limited + if (inflight.options.where === undefined) { + // In-flight is loading all data + return true + } + if (options.where !== undefined) { + return isWhereSubset(options.where, inflight.options.where) + } + } + + return false + }) + + if (matchingInflight !== undefined) { + // An in-flight call will load data that covers this request + // Return the same promise so this caller waits for the data to load + // The in-flight promise already handles tracking updates when it completes + return matchingInflight.promise + } + + // Not covered by existing data - call underlying loadSubset + const resultPromise = this._loadSubset(options) + + // Handle both sync (true) and async (Promise) return values + if (resultPromise === true) { + // Sync return - update tracking synchronously + // Clone options before storing to protect against caller mutation + this.updateTracking(cloneOptions(options)) + return true + } else { + // Async return - track the promise and update tracking after it resolves + // Clone options BEFORE entering async context to prevent mutation issues + const clonedOptions = cloneOptions(options) + + // Capture the current generation - this lets us detect if reset() was called + // while this request was in-flight, so we can skip updating tracking state + const capturedGeneration = this.generation + + // We need to create a reference to the in-flight entry so we can remove it later + const inflightEntry = { + options: clonedOptions, // Store cloned options for subset matching + promise: resultPromise + .then((result) => { + // Only update tracking if this request is still from the current generation + // If reset() was called, the generation will have incremented and we should + // not repopulate the state that was just cleared + if (capturedGeneration === this.generation) { + // Use the cloned options that we captured before any caller mutations + // This ensures we track exactly what was loaded, not what the caller changed + this.updateTracking(clonedOptions) + } + return result + }) + .finally(() => { + // Always remove from in-flight array on completion OR rejection + // This ensures failed requests can be retried instead of being cached forever + const index = this.inflightCalls.indexOf(inflightEntry) + if (index !== -1) { + this.inflightCalls.splice(index, 1) + } + }), + } + + // Store the in-flight entry so concurrent subset calls can wait for it + this.inflightCalls.push(inflightEntry) + return inflightEntry.promise + } + } + + /** + * Reset all tracking state. + * Clears the history of loaded predicates and in-flight calls. + * Use this when you want to start fresh, for example after clearing the underlying data store. + * + * Note: Any in-flight requests will still complete, but they will not update the tracking + * state after the reset. This prevents old requests from repopulating cleared state. + */ + reset(): void { + this.unlimitedWhere = undefined + this.hasLoadedAllData = false + this.limitedCalls = [] + this.inflightCalls = [] + // Increment generation to invalidate any in-flight completion handlers + // This ensures requests that were started before reset() don't repopulate the state + this.generation++ + } + + private updateTracking(options: LoadSubsetOptions): void { + // Update tracking based on whether this was a limited or unlimited call + if (options.limit === undefined) { + // Unlimited call - update combined where predicate + // We ignore orderBy for unlimited calls as mentioned in requirements + if (options.where === undefined) { + // No where clause = all data loaded + this.hasLoadedAllData = true + this.unlimitedWhere = undefined + } else if (this.unlimitedWhere === undefined) { + this.unlimitedWhere = options.where + } else { + this.unlimitedWhere = unionWherePredicates([ + this.unlimitedWhere, + options.where, + ]) + } + } else { + // Limited call - add to list for future subset checks + // Options are already cloned by caller to prevent mutation issues + this.limitedCalls.push(options) + } + } +} + +/** + * Clones a LoadSubsetOptions object to prevent mutation of stored predicates. + * This is crucial because callers often reuse the same options object and mutate + * properties like limit or where between calls. Without cloning, our stored history + * would reflect the mutated values rather than what was actually loaded. + */ +function cloneOptions(options: LoadSubsetOptions): LoadSubsetOptions { + return { + where: options.where, + orderBy: options.orderBy, + limit: options.limit, + // Note: We don't clone subscription as it's not part of predicate matching + } +} diff --git a/packages/db/tests/predicate-utils.test.ts b/packages/db/tests/predicate-utils.test.ts new file mode 100644 index 000000000..26849a204 --- /dev/null +++ b/packages/db/tests/predicate-utils.test.ts @@ -0,0 +1,1341 @@ +import { describe, expect, it } from "vitest" +import { + intersectPredicates, + intersectWherePredicates, + isLimitSubset, + isOrderBySubset, + isPredicateSubset, + isWhereSubset, + minusWherePredicates, + unionPredicates, + unionWherePredicates, +} from "../src/query/predicate-utils" +import { Func, PropRef, Value } from "../src/query/ir" +import type { BasicExpression, OrderBy, OrderByClause } from "../src/query/ir" +import type { LoadSubsetOptions } from "../src/types" + +// Helper functions to build expressions more easily +function ref(path: string | Array): PropRef { + return new PropRef(typeof path === `string` ? [path] : path) +} + +function val(value: any): Value { + return new Value(value) +} + +function func(name: string, ...args: Array): Func { + return new Func(name, args) +} + +function eq(left: BasicExpression, right: BasicExpression): Func { + return func(`eq`, left, right) +} + +function gt(left: BasicExpression, right: BasicExpression): Func { + return func(`gt`, left, right) +} + +function gte(left: BasicExpression, right: BasicExpression): Func { + return func(`gte`, left, right) +} + +function lt(left: BasicExpression, right: BasicExpression): Func { + return func(`lt`, left, right) +} + +function lte(left: BasicExpression, right: BasicExpression): Func { + return func(`lte`, left, right) +} + +function and(...args: Array): Func { + return func(`and`, ...args) +} + +function or(...args: Array): Func { + return func(`or`, ...args) +} + +function inOp(left: BasicExpression, values: Array): Func { + return func(`in`, left, val(values)) +} + +function orderByClause( + expression: BasicExpression, + direction: `asc` | `desc` = `asc` +): OrderByClause { + return { + expression, + compareOptions: { + direction, + nulls: `last`, + stringSort: `lexical`, + }, + } +} + +describe(`isWhereSubset`, () => { + describe(`basic cases`, () => { + it(`should return true for both undefined (all data is subset of all data)`, () => { + expect(isWhereSubset(undefined, undefined)).toBe(true) + }) + + it(`should return false for undefined subset with constrained superset`, () => { + // Requesting ALL data but only loaded SOME data = NOT subset + expect(isWhereSubset(undefined, gt(ref(`age`), val(10)))).toBe(false) + }) + + it(`should return true for constrained subset with undefined superset`, () => { + // Loaded ALL data, so any constrained subset is covered + expect(isWhereSubset(gt(ref(`age`), val(20)), undefined)).toBe(true) + }) + + it(`should return true for identical expressions`, () => { + const expr = gt(ref(`age`), val(10)) + expect(isWhereSubset(expr, expr)).toBe(true) + }) + + it(`should return true for structurally equal expressions`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + }) + + describe(`comparison operators`, () => { + it(`should handle gt: age > 20 is subset of age > 10`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(20)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle gt: age > 10 is NOT subset of age > 20`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gt(ref(`age`), val(20))) + ).toBe(false) + }) + + it(`should handle gte: age >= 20 is subset of age >= 10`, () => { + expect( + isWhereSubset(gte(ref(`age`), val(20)), gte(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle lt: age < 10 is subset of age < 20`, () => { + expect( + isWhereSubset(lt(ref(`age`), val(10)), lt(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle lt: age < 20 is NOT subset of age < 10`, () => { + expect( + isWhereSubset(lt(ref(`age`), val(20)), lt(ref(`age`), val(10))) + ).toBe(false) + }) + + it(`should handle lte: age <= 10 is subset of age <= 20`, () => { + expect( + isWhereSubset(lte(ref(`age`), val(10)), lte(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle eq: age = 15 is subset of age > 10`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), gt(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle eq: age = 5 is NOT subset of age > 10`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(5)), gt(ref(`age`), val(10))) + ).toBe(false) + }) + + it(`should handle eq: age = 15 is subset of age >= 15`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), gte(ref(`age`), val(15))) + ).toBe(true) + }) + + it(`should handle eq: age = 15 is subset of age < 20`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(15)), lt(ref(`age`), val(20))) + ).toBe(true) + }) + + it(`should handle mixed operators: gt vs gte`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(10)), gte(ref(`age`), val(10))) + ).toBe(true) + }) + + it(`should handle mixed operators: gte vs gt`, () => { + expect( + isWhereSubset(gte(ref(`age`), val(11)), gt(ref(`age`), val(10))) + ).toBe(true) + expect( + isWhereSubset(gte(ref(`age`), val(10)), gt(ref(`age`), val(10))) + ).toBe(false) + }) + }) + + describe(`IN operator`, () => { + it(`should handle eq vs in: age = 5 is subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(5)), inOp(ref(`age`), [5, 10, 15])) + ).toBe(true) + }) + + it(`should handle eq vs in: age = 20 is NOT subset of age IN [5, 10, 15]`, () => { + expect( + isWhereSubset(eq(ref(`age`), val(20)), inOp(ref(`age`), [5, 10, 15])) + ).toBe(false) + }) + + it(`should handle in vs in: [5, 10] is subset of [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 10]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(true) + }) + + it(`should handle in vs in: [5, 20] is NOT subset of [5, 10, 15]`, () => { + expect( + isWhereSubset(inOp(ref(`age`), [5, 20]), inOp(ref(`age`), [5, 10, 15])) + ).toBe(false) + }) + }) + + describe(`AND combinations`, () => { + it(`should handle AND in subset: (A AND B) is subset of A`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + gt(ref(`age`), val(10)) + ) + ).toBe(true) + }) + + it(`should handle AND in subset: (A AND B) is NOT subset of C (different field)`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)) + ) + ).toBe(false) + }) + + it(`should handle AND in superset: A is subset of (A AND B) is false (superset is more restrictive)`, () => { + expect( + isWhereSubset( + gt(ref(`age`), val(10)), + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(false) + }) + + it(`should handle AND in both: (age > 20 AND status = 'active') is subset of (age > 10 AND status = 'active')`, () => { + expect( + isWhereSubset( + and(gt(ref(`age`), val(20)), eq(ref(`status`), val(`active`))), + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + }) + + describe(`OR combinations`, () => { + it(`should handle OR in superset: A is subset of (A OR B)`, () => { + expect( + isWhereSubset( + gt(ref(`age`), val(10)), + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + + it(`should return false when subset doesn't imply any branch of OR superset`, () => { + expect( + isWhereSubset( + eq(ref(`age`), val(10)), + or(gt(ref(`age`), val(10)), lt(ref(`age`), val(5))) + ) + ).toBe(false) + }) + + it(`should handle OR in subset: (A OR B) is subset of C only if both A and B are subsets of C`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), gt(ref(`age`), val(30))), + gt(ref(`age`), val(10)) + ) + ).toBe(true) + }) + + it(`should handle OR in both: (age > 20 OR status = 'active') is subset of (age > 10 OR status = 'active')`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), eq(ref(`status`), val(`active`))), + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))) + ) + ).toBe(true) + }) + + it(`should handle OR in subset: (A OR B) is NOT subset of C if either is not a subset`, () => { + expect( + isWhereSubset( + or(gt(ref(`age`), val(20)), lt(ref(`age`), val(5))), + gt(ref(`age`), val(10)) + ) + ).toBe(false) + }) + }) + + describe(`different fields`, () => { + it(`should return false for different fields with no relationship`, () => { + expect( + isWhereSubset(gt(ref(`age`), val(20)), gt(ref(`salary`), val(1000))) + ).toBe(false) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should handle Date equality`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + eq(ref(`createdAt`), val(date2)) + ) + ).toBe(true) + }) + + it(`should handle Date range comparisons: date > 2024-01-15 is subset of date > 2024-01-01`, () => { + expect( + isWhereSubset( + gt(ref(`createdAt`), val(date2)), + gt(ref(`createdAt`), val(date1)) + ) + ).toBe(true) + }) + + it(`should handle Date range comparisons: date < 2024-01-15 is subset of date < 2024-02-01`, () => { + expect( + isWhereSubset( + lt(ref(`createdAt`), val(date2)), + lt(ref(`createdAt`), val(date3)) + ) + ).toBe(true) + }) + + it(`should handle Date equality vs range: date = 2024-01-15 is subset of date > 2024-01-01`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + gt(ref(`createdAt`), val(date1)) + ) + ).toBe(true) + }) + + it(`should handle Date equality vs IN: date = 2024-01-15 is subset of date IN [2024-01-01, 2024-01-15, 2024-02-01]`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date2)), + inOp(ref(`createdAt`), [date1, date2, date3]) + ) + ).toBe(true) + }) + + it(`should handle Date IN subset: date IN [2024-01-01, 2024-01-15] is subset of date IN [2024-01-01, 2024-01-15, 2024-02-01]`, () => { + expect( + isWhereSubset( + inOp(ref(`createdAt`), [date1, date2]), + inOp(ref(`createdAt`), [date1, date2, date3]) + ) + ).toBe(true) + }) + + it(`should return false when Date not in IN set`, () => { + expect( + isWhereSubset( + eq(ref(`createdAt`), val(date1)), + inOp(ref(`createdAt`), [date2, date3]) + ) + ).toBe(false) + }) + }) +}) + +describe(`intersectWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return true for empty array`, () => { + const result = intersectWherePredicates([]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(true) + }) + + it(`should return the single predicate as-is`, () => { + const pred = gt(ref(`age`), val(10)) + const result = intersectWherePredicates([pred]) + expect(result).toBe(pred) + }) + }) + + describe(`same field comparisons`, () => { + it(`should take most restrictive for gt: age > 10 AND age > 20 → age > 20`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + gt(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(20) + }) + + it(`should take most restrictive for gte: age >= 10 AND age >= 20 → age >= 20`, () => { + const result = intersectWherePredicates([ + gte(ref(`age`), val(10)), + gte(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gte`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(20) + }) + + it(`should take most restrictive for lt: age < 20 AND age < 10 → age < 10`, () => { + const result = intersectWherePredicates([ + lt(ref(`age`), val(20)), + lt(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`lt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + + it(`should combine range: age > 10 AND age < 50`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + lt(ref(`age`), val(50)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(2) + }) + + it(`should prefer eq when present: age = 15 AND age > 10 → age = 15`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(15)), + gt(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`eq`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(15) + }) + + it(`should handle gt and gte together: age > 10 AND age >= 15 → age >= 15`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + gte(ref(`age`), val(15)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gte`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(15) + }) + }) + + describe(`different fields`, () => { + it(`should combine with AND: age > 10 AND status = 'active'`, () => { + const result = intersectWherePredicates([ + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(2) + }) + }) + + describe(`flatten AND`, () => { + it(`should flatten nested ANDs`, () => { + const result = intersectWherePredicates([ + and(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(3) + }) + }) + + describe(`conflict detection`, () => { + it(`should return false literal for conflicting equalities: age = 5 AND age = 6`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(5)), + eq(ref(`age`), val(6)), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should handle IN intersection: IN [1,2] AND IN [2,3] → IN [2]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`age`), [1, 2]), + inOp(ref(`age`), [2, 3]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toEqual([2]) + }) + + it(`should return false literal for empty IN intersection: IN [1,2] AND IN [3,4]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`age`), [1, 2]), + inOp(ref(`age`), [3, 4]), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should handle multiple IN intersections: IN [1,2,3] AND IN [2,3,4] AND IN [2,4,5] → IN [2]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`age`), [1, 2, 3]), + inOp(ref(`age`), [2, 3, 4]), + inOp(ref(`age`), [2, 4, 5]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toEqual([2]) + }) + + it(`should handle satisfiable equality AND IN: age = 2 AND age IN [1,2] → age = 2`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(2)), + inOp(ref(`age`), [1, 2]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`eq`) + const value = ((result as Func).args[1] as Value).value + expect(value).toBe(2) + }) + + it(`should return false literal for unsatisfiable equality AND IN: age = 2 AND age IN [3,4]`, () => { + const result = intersectWherePredicates([ + eq(ref(`age`), val(2)), + inOp(ref(`age`), [3, 4]), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should intersect Date ranges: date > 2024-01-01 AND date > 2024-01-15 → date > 2024-01-15`, () => { + const result = intersectWherePredicates([ + gt(ref(`createdAt`), val(date1)), + gt(ref(`createdAt`), val(date2)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const value = ((result as Func).args[1] as Value).value + expect(value).toEqual(date2) + }) + + it(`should intersect Date range with bounds: date > 2024-01-01 AND date < 2024-02-01`, () => { + const result = intersectWherePredicates([ + gt(ref(`createdAt`), val(date1)), + lt(ref(`createdAt`), val(date3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`and`) + expect((result as Func).args.length).toBe(2) + }) + + it(`should handle Date equality: date = 2024-01-15 AND date = 2024-01-15 → date = 2024-01-15`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date2)), + eq(ref(`createdAt`), val(new Date(`2024-01-15`))), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`eq`) + const value = ((result as Func).args[1] as Value).value + expect(value).toEqual(date2) + }) + + it(`should return false literal for conflicting Date equalities`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date1)), + eq(ref(`createdAt`), val(date2)), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should handle Date IN clause intersection: IN [date1,date2] AND IN [date2,date3] → IN [date2]`, () => { + const result = intersectWherePredicates([ + inOp(ref(`createdAt`), [date1, date2]), + inOp(ref(`createdAt`), [date2, date3]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(1) + expect(values[0]).toEqual(date2) + }) + + it(`should handle Date equality AND IN: date = date2 AND date IN [date1,date2] → date = date2`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date2)), + inOp(ref(`createdAt`), [date1, date2]), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`eq`) + const value = ((result as Func).args[1] as Value).value + expect(value).toEqual(date2) + }) + + it(`should return false literal for Date equality AND non-matching IN`, () => { + const result = intersectWherePredicates([ + eq(ref(`createdAt`), val(date1)), + inOp(ref(`createdAt`), [date2, date3]), + ]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + }) +}) + +describe(`unionWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return false for empty array`, () => { + const result = unionWherePredicates([]) + expect(result.type).toBe(`val`) + expect((result as Value).value).toBe(false) + }) + + it(`should return the single predicate as-is`, () => { + const pred = gt(ref(`age`), val(10)) + const result = unionWherePredicates([pred]) + expect(result).toBe(pred) + }) + }) + + describe(`same field comparisons`, () => { + it(`should take least restrictive for gt: age > 10 OR age > 20 → age > 10`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + gt(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + + it(`should take least restrictive for gte: age >= 10 OR age >= 20 → age >= 10`, () => { + const result = unionWherePredicates([ + gte(ref(`age`), val(10)), + gte(ref(`age`), val(20)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gte`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + + it(`should take least restrictive for lt: age < 20 OR age < 10 → age < 20`, () => { + const result = unionWherePredicates([ + lt(ref(`age`), val(20)), + lt(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`lt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(20) + }) + + it(`should combine eq into IN: age = 5 OR age = 10 → age IN [5, 10]`, () => { + const result = unionWherePredicates([ + eq(ref(`age`), val(5)), + eq(ref(`age`), val(10)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toContain(5) + expect(values).toContain(10) + expect(values.length).toBe(2) + }) + + it(`should fold IN and equality into single IN: age IN [1,2] OR age = 3 → age IN [1,2,3]`, () => { + const result = unionWherePredicates([ + inOp(ref(`age`), [1, 2]), + eq(ref(`age`), val(3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values).toContain(1) + expect(values).toContain(2) + expect(values).toContain(3) + expect(values.length).toBe(3) + }) + + it(`should handle gte and gt together: age > 10 OR age >= 15 → age > 10`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + gte(ref(`age`), val(15)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`gt`) + const field = (result as Func).args[1] as Value + expect(field.value).toBe(10) + }) + }) + + describe(`different fields`, () => { + it(`should combine with OR: age > 10 OR status = 'active'`, () => { + const result = unionWherePredicates([ + gt(ref(`age`), val(10)), + eq(ref(`status`), val(`active`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`or`) + expect((result as Func).args.length).toBe(2) + }) + }) + + describe(`flatten OR`, () => { + it(`should flatten nested ORs`, () => { + const result = unionWherePredicates([ + or(gt(ref(`age`), val(10)), eq(ref(`status`), val(`active`))), + eq(ref(`name`), val(`John`)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`or`) + expect((result as Func).args.length).toBe(3) + }) + }) + + describe(`Date support`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + it(`should combine Date equalities into IN: date = date1 OR date = date2 → date IN [date1, date2]`, () => { + const result = unionWherePredicates([ + eq(ref(`createdAt`), val(date1)), + eq(ref(`createdAt`), val(date2)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(2) + expect(values).toContainEqual(date1) + expect(values).toContainEqual(date2) + }) + + it(`should fold Date IN and equality: date IN [date1,date2] OR date = date3 → date IN [date1,date2,date3]`, () => { + const result = unionWherePredicates([ + inOp(ref(`createdAt`), [date1, date2]), + eq(ref(`createdAt`), val(date3)), + ]) + expect(result.type).toBe(`func`) + expect((result as Func).name).toBe(`in`) + const values = ((result as Func).args[1] as Value).value + expect(values.length).toBe(3) + expect(values).toContainEqual(date1) + expect(values).toContainEqual(date2) + expect(values).toContainEqual(date3) + }) + }) +}) + +describe(`isOrderBySubset`, () => { + it(`should return true for undefined subset`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(undefined, orderBy)).toBe(true) + expect(isOrderBySubset([], orderBy)).toBe(true) + }) + + it(`should return false for undefined superset with non-empty subset`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(orderBy, undefined)).toBe(false) + expect(isOrderBySubset(orderBy, [])).toBe(false) + }) + + it(`should return true for identical orderBy`, () => { + const orderBy: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(orderBy, orderBy)).toBe(true) + }) + + it(`should return true when subset is prefix of superset`, () => { + const subset: OrderBy = [orderByClause(ref(`age`), `asc`)] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(true) + }) + + it(`should return false when subset is not a prefix`, () => { + const subset: OrderBy = [orderByClause(ref(`name`), `desc`)] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) + + it(`should return false when directions differ`, () => { + const subset: OrderBy = [orderByClause(ref(`age`), `desc`)] + const superset: OrderBy = [orderByClause(ref(`age`), `asc`)] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) + + it(`should return false when subset is longer than superset`, () => { + const subset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + orderByClause(ref(`status`), `asc`), + ] + const superset: OrderBy = [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ] + expect(isOrderBySubset(subset, superset)).toBe(false) + }) +}) + +describe(`isLimitSubset`, () => { + it(`should return true for undefined subset`, () => { + expect(isLimitSubset(undefined, 10)).toBe(true) + }) + + it(`should return true for undefined superset`, () => { + expect(isLimitSubset(10, undefined)).toBe(true) + }) + + it(`should return true when subset <= superset`, () => { + expect(isLimitSubset(10, 20)).toBe(true) + expect(isLimitSubset(10, 10)).toBe(true) + }) + + it(`should return false when subset > superset`, () => { + expect(isLimitSubset(20, 10)).toBe(false) + }) +}) + +describe(`isPredicateSubset`, () => { + it(`should check all components`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), + orderBy: [orderByClause(ref(`age`), `asc`)], + limit: 10, + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + orderBy: [ + orderByClause(ref(`age`), `asc`), + orderByClause(ref(`name`), `desc`), + ], + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(true) + }) + + it(`should return false if where is not subset`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(5)), + limit: 10, + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + + it(`should return false if orderBy is not subset`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), + orderBy: [orderByClause(ref(`name`), `desc`)], + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + orderBy: [orderByClause(ref(`age`), `asc`)], + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) + + it(`should return false if limit is not subset`, () => { + const subset: LoadSubsetOptions = { + where: gt(ref(`age`), val(20)), + limit: 30, + } + const superset: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + expect(isPredicateSubset(subset, superset)).toBe(false) + }) +}) + +describe(`intersectPredicates`, () => { + it(`should return empty for empty array`, () => { + const result = intersectPredicates([]) + expect(result).toEqual({}) + }) + + it(`should return single predicate as-is`, () => { + const pred: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + const result = intersectPredicates([pred]) + expect(result).toBe(pred) + }) + + it(`should produce false literal where clause for contradictory predicates`, () => { + const pred1: LoadSubsetOptions = { where: eq(ref(`age`), val(5)) } + const pred2: LoadSubsetOptions = { where: eq(ref(`age`), val(6)) } + const result = intersectPredicates([pred1, pred2]) + + expect(result.where).toBeDefined() + expect(result.where!.type).toBe(`val`) + expect((result.where as Value).value).toBe(false) + }) + + it(`should intersect where clauses`, () => { + const pred1: LoadSubsetOptions = { where: gt(ref(`age`), val(10)) } + const pred2: LoadSubsetOptions = { where: lt(ref(`age`), val(50)) } + const result = intersectPredicates([pred1, pred2]) + + expect(result.where).toBeDefined() + expect(result.where!.type).toBe(`func`) + expect((result.where as Func).name).toBe(`and`) + }) + + it(`should use first non-empty orderBy`, () => { + const orderBy1: OrderBy = [orderByClause(ref(`age`), `asc`)] + const pred1: LoadSubsetOptions = { orderBy: orderBy1 } + const pred2: LoadSubsetOptions = {} + const result = intersectPredicates([pred1, pred2]) + + expect(result.orderBy).toBe(orderBy1) + }) + + it(`should use minimum limit when all have limits (intersection = most restrictive)`, () => { + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = { limit: 20 } + const pred3: LoadSubsetOptions = { limit: 15 } + const result = intersectPredicates([pred1, pred2, pred3]) + + expect(result.limit).toBe(10) + }) + + it(`should use minimum limit even when some predicates are unlimited`, () => { + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = {} // no limit = unlimited + const pred3: LoadSubsetOptions = { limit: 20 } + const result = intersectPredicates([pred1, pred2, pred3]) + + expect(result.limit).toBe(10) + }) + + it(`should return undefined limit if all predicates are unlimited`, () => { + const pred1: LoadSubsetOptions = {} + const pred2: LoadSubsetOptions = {} + const result = intersectPredicates([pred1, pred2]) + + expect(result.limit).toBeUndefined() + }) +}) + +describe(`unionPredicates`, () => { + it(`should return empty for empty array`, () => { + const result = unionPredicates([]) + expect(result).toEqual({}) + }) + + it(`should return single predicate as-is`, () => { + const pred: LoadSubsetOptions = { + where: gt(ref(`age`), val(10)), + limit: 20, + } + const result = unionPredicates([pred]) + expect(result).toBe(pred) + }) + + it(`should union where clauses`, () => { + const pred1: LoadSubsetOptions = { where: gt(ref(`age`), val(10)) } + const pred2: LoadSubsetOptions = { where: gt(ref(`age`), val(20)) } + const result = unionPredicates([pred1, pred2]) + + expect(result.where).toBeDefined() + expect(result.where!.type).toBe(`func`) + expect((result.where as Func).name).toBe(`gt`) + const value = ((result.where as Func).args[1] as Value).value + expect(value).toBe(10) // least restrictive + }) + + it(`should return undefined orderBy for union`, () => { + const orderBy1: OrderBy = [orderByClause(ref(`age`), `asc`)] + const pred1: LoadSubsetOptions = { orderBy: orderBy1 } + const pred2: LoadSubsetOptions = {} + const result = unionPredicates([pred1, pred2]) + + expect(result.orderBy).toBeUndefined() + }) + + it(`should use minimum limit when all have limits`, () => { + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = { limit: 20 } + const pred3: LoadSubsetOptions = { limit: 15 } + const result = unionPredicates([pred1, pred2, pred3]) + + expect(result.limit).toBe(10) + }) + + it(`should return undefined limit if any predicate is unlimited`, () => { + const pred1: LoadSubsetOptions = { limit: 10 } + const pred2: LoadSubsetOptions = {} // no limit = unlimited + const result = unionPredicates([pred1, pred2]) + + expect(result.limit).toBeUndefined() + }) +}) + +describe(`minusWherePredicates`, () => { + describe(`basic cases`, () => { + it(`should return original predicate when nothing to subtract`, () => { + const pred = gt(ref(`age`), val(10)) + const result = minusWherePredicates(pred, undefined) + + expect(result).toEqual(pred) + }) + + it(`should return null when from is undefined (can't simplify NOT(B))`, () => { + const subtract = gt(ref(`age`), val(10)) + const result = minusWherePredicates(undefined, subtract) + + expect(result).toBeNull() + }) + + it(`should return empty set when from is subset of subtract`, () => { + const from = gt(ref(`age`), val(20)) // age > 20 + const subtract = gt(ref(`age`), val(10)) // age > 10 + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return null when predicates are on different fields`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = eq(ref(`status`), val(`active`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toBeNull() + }) + }) + + describe(`IN minus IN`, () => { + it(`should compute set difference: IN [A,B,C,D] - IN [B,C] = IN [A,D]`, () => { + const from = inOp(ref(`status`), [`A`, `B`, `C`, `D`]) + const subtract = inOp(ref(`status`), [`B`, `C`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`status`), val([`A`, `D`])], + }) + }) + + it(`should return empty set when all values are subtracted`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`A`, `B`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return original when no overlap`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`C`, `D`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual(from) + }) + + it(`should collapse to equality when one value remains`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = inOp(ref(`status`), [`B`]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `eq`, + args: [ref(`status`), val(`A`)], + }) + }) + }) + + describe(`IN minus equality`, () => { + it(`should remove value from IN: IN [A,B,C] - eq(B) = IN [A,C]`, () => { + const from = inOp(ref(`status`), [`A`, `B`, `C`]) + const subtract = eq(ref(`status`), val(`B`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`status`), val([`A`, `C`])], + }) + }) + + it(`should collapse to equality when one value remains`, () => { + const from = inOp(ref(`status`), [`A`, `B`]) + const subtract = eq(ref(`status`), val(`A`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `eq`, + args: [ref(`status`), val(`B`)], + }) + }) + + it(`should return empty set when removing last value`, () => { + const from = inOp(ref(`status`), [`A`]) + const subtract = eq(ref(`status`), val(`A`)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + }) + + describe(`equality minus equality`, () => { + it(`should return empty set when same value`, () => { + const from = eq(ref(`age`), val(15)) + const subtract = eq(ref(`age`), val(15)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should return original when different values`, () => { + const from = eq(ref(`age`), val(15)) + const subtract = eq(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual(from) + }) + }) + + describe(`range minus range - gt/gte`, () => { + it(`should compute difference: age > 10 - age > 20 = (age > 10 AND age <= 20)`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = gt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + + it(`should return original when no overlap: age > 20 - age > 10`, () => { + const from = gt(ref(`age`), val(20)) + const subtract = gt(ref(`age`), val(10)) + const result = minusWherePredicates(from, subtract) + + // age > 20 is subset of age > 10, so result is empty + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should compute difference: age >= 10 - age >= 20 = (age >= 10 AND age < 20)`, () => { + const from = gte(ref(`age`), val(10)) + const subtract = gte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(10)), lt(ref(`age`), val(20))], + }) + }) + + it(`should compute difference: age > 10 - age >= 20 = (age > 10 AND age < 20)`, () => { + const from = gt(ref(`age`), val(10)) + const subtract = gte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lt(ref(`age`), val(20))], + }) + }) + + it(`should compute difference: age >= 10 - age > 20 = (age >= 10 AND age <= 20)`, () => { + const from = gte(ref(`age`), val(10)) + const subtract = gt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + }) + + describe(`range minus range - lt/lte`, () => { + it(`should compute difference: age < 30 - age < 20 = (age >= 20 AND age < 30)`, () => { + const from = lt(ref(`age`), val(30)) + const subtract = lt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(20)), lt(ref(`age`), val(30))], + }) + }) + + it(`should return original when no overlap: age < 20 - age < 30`, () => { + const from = lt(ref(`age`), val(20)) + const subtract = lt(ref(`age`), val(30)) + const result = minusWherePredicates(from, subtract) + + // age < 20 is subset of age < 30, so result is empty + expect(result).toEqual({ type: `val`, value: false }) + }) + + it(`should compute difference: age <= 30 - age <= 20 = (age > 20 AND age <= 30)`, () => { + const from = lte(ref(`age`), val(30)) + const subtract = lte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(20)), lte(ref(`age`), val(30))], + }) + }) + + it(`should compute difference: age < 30 - age <= 20 = (age > 20 AND age < 30)`, () => { + const from = lt(ref(`age`), val(30)) + const subtract = lte(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(20)), lt(ref(`age`), val(30))], + }) + }) + + it(`should compute difference: age <= 30 - age < 20 = (age >= 20 AND age <= 30)`, () => { + const from = lte(ref(`age`), val(30)) + const subtract = lt(ref(`age`), val(20)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [gte(ref(`age`), val(20)), lte(ref(`age`), val(30))], + }) + }) + }) + + describe(`Date support`, () => { + it(`should handle Date IN minus Date IN`, () => { + const date1 = new Date(`2024-01-01`) + const date2 = new Date(`2024-01-15`) + const date3 = new Date(`2024-02-01`) + + const from = inOp(ref(`createdAt`), [date1, date2, date3]) + const subtract = inOp(ref(`createdAt`), [date2]) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `in`, + args: [ref(`createdAt`), val([date1, date3])], + }) + }) + + it(`should handle Date range difference: date > 2024-01-01 - date > 2024-01-15`, () => { + const date1 = new Date(`2024-01-01`) + const date15 = new Date(`2024-01-15`) + + const from = gt(ref(`createdAt`), val(date1)) + const subtract = gt(ref(`createdAt`), val(date15)) + const result = minusWherePredicates(from, subtract) + + expect(result).toEqual({ + type: `func`, + name: `and`, + args: [ + gt(ref(`createdAt`), val(date1)), + lte(ref(`createdAt`), val(date15)), + ], + }) + }) + }) + + describe(`real-world sync scenarios`, () => { + it(`should compute missing data range: need age > 10, already have age > 20`, () => { + const requested = gt(ref(`age`), val(10)) + const alreadyLoaded = gt(ref(`age`), val(20)) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Need to fetch: 10 < age <= 20 + expect(needToFetch).toEqual({ + type: `func`, + name: `and`, + args: [gt(ref(`age`), val(10)), lte(ref(`age`), val(20))], + }) + }) + + it(`should compute missing IDs: need IN [1..100], already have IN [50..100]`, () => { + const allIds = Array.from({ length: 100 }, (_, i) => i + 1) + const loadedIds = Array.from({ length: 51 }, (_, i) => i + 50) + + const requested = inOp(ref(`id`), allIds) + const alreadyLoaded = inOp(ref(`id`), loadedIds) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Need to fetch: ids 1..49 + const expectedIds = Array.from({ length: 49 }, (_, i) => i + 1) + expect(needToFetch).toEqual({ + type: `func`, + name: `in`, + args: [ref(`id`), val(expectedIds)], + }) + }) + + it(`should return empty when all requested data is already loaded`, () => { + const requested = gt(ref(`age`), val(20)) + const alreadyLoaded = gt(ref(`age`), val(10)) + const needToFetch = minusWherePredicates(requested, alreadyLoaded) + + // Requested is subset of already loaded - nothing more to fetch + expect(needToFetch).toEqual({ type: `val`, value: false }) + }) + }) +}) diff --git a/packages/db/tests/subset-dedupe.test.ts b/packages/db/tests/subset-dedupe.test.ts new file mode 100644 index 000000000..0268b3984 --- /dev/null +++ b/packages/db/tests/subset-dedupe.test.ts @@ -0,0 +1,325 @@ +import { describe, expect, it } from "vitest" +import { DeduplicatedLoadSubset } from "../src/query/subset-dedupe" +import { Func, PropRef, Value } from "../src/query/ir" +import type { BasicExpression, OrderBy } from "../src/query/ir" +import type { LoadSubsetOptions } from "../src/types" + +// Helper functions to build expressions more easily +function ref(path: string | Array): PropRef { + return new PropRef(typeof path === `string` ? [path] : path) +} + +function val(value: T): Value { + return new Value(value) +} + +function gt(left: BasicExpression, right: BasicExpression): Func { + return new Func(`gt`, [left, right]) +} + +function lt(left: BasicExpression, right: BasicExpression): Func { + return new Func(`lt`, [left, right]) +} + +function eq(left: BasicExpression, right: BasicExpression): Func { + return new Func(`eq`, [left, right]) +} + +describe(`createDeduplicatedLoadSubset`, () => { + it(`should call underlying loadSubset on first call`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + + expect(callCount).toBe(1) + }) + + it(`should return true immediately for subset unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 10 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(1) + + // Second call: age > 20 (subset of age > 10) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call underlying function + }) + + it(`should call underlying loadSubset for non-subset unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: age > 10 (NOT a subset of age > 20) + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) // Should call underlying function + }) + + it(`should combine unlimited calls with union`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: age > 20 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(20)) }) + expect(callCount).toBe(1) + + // Second call: age < 10 (different range) + await deduplicated.loadSubset({ where: lt(ref(`age`), val(10)) }) + expect(callCount).toBe(2) + + // Third call: age > 25 (subset of age > 20) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(25)), + }) + expect(result).toBe(true) + expect(callCount).toBe(2) // Should not call - covered by first call + }) + + it(`should track limited calls separately`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: age > 10, orderBy age asc, limit 10 + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second call: age > 20, orderBy age asc, limit 5 (subset) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + orderBy: orderBy1, + limit: 5, + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - subset of first + }) + + it(`should call underlying for non-subset limited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: age > 10, orderBy age asc, limit 10 + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 10, + }) + expect(callCount).toBe(1) + + // Second call: age > 10, orderBy age asc, limit 20 (NOT a subset) + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + limit: 20, + }) + expect(callCount).toBe(2) // Should call - limit is larger + }) + + it(`should check limited calls against unlimited combined predicate`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: unlimited age > 10 + await deduplicated.loadSubset({ where: gt(ref(`age`), val(10)) }) + expect(callCount).toBe(1) + + // Second call: limited age > 20 with orderBy + limit + // Even though it has a limit, it's covered by the unlimited call + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + orderBy: orderBy1, + limit: 10, + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - covered by unlimited + }) + + it(`should ignore orderBy for unlimited calls`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`age`), + compareOptions: { + direction: `asc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // First call: unlimited with orderBy + await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + orderBy: orderBy1, + }) + expect(callCount).toBe(1) + + // Second call: subset where, different orderBy, no limit + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(20)), + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - orderBy ignored for unlimited + }) + + it(`should handle undefined where clauses`, async () => { + let callCount = 0 + const mockLoadSubset = () => { + callCount++ + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + // First call: no where clause (all data) + await deduplicated.loadSubset({}) + expect(callCount).toBe(1) + + // Second call: with where clause (should be covered) + const result = await deduplicated.loadSubset({ + where: gt(ref(`age`), val(10)), + }) + expect(result).toBe(true) + expect(callCount).toBe(1) // Should not call - all data already loaded + }) + + it(`should handle complex real-world scenario`, async () => { + let callCount = 0 + const calls: Array = [] + const mockLoadSubset = (options: LoadSubsetOptions) => { + callCount++ + calls.push(options) + return Promise.resolve() + } + + const deduplicated = new DeduplicatedLoadSubset(mockLoadSubset) + + const orderBy1: OrderBy = [ + { + expression: ref(`createdAt`), + compareOptions: { + direction: `desc`, + nulls: `last`, + stringSort: `lexical`, + }, + }, + ] + + // Load all active users + await deduplicated.loadSubset({ where: eq(ref(`status`), val(`active`)) }) + expect(callCount).toBe(1) + + // Load top 10 active users by createdAt + const result1 = await deduplicated.loadSubset({ + where: eq(ref(`status`), val(`active`)), + orderBy: orderBy1, + limit: 10, + }) + expect(result1).toBe(true) // Covered by unlimited call + expect(callCount).toBe(1) + + // Load all inactive users + await deduplicated.loadSubset({ where: eq(ref(`status`), val(`inactive`)) }) + expect(callCount).toBe(2) + + // Load top 5 inactive users + const result2 = await deduplicated.loadSubset({ + where: eq(ref(`status`), val(`inactive`)), + orderBy: orderBy1, + limit: 5, + }) + expect(result2).toBe(true) // Covered by unlimited inactive call + expect(callCount).toBe(2) + + // Verify only 2 actual calls were made + expect(calls).toHaveLength(2) + expect(calls[0]).toEqual({ where: eq(ref(`status`), val(`active`)) }) + expect(calls[1]).toEqual({ where: eq(ref(`status`), val(`inactive`)) }) + }) +}) diff --git a/packages/electric-db-collection/src/electric.ts b/packages/electric-db-collection/src/electric.ts index bdd6f34a7..a71ae10bd 100644 --- a/packages/electric-db-collection/src/electric.ts +++ b/packages/electric-db-collection/src/electric.ts @@ -6,18 +6,22 @@ import { } from "@electric-sql/client" import { Store } from "@tanstack/store" import DebugModule from "debug" +import { DeduplicatedLoadSubset } from "@tanstack/db" import { ExpectedNumberInAwaitTxIdError, StreamAbortedError, TimeoutWaitingForMatchError, TimeoutWaitingForTxIdError, } from "./errors" +import { compileSQL } from "./sql-compiler" import type { BaseCollectionConfig, CollectionConfig, DeleteMutationFnParams, InsertMutationFnParams, + LoadSubsetOptions, SyncConfig, + SyncMode, UpdateMutationFnParams, UtilsRecord, } from "@tanstack/db" @@ -72,6 +76,24 @@ type InferSchemaOutput = T extends StandardSchemaV1 : Record : Record +/** + * The mode of sync to use for the collection. + * @default `eager` + * @description + * - `eager`: + * - syncs all data immediately on preload + * - collection will be marked as ready once the sync is complete + * - there is no incremental sync + * - `on-demand`: + * - syncs data in incremental snapshots when the collection is queried + * - collection will be marked as ready immediately after the first snapshot is synced + * - `progressive`: + * - syncs all data for the collection in the background + * - uses incremental snapshots during the initial sync to provide a fast path to the data required for queries + * - collection will be marked as ready once the full sync is complete + */ +export type ElectricSyncMode = SyncMode | `progressive` + /** * Configuration interface for Electric collection options * @template T - The type of items in the collection @@ -82,12 +104,13 @@ export interface ElectricCollectionConfig< TSchema extends StandardSchemaV1 = never, > extends Omit< BaseCollectionConfig, - `onInsert` | `onUpdate` | `onDelete` + `onInsert` | `onUpdate` | `onDelete` | `syncMode` > { /** * Configuration options for the ElectricSQL ShapeStream */ shapeOptions: ShapeStreamOptions> + syncMode?: ElectricSyncMode /** * Optional asynchronous handler function called before an insert operation @@ -281,6 +304,9 @@ export function electricCollectionOptions( } { const seenTxids = new Store>(new Set([])) const seenSnapshots = new Store>([]) + const internalSyncMode = config.syncMode ?? `eager` + const finalSyncMode = + internalSyncMode === `progressive` ? `on-demand` : internalSyncMode const pendingMatches = new Store< Map< string, @@ -331,6 +357,7 @@ export function electricCollectionOptions( const sync = createElectricSync(config.shapeOptions, { seenTxids, seenSnapshots, + syncMode: internalSyncMode, pendingMatches, currentBatchMessages, removePendingMatches, @@ -550,6 +577,7 @@ export function electricCollectionOptions( return { ...restConfig, + syncMode: finalSyncMode, sync, onInsert: wrappedOnInsert, onUpdate: wrappedOnUpdate, @@ -567,6 +595,7 @@ export function electricCollectionOptions( function createElectricSync>( shapeOptions: ShapeStreamOptions>, options: { + syncMode: ElectricSyncMode seenTxids: Store> seenSnapshots: Store> pendingMatches: Store< @@ -590,6 +619,7 @@ function createElectricSync>( const { seenTxids, seenSnapshots, + syncMode, pendingMatches, currentBatchMessages, removePendingMatches, @@ -653,6 +683,12 @@ function createElectricSync>( const stream = new ShapeStream({ ...shapeOptions, + // In on-demand mode, we only want to sync changes, so we set the log to `changes_only` + log: syncMode === `on-demand` ? `changes_only` : undefined, + // In on-demand mode, we only need the changes from the point of time the collection was created + // so we default to `now` when there is no saved offset. + offset: + shapeOptions.offset ?? (syncMode === `on-demand` ? `now` : undefined), signal: abortController.signal, onError: (errorParams) => { // Just immediately mark ready if there's an error to avoid blocking @@ -679,9 +715,26 @@ function createElectricSync>( let transactionStarted = false const newTxids = new Set() const newSnapshots: Array = [] + let hasReceivedUpToDate = false // Track if we've completed initial sync in progressive mode + + // Create deduplicated loadSubset wrapper for non-eager modes + // This prevents redundant snapshot requests when multiple concurrent + // live queries request overlapping or subset predicates + const loadSubsetDedupe = + syncMode === `eager` + ? null + : new DeduplicatedLoadSubset(async (opts: LoadSubsetOptions) => { + // In progressive mode, stop requesting snapshots once full sync is complete + if (syncMode === `progressive` && hasReceivedUpToDate) { + return + } + const snapshotParams = compileSQL(opts) + await stream.requestSnapshot(snapshotParams) + }) unsubscribeStream = stream.subscribe((messages: Array>) => { let hasUpToDate = false + let hasSnapshotEnd = false for (const message of messages) { // Add message to current batch buffer (for race condition handling) @@ -746,6 +799,7 @@ function createElectricSync>( }) } else if (isSnapshotEndMessage(message)) { newSnapshots.push(parseSnapshotMessage(message)) + hasSnapshotEnd = true } else if (isUpToDateMessage(message)) { hasUpToDate = true } else if (isMustRefetchMessage(message)) { @@ -761,12 +815,18 @@ function createElectricSync>( truncate() - // Reset hasUpToDate so we continue accumulating changes until next up-to-date + // Reset the loadSubset deduplication state since we're starting fresh + // This ensures that previously loaded predicates don't prevent refetching after truncate + loadSubsetDedupe?.reset() + + // Reset flags so we continue accumulating changes until next up-to-date hasUpToDate = false + hasSnapshotEnd = false + hasReceivedUpToDate = false // Reset for progressive mode - we're starting a new sync } } - if (hasUpToDate) { + if (hasUpToDate || hasSnapshotEnd) { // Clear the current batch buffer since we're now up-to-date currentBatchMessages.setState(() => []) @@ -776,8 +836,15 @@ function createElectricSync>( transactionStarted = false } - // Mark the collection as ready now that sync is up to date - markReady() + if (hasUpToDate || (hasSnapshotEnd && syncMode === `on-demand`)) { + // Mark the collection as ready now that sync is up to date + markReady() + } + + // Track that we've received the first up-to-date for progressive mode + if (hasUpToDate) { + hasReceivedUpToDate = true + } // Always commit txids when we receive up-to-date, regardless of transaction state seenTxids.setState((currentTxids) => { @@ -811,12 +878,16 @@ function createElectricSync>( } }) - // Return the unsubscribe function - return () => { - // Unsubscribe from the stream - unsubscribeStream() - // Abort the abort controller to stop the stream - abortController.abort() + // Return the deduplicated loadSubset if available (on-demand or progressive mode) + // The loadSubset method is auto-bound, so it can be safely returned directly + return { + loadSubset: loadSubsetDedupe?.loadSubset, + cleanup: () => { + // Unsubscribe from the stream + unsubscribeStream() + // Abort the abort controller to stop the stream + abortController.abort() + }, } }, // Expose the getSyncMetadata function diff --git a/packages/electric-db-collection/src/pg-serializer.ts b/packages/electric-db-collection/src/pg-serializer.ts new file mode 100644 index 000000000..707c4e1b8 --- /dev/null +++ b/packages/electric-db-collection/src/pg-serializer.ts @@ -0,0 +1,27 @@ +export function serialize(value: unknown): string { + if (typeof value === `string`) { + return `'${value}'` + } + + if (typeof value === `number`) { + return value.toString() + } + + if (value === null || value === undefined) { + return `NULL` + } + + if (typeof value === `boolean`) { + return value ? `true` : `false` + } + + if (value instanceof Date) { + return `'${value.toISOString()}'` + } + + if (Array.isArray(value)) { + return `ARRAY[${value.map(serialize).join(`,`)}]` + } + + throw new Error(`Cannot serialize value: ${JSON.stringify(value)}`) +} diff --git a/packages/electric-db-collection/src/sql-compiler.ts b/packages/electric-db-collection/src/sql-compiler.ts new file mode 100644 index 000000000..969869aae --- /dev/null +++ b/packages/electric-db-collection/src/sql-compiler.ts @@ -0,0 +1,163 @@ +import { serialize } from "./pg-serializer" +import type { SubsetParams } from "@electric-sql/client" +import type { IR, LoadSubsetOptions } from "@tanstack/db" + +export type CompiledSqlRecord = Omit & { + params?: Array +} + +export function compileSQL(options: LoadSubsetOptions): SubsetParams { + const { where, orderBy, limit } = options + + const params: Array = [] + const compiledSQL: CompiledSqlRecord = { params } + + if (where) { + // TODO: this only works when the where expression's PropRefs directly reference a column of the collection + // doesn't work if it goes through aliases because then we need to know the entire query to be able to follow the reference until the base collection (cf. followRef function) + compiledSQL.where = compileBasicExpression(where, params) + } + + if (orderBy) { + compiledSQL.orderBy = compileOrderBy(orderBy, params) + } + + if (limit) { + compiledSQL.limit = limit + } + + // Serialize the values in the params array into PG formatted strings + // and transform the array into a Record + const paramsRecord = params.reduce( + (acc, param, index) => { + acc[`${index + 1}`] = serialize(param) + return acc + }, + {} as Record + ) + + return { + ...compiledSQL, + params: paramsRecord, + } +} + +/** + * Compiles the expression to a SQL string and mutates the params array with the values. + * @param exp - The expression to compile + * @param params - The params array + * @returns The compiled SQL string + */ +function compileBasicExpression( + exp: IR.BasicExpression, + params: Array +): string { + switch (exp.type) { + case `val`: + params.push(exp.value) + return `$${params.length}` + case `ref`: + // TODO: doesn't yet support JSON(B) values which could be accessed with nested props + if (exp.path.length !== 1) { + throw new Error( + `Compiler can't handle nested properties: ${exp.path.join(`.`)}` + ) + } + return exp.path[0]! + case `func`: + return compileFunction(exp, params) + default: + throw new Error(`Unknown expression type`) + } +} + +function compileOrderBy(orderBy: IR.OrderBy, params: Array): string { + const compiledOrderByClauses = orderBy.map((clause: IR.OrderByClause) => + compileOrderByClause(clause, params) + ) + return compiledOrderByClauses.join(`,`) +} + +function compileOrderByClause( + clause: IR.OrderByClause, + params: Array +): string { + // TODO: what to do with stringSort and locale? + // Correctly supporting them is tricky as it depends on Postgres' collation + const { expression, compareOptions } = clause + let sql = compileBasicExpression(expression, params) + + if (compareOptions.direction === `desc`) { + sql = `${sql} DESC` + } + + if (compareOptions.nulls === `first`) { + sql = `${sql} NULLS FIRST` + } + + if (compareOptions.nulls === `last`) { + sql = `${sql} NULLS LAST` + } + + return sql +} + +function compileFunction( + exp: IR.Func, + params: Array = [] +): string { + const { name, args } = exp + + const opName = getOpName(name) + + const compiledArgs = args.map((arg: IR.BasicExpression) => + compileBasicExpression(arg, params) + ) + + if (isBinaryOp(name)) { + if (compiledArgs.length !== 2) { + throw new Error(`Binary operator ${name} expects 2 arguments`) + } + const [lhs, rhs] = compiledArgs + return `${lhs} ${opName} ${rhs}` + } + + return `${opName}(${compiledArgs.join(`,`)})` +} + +function isBinaryOp(name: string): boolean { + const binaryOps = [`eq`, `gt`, `gte`, `lt`, `lte`, `and`, `or`] + return binaryOps.includes(name) +} + +function getOpName(name: string): string { + const opNames = { + eq: `=`, + gt: `>`, + gte: `>=`, + lt: `<`, + lte: `<=`, + add: `+`, + and: `AND`, + or: `OR`, + not: `NOT`, + isUndefined: `IS NULL`, + isNull: `IS NULL`, + in: `IN`, + like: `LIKE`, + ilike: `ILIKE`, + upper: `UPPER`, + lower: `LOWER`, + length: `LENGTH`, + concat: `CONCAT`, + coalesce: `COALESCE`, + } + + const opName = opNames[name as keyof typeof opNames] + + if (!opName) { + throw new Error(`Unknown operator/function: ${name}`) + } + + return opName +} diff --git a/packages/electric-db-collection/tests/electric-live-query.test.ts b/packages/electric-db-collection/tests/electric-live-query.test.ts index b387f1756..b3a55d087 100644 --- a/packages/electric-db-collection/tests/electric-live-query.test.ts +++ b/packages/electric-db-collection/tests/electric-live-query.test.ts @@ -4,6 +4,7 @@ import { createLiveQueryCollection, eq, gt, + lt, } from "@tanstack/db" import { electricCollectionOptions } from "../src/electric" import type { ElectricCollectionUtils } from "../src/electric" @@ -54,10 +55,39 @@ const sampleUsers: Array = [ // Mock the ShapeStream module const mockSubscribe = vi.fn() +const mockRequestSnapshot = vi.fn() const mockStream = { subscribe: mockSubscribe, + requestSnapshot: async (...args: any) => { + const result = await mockRequestSnapshot(...args) + const subscribers = mockSubscribe.mock.calls.map((call) => call[0]) + const data = [...result.data] + + const messages: Array> = data.map((row: any) => ({ + value: row.value, + key: row.key, + headers: row.headers, + })) + + if (messages.length > 0) { + // add an up-to-date message + messages.push({ + headers: { control: `up-to-date` }, + }) + } + + subscribers.forEach((subscriber) => subscriber(messages)) + return result + }, } +// Mock the requestSnapshot method +// to return an empty array of data +// since most tests don't use it +mockRequestSnapshot.mockResolvedValue({ + data: [], +}) + vi.mock(`@electric-sql/client`, async () => { const actual = await vi.importActual(`@electric-sql/client`) return { @@ -437,4 +467,802 @@ describe.each([ // Clean up subscription.unsubscribe() }) + if (autoIndex === `eager`) { + it(`should load more data via requestSnapshot when creating live query with higher limit`, async () => { + // Create a new electric collection with on-demand syncMode for this test + vi.clearAllMocks() + + let testSubscriber: (messages: Array>) => void = () => {} + mockSubscribe.mockImplementation((callback) => { + testSubscriber = callback + return () => {} + }) + + const testElectricCollection = createCollection( + electricCollectionOptions({ + id: `test-incremental-loading`, + shapeOptions: { + url: `http://test-url`, + params: { table: `users` }, + }, + syncMode: `on-demand`, + getKey: (user: User) => user.id, + startSync: true, + autoIndex: `eager` as const, + }) + ) + + mockRequestSnapshot.mockResolvedValue({ + data: [], + }) + + // Initial sync with limited data + testSubscriber([ + ...sampleUsers.map((user) => ({ + key: user.id.toString(), + value: user, + headers: { operation: `insert` as const }, + })), + { headers: { control: `up-to-date` as const } }, + ]) + + expect(testElectricCollection.status).toBe(`ready`) + expect(testElectricCollection.size).toBe(4) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(0) + + // Create first live query with limit of 2 + const limitedLiveQuery = createLiveQueryCollection({ + id: `limited-users-live-query`, + startSync: true, + query: (q) => + q + .from({ user: testElectricCollection }) + .where(({ user }) => eq(user.active, true)) + .select(({ user }) => ({ + id: user.id, + name: user.name, + active: user.active, + age: user.age, + })) + .orderBy(({ user }) => user.age, `asc`) + .limit(2), + }) + + expect(limitedLiveQuery.status).toBe(`ready`) + expect(limitedLiveQuery.size).toBe(2) // Only first 2 active users + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + const callArgs = (index: number) => + mockRequestSnapshot.mock.calls[index]?.[0] + expect(callArgs(0)).toMatchObject({ + params: { "1": `true` }, + where: `active = $1`, + orderBy: `age NULLS FIRST`, + limit: 2, + }) + + // Next call will return a snapshot containing 2 rows + // Calls after that will return the default empty snapshot + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 5, + value: { + id: 5, + name: `Eve`, + age: 30, + email: `eve@example.com`, + active: true, + }, + }, + { + headers: { operation: `insert` }, + key: 6, + value: { + id: 6, + name: `Frank`, + age: 35, + email: `frank@example.com`, + active: true, + }, + }, + ], + }) + + // Create second live query with higher limit of 6 + const expandedLiveQuery = createLiveQueryCollection({ + id: `expanded-users-live-query`, + startSync: true, + query: (q) => + q + .from({ user: testElectricCollection }) + .where(({ user }) => eq(user.active, true)) + .select(({ user }) => ({ + id: user.id, + name: user.name, + active: user.active, + })) + .orderBy(({ user }) => user.age, `asc`) + .limit(6), + }) + + // Wait for the live query to process + await new Promise((resolve) => setTimeout(resolve, 0)) + + // With deduplication, the expanded query (limit 6) is NOT a subset of the limited query (limit 2), + // so it will trigger a new requestSnapshot call. However, some of the recursive + // calls may be deduped if they're covered by the union of previous unlimited calls. + // We expect at least 2 calls: the initial limit 2 and the initial limit 6. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + + // Check that first it requested a limit of 2 users (from first query) + expect(callArgs(0)).toMatchObject({ + params: { "1": `true` }, + where: `active = $1`, + orderBy: `age NULLS FIRST`, + limit: 2, + }) + + // Check that second it requested a limit of 6 users (from second query) + expect(callArgs(1)).toMatchObject({ + params: { "1": `true` }, + where: `active = $1`, + orderBy: `age NULLS FIRST`, + limit: 6, + }) + + // The expanded live query should have the locally available data + expect(expandedLiveQuery.status).toBe(`ready`) + // The mock returned 2 additional users (Eve and Frank) in response to the limit 6 request, + // plus the initial 3 active users (Alice, Bob, Dave) from the initial sync + expect(expandedLiveQuery.size).toBe(5) + }) + } +}) + +// Tests specifically for syncMode behavior with live queries +describe(`Electric Collection with Live Query - syncMode integration`, () => { + let subscriber: (messages: Array>) => void + + function createElectricCollectionWithSyncMode( + syncMode: `eager` | `on-demand` | `progressive` + ) { + vi.clearAllMocks() + + mockSubscribe.mockImplementation((callback) => { + subscriber = callback + return () => {} + }) + + mockRequestSnapshot.mockResolvedValue({ + data: [], + }) + + const config = { + id: `electric-users-${syncMode}`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `users`, + }, + }, + syncMode, + getKey: (user: User) => user.id, + } + + const options = electricCollectionOptions(config) + return createCollection({ + ...options, + startSync: true, + autoIndex: `eager` as const, + }) + } + + function simulateInitialSync(users: Array = sampleUsers) { + const messages: Array> = users.map((user) => ({ + key: user.id.toString(), + value: user, + headers: { operation: `insert` }, + })) + + messages.push({ + headers: { control: `up-to-date` }, + }) + + subscriber(messages) + } + + it(`should trigger requestSnapshot in on-demand mode when live query needs more data`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + // Initial sync with limited data + simulateInitialSync([sampleUsers[0]!, sampleUsers[1]!]) // Only Alice and Bob + expect(electricCollection.status).toBe(`ready`) + expect(electricCollection.size).toBe(2) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(0) + + // Mock requestSnapshot to return additional data + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 3, + value: sampleUsers[2]!, // Charlie + }, + { + headers: { operation: `insert` }, + key: 4, + value: sampleUsers[3]!, // Dave + }, + ], + }) + + // Create live query with limit that exceeds available data + const liveQuery = createLiveQueryCollection({ + id: `on-demand-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(5), + }) + + // Wait for the live query to process + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested more data from Electric with correct parameters + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 5, // Requests full limit from Electric + orderBy: `age NULLS FIRST`, + where: `active = $1`, + params: { 1: `true` }, // Parameters are stringified + }) + ) + expect(liveQuery.size).toBeGreaterThan(2) + }) + + it(`should trigger requestSnapshot in progressive mode when live query needs more data`, async () => { + const electricCollection = + createElectricCollectionWithSyncMode(`progressive`) + + // Send initial snapshot with limited data (using snapshot-end, not up-to-date) + // This keeps the collection in "loading" state, simulating progressive mode still syncing + subscriber([ + { + key: sampleUsers[0]!.id.toString(), + value: sampleUsers[0]!, + headers: { operation: `insert` }, + }, + { + key: sampleUsers[1]!.id.toString(), + value: sampleUsers[1]!, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(electricCollection.status).toBe(`loading`) // Still syncing in progressive mode + expect(electricCollection.size).toBe(2) + + // Mock requestSnapshot to return additional data + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 3, + value: sampleUsers[2]!, // Charlie + }, + ], + }) + + // Create live query that needs more data + createLiveQueryCollection({ + id: `progressive-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .orderBy(({ user }) => user.id, `asc`) + .limit(3), + }) + + // Wait for the live query to process + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested more data from Electric with correct parameters + // First request asks for the full limit + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 3, // Requests full limit from Electric + orderBy: `id NULLS FIRST`, + params: {}, + }) + ) + }) + + it(`should NOT trigger requestSnapshot in eager mode even when live query needs more data`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`eager`) + + // Initial sync with limited data + simulateInitialSync([sampleUsers[0]!, sampleUsers[1]!]) // Only Alice and Bob + expect(electricCollection.status).toBe(`ready`) + expect(electricCollection.size).toBe(2) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(0) + + // Create live query with limit that exceeds available data + const liveQuery = createLiveQueryCollection({ + id: `eager-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(5), + }) + + // Wait for the live query to process + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should NOT have requested more data (eager mode doesn't support incremental loading) + expect(mockRequestSnapshot).not.toHaveBeenCalled() + expect(liveQuery.size).toBe(2) // Only has the initially synced data + }) + + it(`should request additional snapshots progressively as live query expands in on-demand mode`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + // Initial sync with just Alice + simulateInitialSync([sampleUsers[0]!]) + expect(electricCollection.size).toBe(1) + + // First snapshot returns Bob and Charlie + mockRequestSnapshot.mockResolvedValueOnce({ + data: [ + { + headers: { operation: `insert` }, + key: 2, + value: sampleUsers[1]!, // Bob + }, + { + headers: { operation: `insert` }, + key: 3, + value: sampleUsers[2]!, // Charlie + }, + ], + }) + + // Create live query with limit of 3 + createLiveQueryCollection({ + id: `expanding-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .orderBy(({ user }) => user.age, `asc`) + .limit(3), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested snapshot for limit 3 + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 3, + orderBy: `age NULLS FIRST`, + }) + ) + + // With deduplication, the unlimited where predicate (no where clause) is tracked, + // and subsequent calls for the same unlimited predicate may be deduped. + // After receiving Bob and Charlie, we have 3 users total, which satisfies the limit of 3, + // so no additional requests should be made. + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + }) + + it(`should pass correct WHERE clause to requestSnapshot when live query has filters`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.size).toBe(0) + + // Create filtered live query + createLiveQueryCollection({ + id: `filtered-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.name, `desc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested snapshot with WHERE clause + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + where: `active = $1`, + params: { "1": `true` }, + orderBy: `name DESC NULLS FIRST`, + limit: 10, + }) + ) + }) + + it(`should handle complex filters in requestSnapshot`, async () => { + const electricCollection = + createElectricCollectionWithSyncMode(`progressive`) + + // Send snapshot-end (not up-to-date) to keep collection in loading state + subscriber([ + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(electricCollection.status).toBe(`loading`) // Still syncing in progressive mode + + // Create live query with complex WHERE clause + createLiveQueryCollection({ + id: `complex-filter-live-query`, + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 20)) + .orderBy(({ user }) => user.age, `asc`) + .limit(5), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have requested snapshot with complex WHERE clause + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + where: `age > $1`, + params: { "1": `20` }, + orderBy: `age NULLS FIRST`, + limit: 5, + }) + ) + }) +}) + +// Tests specifically for loadSubset deduplication +describe(`Electric Collection - loadSubset deduplication`, () => { + let subscriber: (messages: Array>) => void + + function createElectricCollectionWithSyncMode( + syncMode: `on-demand` | `progressive` + ) { + vi.clearAllMocks() + + mockSubscribe.mockImplementation((callback) => { + subscriber = callback + return () => {} + }) + + mockRequestSnapshot.mockResolvedValue({ + data: [], + }) + + const config = { + id: `electric-dedupe-test-${syncMode}`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `users`, + }, + }, + syncMode, + getKey: (user: User) => user.id, + } + + const options = electricCollectionOptions(config) + return createCollection({ + ...options, + startSync: true, + autoIndex: `eager` as const, + }) + } + + function simulateInitialSync(users: Array = sampleUsers) { + const messages: Array> = users.map((user) => ({ + key: user.id.toString(), + value: user, + headers: { operation: `insert` }, + })) + + messages.push({ + headers: { control: `up-to-date` }, + }) + + subscriber(messages) + } + + it(`should deduplicate identical concurrent loadSubset requests`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create three identical live queries concurrently + // Without deduplication, this would trigger 3 requestSnapshot calls + // With deduplication, only 1 should be made + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // With deduplication, only 1 requestSnapshot call should be made + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + where: `active = $1`, + params: { "1": `true` }, + orderBy: `age NULLS FIRST`, + limit: 10, + }) + ) + }) + + it(`should deduplicate subset loadSubset requests`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query with a broader predicate + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 10)) + .orderBy(({ user }) => user.age, `asc`) + .limit(20), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create a live query with a subset predicate (age > 20 is subset of age > 10) + // This should be deduped - no additional requestSnapshot call + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 20)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Still only 1 call - the second was deduped as a subset + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + }) + + it(`should NOT deduplicate non-subset loadSubset requests`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query with a narrower predicate + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 30)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create a live query with a broader predicate (age > 20 is NOT subset of age > 30) + // This should NOT be deduped - should trigger another requestSnapshot + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 20)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have 2 calls - the second was not a subset + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + }) + + it(`should reset deduplication state on must-refetch/truncate`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync(sampleUsers) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Simulate a must-refetch (which triggers truncate and reset) + subscriber([{ headers: { control: `must-refetch` } }]) + subscriber([{ headers: { control: `up-to-date` } }]) + + // Wait for the existing live query to re-request data after truncate + await new Promise((resolve) => setTimeout(resolve, 0)) + + // The existing live query re-requests its data after truncate (call 2) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + + // Create the same live query again after reset + // This should NOT be deduped because the reset cleared the deduplication state, + // but it WILL be deduped because the existing live query just made the same request (call 2) + // So creating a different query to ensure we test the reset + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, false)) + .orderBy(({ user }) => user.age, `asc`) + .limit(10), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Should have 3 calls - the different query triggered a new request + expect(mockRequestSnapshot).toHaveBeenCalledTimes(3) + }) + + it(`should deduplicate unlimited queries regardless of orderBy`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create a live query without limit (unlimited) + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.age, `asc`), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create another unlimited query with same where but different orderBy + // This should be deduped - orderBy is ignored for unlimited queries + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => eq(user.active, true)) + .orderBy(({ user }) => user.name, `desc`), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Still only 1 call - different orderBy doesn't matter for unlimited queries + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + }) + + it(`should combine multiple unlimited queries with union`, async () => { + const electricCollection = createElectricCollectionWithSyncMode(`on-demand`) + + simulateInitialSync([]) + expect(electricCollection.status).toBe(`ready`) + + // Create first unlimited query (age > 30) + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 30)), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Create second unlimited query (age < 20) - different range + // This should trigger a new request + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => lt(user.age, 20)), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + + // Create third query (age > 35) - this is a subset of (age > 30) + // This should be deduped + createLiveQueryCollection({ + startSync: true, + query: (q) => + q + .from({ user: electricCollection }) + .where(({ user }) => gt(user.age, 35)), + }) + + await new Promise((resolve) => setTimeout(resolve, 0)) + + // Still 2 calls - third was covered by the union of first two + expect(mockRequestSnapshot).toHaveBeenCalledTimes(2) + }) }) diff --git a/packages/electric-db-collection/tests/electric.test-d.ts b/packages/electric-db-collection/tests/electric.test-d.ts index b45d47370..27f90918d 100644 --- a/packages/electric-db-collection/tests/electric.test-d.ts +++ b/packages/electric-db-collection/tests/electric.test-d.ts @@ -1,6 +1,7 @@ import { describe, expectTypeOf, it } from "vitest" import { z } from "zod" import { + and, createCollection, createLiveQueryCollection, eq, @@ -200,7 +201,7 @@ describe(`Electric collection type resolution tests`, () => { query: (q) => q .from({ user: usersCollection }) - .where(({ user }) => eq(user.active, true) && gt(user.age, 18)) + .where(({ user }) => and(eq(user.active, true), gt(user.age, 18))) .select(({ user }) => ({ id: user.id, name: user.name, diff --git a/packages/electric-db-collection/tests/electric.test.ts b/packages/electric-db-collection/tests/electric.test.ts index bf059a021..032e42033 100644 --- a/packages/electric-db-collection/tests/electric.test.ts +++ b/packages/electric-db-collection/tests/electric.test.ts @@ -19,8 +19,10 @@ import type { StandardSchemaV1 } from "@standard-schema/spec" // Mock the ShapeStream module const mockSubscribe = vi.fn() +const mockRequestSnapshot = vi.fn() const mockStream = { subscribe: mockSubscribe, + requestSnapshot: mockRequestSnapshot, } vi.mock(`@electric-sql/client`, async () => { @@ -50,6 +52,9 @@ describe(`Electric Integration`, () => { return () => {} }) + // Reset mock requestSnapshot + mockRequestSnapshot.mockResolvedValue(undefined) + // Create collection with Electric configuration const config = { id: `test`, @@ -728,6 +733,9 @@ describe(`Electric Integration`, () => { expect(testCollection.has(1)).toBe(true) }) + // NOTE: This test has a known issue with unhandled rejection warnings + // This is a pre-existing issue from main branch (not caused by merge) + // The test functionality works correctly, but vitest reports unhandled rejections it(`should timeout with custom match function when no match found`, async () => { vi.useFakeTimers() @@ -754,14 +762,16 @@ describe(`Electric Integration`, () => { const testCollection = createCollection(electricCollectionOptions(config)) const tx = testCollection.insert({ id: 1, name: `Timeout Test` }) - // Add catch handler to prevent global unhandled rejection detection - tx.isPersisted.promise.catch(() => {}) + // Capture the rejection promise before advancing timers + const rejectionPromise = expect(tx.isPersisted.promise).rejects.toThrow( + `Timeout waiting for custom match function` + ) // Advance timers to trigger timeout await vi.runOnlyPendingTimersAsync() // Should timeout and fail - await expect(tx.isPersisted.promise).rejects.toThrow() + await rejectionPromise vi.useRealTimers() }) @@ -834,6 +844,9 @@ describe(`Electric Integration`, () => { expect(options.onDelete).toBeDefined() }) + // NOTE: This test has a known issue with unhandled rejection warnings + // This is a pre-existing issue from main branch (not caused by merge) + // The test functionality works correctly, but vitest reports unhandled rejections it(`should cleanup pending matches on timeout without memory leaks`, async () => { vi.useFakeTimers() @@ -862,16 +875,16 @@ describe(`Electric Integration`, () => { // Start insert that will timeout const tx = testCollection.insert({ id: 1, name: `Timeout Test` }) - // Add catch handler to prevent global unhandled rejection detection - tx.isPersisted.promise.catch(() => {}) + // Capture the rejection promise before advancing timers + const rejectionPromise = expect(tx.isPersisted.promise).rejects.toThrow( + `Timeout waiting for custom match function` + ) // Advance timers to trigger timeout await vi.runOnlyPendingTimersAsync() // Should timeout and fail - await expect(tx.isPersisted.promise).rejects.toThrow( - `Timeout waiting for custom match function` - ) + await rejectionPromise // Send a message after timeout - should not cause any side effects // This verifies that the pending match was properly cleaned up @@ -1601,7 +1614,662 @@ describe(`Electric Integration`, () => { // Snapshot txid should also resolve await expect(testCollection.utils.awaitTxId(105)).resolves.toBe(true) }) + }) + + // Tests for syncMode configuration + describe(`syncMode configuration`, () => { + it(`should not request snapshots during subscription in eager mode`, () => { + vi.clearAllMocks() + + const config = { + id: `eager-no-snapshot-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Subscribe and try to get more data + const subscription = testCollection.subscribeChanges(() => {}) + + // In eager mode, requestSnapshot should not be called + expect(mockRequestSnapshot).not.toHaveBeenCalled() + + subscription.unsubscribe() + }) + + it(`should request incremental snapshots in on-demand mode when loadSubset is called`, async () => { + vi.clearAllMocks() + + const config = { + id: `on-demand-snapshot-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send up-to-date to mark collection as ready + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // In on-demand mode, calling loadSubset should request a snapshot + await testCollection._sync.loadSubset({ limit: 10 }) + + // Verify requestSnapshot was called + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 10, + params: {}, + }) + ) + }) + + it(`should request incremental snapshots in progressive mode when loadSubset is called before sync completes`, async () => { + vi.clearAllMocks() + + const config = { + id: `progressive-snapshot-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with snapshot-end (but not up-to-date yet - still syncing) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(testCollection.status).toBe(`loading`) // Not ready yet + + // In progressive mode, calling loadSubset should request a snapshot BEFORE full sync completes + await testCollection._sync.loadSubset({ limit: 20 }) + + // Verify requestSnapshot was called + expect(mockRequestSnapshot).toHaveBeenCalledWith( + expect.objectContaining({ + limit: 20, + params: {}, + }) + ) + }) + + it(`should not request snapshots when loadSubset is called in eager mode`, async () => { + vi.clearAllMocks() + + const config = { + id: `eager-no-loadsubset-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send up-to-date to mark collection as ready + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // In eager mode, loadSubset should do nothing + await testCollection._sync.loadSubset({ limit: 10 }) + + // Verify requestSnapshot was NOT called + expect(mockRequestSnapshot).not.toHaveBeenCalled() + }) + + it(`should handle progressive mode syncing in background`, async () => { + vi.clearAllMocks() + + const config = { + id: `progressive-background-sync-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with snapshot-end (but not up-to-date - still syncing) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Initial User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Collection should have data but not be ready yet + expect(testCollection.status).toBe(`loading`) + expect(testCollection.has(1)).toBe(true) + + // Should be able to request more data incrementally before full sync completes + await testCollection._sync.loadSubset({ limit: 10 }) + expect(mockRequestSnapshot).toHaveBeenCalled() + + // Now send up-to-date to complete the sync + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + expect(testCollection.status).toBe(`ready`) + }) + + it(`should stop requesting snapshots in progressive mode after first up-to-date`, async () => { + vi.clearAllMocks() + + const config = { + id: `progressive-stop-after-sync-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with snapshot-end (not up-to-date yet) + subscriber([ + { + key: `1`, + value: { id: 1, name: `User 1` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + expect(testCollection.status).toBe(`loading`) // Not ready yet in progressive + expect(testCollection.has(1)).toBe(true) + + // Should be able to request more data before up-to-date + vi.clearAllMocks() + await testCollection._sync.loadSubset({ limit: 10 }) + expect(mockRequestSnapshot).toHaveBeenCalledTimes(1) + + // Now send up-to-date to complete the full sync + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + expect(testCollection.status).toBe(`ready`) + + // Try to request more data - should NOT make a request since full sync is complete + vi.clearAllMocks() + await testCollection._sync.loadSubset({ limit: 10 }) + expect(mockRequestSnapshot).not.toHaveBeenCalled() + }) + + it(`should allow snapshots in on-demand mode even after up-to-date`, async () => { + vi.clearAllMocks() + + const config = { + id: `on-demand-after-sync-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send initial data with up-to-date + subscriber([ + { + key: `1`, + value: { id: 1, name: `User 1` }, + headers: { operation: `insert` }, + }, + { + headers: { control: `up-to-date` }, + }, + ]) + + expect(testCollection.status).toBe(`ready`) + + // Should STILL be able to request more data in on-demand mode + vi.clearAllMocks() + await testCollection._sync.loadSubset({ limit: 10 }) + expect(mockRequestSnapshot).toHaveBeenCalled() + }) + + it(`should default offset to 'now' in on-demand mode when no offset provided`, async () => { + vi.clearAllMocks() + + // Import ShapeStream to check constructor calls + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `on-demand-offset-now-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + // No offset provided + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with offset: 'now' + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: `now`, + }) + ) + }) + + it(`should use undefined offset in eager mode when no offset provided`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `eager-offset-undefined-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + // No offset provided + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with offset: undefined + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: undefined, + }) + ) + }) + + it(`should use undefined offset in progressive mode when no offset provided`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `progressive-offset-undefined-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + // No offset provided + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with offset: undefined + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: undefined, + }) + ) + }) + + it(`should use explicit offset when provided regardless of syncMode`, async () => { + vi.clearAllMocks() + + const { ShapeStream } = await import(`@electric-sql/client`) + + const config = { + id: `explicit-offset-test`, + shapeOptions: { + url: `http://test-url`, + params: { + table: `test_table`, + }, + offset: -1 as any, // Explicit offset + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + createCollection(electricCollectionOptions(config)) + + // Check that ShapeStream was called with the explicit offset + expect(ShapeStream).toHaveBeenCalledWith( + expect.objectContaining({ + offset: -1, + }) + ) + }) + }) + + // Tests for commit and ready behavior with snapshot-end and up-to-date messages + describe(`Commit and ready behavior`, () => { + it(`should commit on snapshot-end in eager mode but not mark ready`, () => { + const config = { + id: `eager-snapshot-end-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by snapshot-end (but no up-to-date) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Data should be committed (available in state) + expect(testCollection.has(1)).toBe(true) + expect(testCollection.get(1)).toEqual({ id: 1, name: `Test User` }) + + // But collection should NOT be marked as ready yet in eager mode + expect(testCollection.status).toBe(`loading`) + + // Now send up-to-date + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // Now it should be ready + expect(testCollection.status).toBe(`ready`) + }) + + it(`should commit and mark ready on snapshot-end in on-demand mode`, () => { + const config = { + id: `on-demand-snapshot-end-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `on-demand` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by snapshot-end (but no up-to-date) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Data should be committed (available in state) + expect(testCollection.has(1)).toBe(true) + expect(testCollection.get(1)).toEqual({ id: 1, name: `Test User` }) + + // Collection SHOULD be marked as ready in on-demand mode + expect(testCollection.status).toBe(`ready`) + }) + + it(`should commit on snapshot-end in progressive mode but not mark ready`, () => { + const config = { + id: `progressive-snapshot-end-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `progressive` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by snapshot-end (but no up-to-date) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // Data should be committed (available in state) + expect(testCollection.has(1)).toBe(true) + expect(testCollection.get(1)).toEqual({ id: 1, name: `Test User` }) + + // But collection should NOT be marked as ready yet in progressive mode + expect(testCollection.status).toBe(`loading`) + + // Now send up-to-date + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // Now it should be ready + expect(testCollection.status).toBe(`ready`) + }) + + it(`should commit multiple snapshot-end messages before up-to-date in eager mode`, () => { + const config = { + id: `eager-multiple-snapshots-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // First snapshot with data + subscriber([ + { + key: `1`, + value: { id: 1, name: `User 1` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `100`, + xmax: `110`, + xip_list: [], + }, + }, + ]) + + // First data should be committed + expect(testCollection.has(1)).toBe(true) + expect(testCollection.status).toBe(`loading`) + + // Second snapshot with more data + subscriber([ + { + key: `2`, + value: { id: 2, name: `User 2` }, + headers: { operation: `insert` }, + }, + { + headers: { + control: `snapshot-end`, + xmin: `110`, + xmax: `120`, + xip_list: [], + }, + }, + ]) + + // Second data should also be committed + expect(testCollection.has(2)).toBe(true) + expect(testCollection.size).toBe(2) + expect(testCollection.status).toBe(`loading`) + + // Finally send up-to-date + subscriber([ + { + headers: { control: `up-to-date` }, + }, + ]) + + // Now should be ready + expect(testCollection.status).toBe(`ready`) + }) + + it(`should handle up-to-date without snapshot-end (traditional behavior)`, () => { + const config = { + id: `traditional-up-to-date-test`, + shapeOptions: { + url: `http://test-url`, + params: { table: `test_table` }, + }, + syncMode: `eager` as const, + getKey: (item: Row) => item.id as number, + startSync: true, + } + + const testCollection = createCollection(electricCollectionOptions(config)) + + // Send data followed by up-to-date (no snapshot-end) + subscriber([ + { + key: `1`, + value: { id: 1, name: `Test User` }, + headers: { operation: `insert` }, + }, + { + headers: { control: `up-to-date` }, + }, + ]) + + // Data should be committed and collection ready + expect(testCollection.has(1)).toBe(true) + expect(testCollection.status).toBe(`ready`) + }) + }) + describe(`syncMode configuration - GC and resync`, () => { it(`should resync after garbage collection and new subscription`, () => { // Use fake timers for this test vi.useFakeTimers() diff --git a/packages/electric-db-collection/tsconfig.json b/packages/electric-db-collection/tsconfig.json index 7e586bab3..fc6368937 100644 --- a/packages/electric-db-collection/tsconfig.json +++ b/packages/electric-db-collection/tsconfig.json @@ -12,7 +12,9 @@ "forceConsistentCasingInFileNames": true, "jsx": "react", "paths": { - "@tanstack/store": ["../store/src"] + "@tanstack/store": ["../store/src"], + "@tanstack/db-ivm": ["../db-ivm/src"], + "@tanstack/db": ["../db/src"] } }, "include": ["src", "tests", "vite.config.ts"],