@@ -12,10 +12,8 @@ import {
1212 type ITreeCursorSynchronous ,
1313 LeafNodeStoredSchema ,
1414 ObjectNodeStoredSchema ,
15- type StoredSchemaCollection ,
1615 type TreeFieldStoredSchema ,
1716 type TreeNodeSchemaIdentifier ,
18- type TreeStoredSchema ,
1917 type TreeStoredSchemaSubscription ,
2018 type TreeValue ,
2119 type Value ,
@@ -24,6 +22,7 @@ import {
2422 ValueSchema ,
2523 type TreeChunk ,
2624 tryGetChunk ,
25+ type SchemaAndPolicy ,
2726} from "../../core/index.js" ;
2827import { getOrCreate } from "../../util/index.js" ;
2928import type { FullSchemaPolicy } from "../modular-schema/index.js" ;
@@ -32,28 +31,38 @@ import { isStableNodeIdentifier } from "../node-identifier/index.js";
3231import { BasicChunk } from "./basicChunk.js" ;
3332import { SequenceChunk } from "./sequenceChunk.js" ;
3433import { type FieldShape , TreeShape , UniformChunk } from "./uniformChunk.js" ;
34+ import type { IncrementalEncodingPolicy } from "./codec/index.js" ;
3535
3636export interface Disposable {
3737 /**
3838 * Cleans up resources used by this, such as inbound event registrations.
3939 */
4040 dispose ( ) : void ;
4141}
42-
4342/**
4443 * Creates a ChunkPolicy which responds to schema changes.
4544 */
4645export function makeTreeChunker (
4746 schema : TreeStoredSchemaSubscription ,
4847 policy : FullSchemaPolicy ,
48+ shouldEncodeIncrementally : IncrementalEncodingPolicy ,
4949) : IChunker {
5050 return new Chunker (
5151 schema ,
5252 policy ,
5353 defaultChunkPolicy . sequenceChunkInlineThreshold ,
5454 defaultChunkPolicy . sequenceChunkInlineThreshold ,
5555 defaultChunkPolicy . uniformChunkNodeCount ,
56- tryShapeFromSchema ,
56+ ( type : TreeNodeSchemaIdentifier , shapes : Map < TreeNodeSchemaIdentifier , ShapeInfo > ) =>
57+ tryShapeFromNodeSchema (
58+ {
59+ schema,
60+ policy,
61+ shouldEncodeIncrementally,
62+ shapes,
63+ } ,
64+ type ,
65+ ) ,
5766 ) ;
5867}
5968
@@ -73,7 +82,7 @@ export interface IChunker extends ChunkPolicy, Disposable {
7382 *
7483 * @remarks
7584 * For example, a schema transitively containing a sequence field, optional field, or allowing multiple child types will be Polymorphic.
76- * See `tryShapeFromSchema ` for how to tell if a type is Polymorphic.
85+ * See `tryShapeFromNodeSchema ` for how to tell if a type is Polymorphic.
7786 *
7887 * TODO: cache some of the possible shapes here.
7988 */
@@ -109,9 +118,7 @@ export class Chunker implements IChunker {
109118 public readonly sequenceChunkInlineThreshold : number ,
110119 public readonly uniformChunkNodeCount : number ,
111120 // eslint-disable-next-line @typescript-eslint/no-shadow
112- private readonly tryShapeFromSchema : (
113- schema : TreeStoredSchema ,
114- policy : FullSchemaPolicy ,
121+ private readonly tryShapeFromNodeSchema : (
115122 type : TreeNodeSchemaIdentifier ,
116123 shapes : Map < TreeNodeSchemaIdentifier , ShapeInfo > ,
117124 ) => ShapeInfo ,
@@ -126,7 +133,7 @@ export class Chunker implements IChunker {
126133 this . sequenceChunkSplitThreshold ,
127134 this . sequenceChunkInlineThreshold ,
128135 this . uniformChunkNodeCount ,
129- this . tryShapeFromSchema ,
136+ this . tryShapeFromNodeSchema ,
130137 ) ;
131138 }
132139
@@ -138,7 +145,7 @@ export class Chunker implements IChunker {
138145 this . unregisterSchemaCallback = this . schema . events . on ( "afterSchemaChange" , ( ) =>
139146 this . schemaChanged ( ) ,
140147 ) ;
141- return this . tryShapeFromSchema ( this . schema , this . policy , schema , this . typeShapes ) ;
148+ return this . tryShapeFromNodeSchema ( schema , this . typeShapes ) ;
142149 }
143150
144151 public dispose ( ) : void {
@@ -226,75 +233,126 @@ export function makePolicy(policy?: Partial<ChunkPolicy>): ChunkPolicy {
226233 return withDefaults ;
227234}
228235
229- export function shapesFromSchema (
230- schema : StoredSchemaCollection ,
231- policy : FullSchemaPolicy ,
232- ) : Map < TreeNodeSchemaIdentifier , ShapeInfo > {
233- const shapes : Map < TreeNodeSchemaIdentifier , ShapeInfo > = new Map ( ) ;
234- for ( const identifier of schema . nodeSchema . keys ( ) ) {
235- tryShapeFromSchema ( schema , policy , identifier , shapes ) ;
236- }
237- return shapes ;
236+ export interface ShapeFromSchemaParameters extends SchemaAndPolicy {
237+ /**
238+ * Policy function to determine if a field should be encoded incrementally.
239+ * Incrementally encoding requires the subtree to not start in the middle of a larger uniform chunk.
240+ * Thus returning true from this callback indicates that shapes should not be produced which could
241+ * contain the incremental portion as a part of a larger shape.
242+ */
243+ readonly shouldEncodeIncrementally : IncrementalEncodingPolicy ;
244+ /**
245+ * A cache for shapes which may be read and/or updated.
246+ * As the shape is a function of the other members of `ShapeFromSchemaParameters`,
247+ * it must be replaced or cleared if any of the properties other than this cache are modified.
248+ */
249+ readonly shapes : Map < TreeNodeSchemaIdentifier , ShapeInfo > ;
250+ }
251+
252+ /**
253+ * A TreeFieldStoredSchema with some additional context about where it is in the tree.
254+ */
255+ export interface FieldSchemaWithContext {
256+ /**
257+ * The identifier of the specific field schema to analyze for shape uniformity.
258+ */
259+ readonly fieldSchema : TreeFieldStoredSchema ;
260+ /**
261+ * The identifier of the parent node schema containing this field.
262+ * If undefined, this is a root field.
263+ */
264+ readonly parentNodeSchema ?: TreeNodeSchemaIdentifier ;
265+ /**
266+ * The field key/name used to identify this field within the parent node.
267+ */
268+ readonly key : FieldKey ;
238269}
239270
240271/**
241- * If `schema` has only one shape, return it.
272+ * Analyzes a tree node schema to determine if it has a single, uniform shape that can be optimized for chunking.
273+ * If the schema defines a tree structure with a deterministic, fixed shape (no optional fields, no sequences,
274+ * single child types), returns a TreeShape that can be used for efficient uniform chunking. Otherwise,
275+ * returns Polymorphic to indicate the shape varies and should use basic chunking.
276+ *
277+ * @param context - {@link ShapeFromSchemaParameters}.
278+ * @param nodeSchema - The identifier of the specific node schema to analyze for shape uniformity.
279+ * @returns TreeShape if the schema has a uniform shape, or Polymorphic if shape varies.
242280 *
243- * Note that this does not tolerate optional or sequence fields, nor does it optimize for patterns of specific values.
281+ * @remarks
282+ * The determination here is conservative. `shouldEncodeIncrementally` is used to split up shapes so incrementally
283+ * encoded schema are not part of larger shapes. It also does not tolerate optional or sequence fields, nor does it
284+ * optimize for patterns of specific values.
244285 */
245- export function tryShapeFromSchema (
246- schema : StoredSchemaCollection ,
247- policy : FullSchemaPolicy ,
248- type : TreeNodeSchemaIdentifier ,
249- shapes : Map < TreeNodeSchemaIdentifier , ShapeInfo > ,
286+ export function tryShapeFromNodeSchema (
287+ context : ShapeFromSchemaParameters ,
288+ nodeSchema : TreeNodeSchemaIdentifier ,
250289) : ShapeInfo {
251- return getOrCreate ( shapes , type , ( ) => {
252- const treeSchema = schema . nodeSchema . get ( type ) ?? fail ( 0xaf9 /* missing schema */ ) ;
290+ const { schema, shapes } = context ;
291+ return getOrCreate ( shapes , nodeSchema , ( ) => {
292+ const treeSchema = schema . nodeSchema . get ( nodeSchema ) ?? fail ( 0xaf9 /* missing schema */ ) ;
253293 if ( treeSchema instanceof LeafNodeStoredSchema ) {
254294 // Allow all string values (but only string values) to be compressed by the id compressor.
255295 // This allows compressing all compressible identifiers without requiring additional context to know which values could be identifiers.
256296 // Attempting to compress other string shouldn't have significant overhead,
257297 // and if any of them do end up compressing, that's a benefit not a bug.
258298 return treeSchema . leafValue === ValueSchema . String
259- ? new TreeShape ( type , true , [ ] , true )
260- : new TreeShape ( type , true , [ ] , false ) ;
299+ ? new TreeShape ( nodeSchema , true , [ ] , true )
300+ : new TreeShape ( nodeSchema , true , [ ] , false ) ;
261301 }
262302 if ( treeSchema instanceof ObjectNodeStoredSchema ) {
263303 const fieldsArray : FieldShape [ ] = [ ] ;
264- for ( const [ key , field ] of treeSchema . objectNodeFields ) {
265- const fieldShape = tryShapeFromFieldSchema ( schema , policy , field , key , shapes ) ;
304+ for ( const [ key , fieldSchema ] of treeSchema . objectNodeFields ) {
305+ const fieldShape = tryShapeFromFieldSchema ( context , {
306+ fieldSchema,
307+ parentNodeSchema : nodeSchema ,
308+ key,
309+ } ) ;
266310 if ( fieldShape === undefined ) {
267311 return polymorphic ;
268312 }
269313 fieldsArray . push ( fieldShape ) ;
270314 }
271- return new TreeShape ( type , false , fieldsArray ) ;
315+ return new TreeShape ( nodeSchema , false , fieldsArray ) ;
272316 }
273317 return polymorphic ;
274318 } ) ;
275319}
276320
277321/**
278- * If `schema` has only one shape, return it .
322+ * Same as {@link tryShapeFromNodeSchema} but for fields with {@link FieldSchemaWithContext} instead of a nodeSchema.
279323 *
280- * Note that this does not tolerate optional or sequence fields, nor does it optimize for patterns of specific values.
324+ * @param context - {@link ShapeFromSchemaParameters}.
325+ * @param fieldSchemaWithContext - {@link FieldSchemaWithContext}.
326+ * @returns FieldShape if the field has a uniform shape, or undefined if the field is polymorphic.
281327 */
282328export function tryShapeFromFieldSchema (
283- schema : StoredSchemaCollection ,
284- policy : FullSchemaPolicy ,
285- type : TreeFieldStoredSchema ,
286- key : FieldKey ,
287- shapes : Map < TreeNodeSchemaIdentifier , ShapeInfo > ,
329+ context : ShapeFromSchemaParameters ,
330+ fieldSchemaWithContext : FieldSchemaWithContext ,
288331) : FieldShape | undefined {
289- const kind = policy . fieldKinds . get ( type . kind ) ?? fail ( 0xafa /* missing FieldKind */ ) ;
332+ const { schema, policy, shouldEncodeIncrementally, shapes } = context ;
333+ const { fieldSchema, parentNodeSchema, key } = fieldSchemaWithContext ;
334+ // If this field should be encoded incrementally, use the polymorphic shape so that it
335+ // is chunked separately and can be re-used across encodings if it does not change.
336+ if ( shouldEncodeIncrementally ( parentNodeSchema , key ) ) {
337+ return undefined ;
338+ }
339+ const kind = policy . fieldKinds . get ( fieldSchema . kind ) ?? fail ( 0xafa /* missing FieldKind */ ) ;
290340 if ( kind . multiplicity !== Multiplicity . Single ) {
291341 return undefined ;
292342 }
293- if ( type . types ?. size !== 1 ) {
343+ if ( fieldSchema . types ?. size !== 1 ) {
294344 return undefined ;
295345 }
296- const childType = [ ...type . types ] [ 0 ] ?? oob ( ) ;
297- const childShape = tryShapeFromSchema ( schema , policy , childType , shapes ) ;
346+ const childType = [ ...fieldSchema . types ] [ 0 ] ?? oob ( ) ;
347+ const childShape = tryShapeFromNodeSchema (
348+ {
349+ schema,
350+ policy,
351+ shouldEncodeIncrementally,
352+ shapes,
353+ } ,
354+ childType ,
355+ ) ;
298356 if ( childShape instanceof Polymorphic ) {
299357 return undefined ;
300358 }
@@ -490,7 +548,16 @@ export function chunkRange(
490548 return output ;
491549}
492550/**
493- * @param idCompressor - compressor used to encoded string values that are compressible by the idCompressor for in-memory representation.
551+ * Extracts values from the current cursor position according to the provided tree shape.
552+ *
553+ * Walks through the tree structure defined by the shape, extracting values from leaf nodes
554+ * and recursively processing child fields. If an ID compressor is provided, compressible
555+ * string values (stable node identifiers) will be recompressed for optimal storage.
556+ *
557+ * @param cursor - Tree cursor positioned at the node to extract values from
558+ * @param shape - The tree shape defining the structure to extract
559+ * @param values - Array to append the extracted values to
560+ * @param idCompressor - Optional compressor used to encode string values that are compressible by the idCompressor for in-memory representation.
494561 * If the idCompressor is not provided, the values will be the original uncompressed values.
495562 */
496563export function insertValues (
0 commit comments