@@ -134,6 +134,7 @@ tsk_variant_init(tsk_variant_t *self, const tsk_treeseq_t *tree_sequence,
134134 tsk_size_t num_samples_alloc ;
135135
136136 tsk_memset (self , 0 , sizeof (tsk_variant_t ));
137+ self -> missingmess_cache_tree_index = TSK_NULL ;
137138
138139 /* Set site id to NULL to indicate the variant is not decoded */
139140 self -> site .id = TSK_NULL ;
@@ -412,6 +413,55 @@ tsk_variant_update_genotypes_traversal(
412413 return tsk_variant_traverse (self , node , derived , tsk_variant_visit );
413414}
414415
416+ static void
417+ tsk_variant_update_missing_cache (tsk_variant_t * self )
418+ {
419+ const tsk_id_t * restrict left_child = self -> tree .left_child ;
420+ const tsk_id_t * restrict right_sib = self -> tree .right_sib ;
421+ const tsk_id_t * restrict sample_index_map = self -> sample_index_map ;
422+ const tsk_id_t N = self -> tree .virtual_root ;
423+ const tsk_flags_t * restrict flags = self -> tree_sequence -> tables -> nodes .flags ;
424+ bool * restrict present = self -> sample_is_present ;
425+ tsk_id_t * restrict stack = self -> traversal_stack ;
426+ tsk_id_t root , u , v , sample_index ;
427+ tsk_size_t j ;
428+ int stack_top ;
429+
430+ tsk_bug_assert (self -> alt_samples != NULL );
431+ tsk_bug_assert (self -> tree .index != TSK_NULL );
432+
433+ tsk_memset (present , 0 , self -> num_samples * sizeof (* present ));
434+
435+ stack_top = -1 ;
436+ for (root = left_child [N ]; root != TSK_NULL ; root = right_sib [root ]) {
437+ stack_top ++ ;
438+ stack [stack_top ] = root ;
439+ }
440+
441+ while (stack_top >= 0 ) {
442+ u = stack [stack_top ];
443+ stack_top -- ;
444+ sample_index = sample_index_map [u ];
445+ if (sample_index != TSK_NULL ) {
446+ present [sample_index ] = true;
447+ }
448+ for (v = left_child [u ]; v != TSK_NULL ; v = right_sib [v ]) {
449+ stack_top ++ ;
450+ stack [stack_top ] = v ;
451+ }
452+ }
453+
454+ for (j = 0 ; j < self -> num_samples ; j ++ ) {
455+ u = self -> samples [j ];
456+ present [j ]
457+ = present [j ]
458+ && !((flags [u ] & TSK_NODE_IS_SAMPLE ) != 0
459+ && self -> tree .parent [u ] == TSK_NULL && left_child [u ] == TSK_NULL );
460+ }
461+
462+ self -> missingmess_cache_tree_index = self -> tree .index ;
463+ }
464+
415465static tsk_size_t
416466tsk_variant_mark_missing (tsk_variant_t * self )
417467{
@@ -421,12 +471,8 @@ tsk_variant_mark_missing(tsk_variant_t *self)
421471 const tsk_id_t * restrict sample_index_map = self -> sample_index_map ;
422472 const tsk_id_t N = self -> tree .virtual_root ;
423473 int32_t * restrict genotypes = self -> genotypes ;
424- tsk_id_t root , sample_index , u , v ;
425- const tsk_flags_t * restrict flags = self -> tree_sequence -> tables -> nodes .flags ;
426- bool * restrict present = self -> sample_is_present ;
427- tsk_id_t * restrict stack = self -> traversal_stack ;
428- int stack_top = -1 ;
429- tsk_size_t j = 0 ;
474+ tsk_id_t root , sample_index ;
475+ tsk_size_t j ;
430476
431477 if (self -> alt_samples == NULL ) {
432478 for (root = left_child [N ]; root != TSK_NULL ; root = right_sib [root ]) {
@@ -439,37 +485,14 @@ tsk_variant_mark_missing(tsk_variant_t *self)
439485 }
440486 }
441487 } else {
442- tsk_memset (present , 0 , self -> num_samples * sizeof (* present ));
443-
444- for (root = left_child [N ]; root != TSK_NULL ; root = right_sib [root ]) {
445- stack_top ++ ;
446- stack [stack_top ] = root ;
447- }
448-
449- while (stack_top >= 0 ) {
450- u = stack [stack_top ];
451- stack_top -- ;
452- sample_index = sample_index_map [u ];
453- if (sample_index != TSK_NULL ) {
454- present [sample_index ] = true;
455- }
456- for (v = left_child [u ]; v != TSK_NULL ; v = right_sib [v ]) {
457- stack_top ++ ;
458- stack [stack_top ] = v ;
459- }
460- }
461-
462488 for (j = 0 ; j < self -> num_samples ; j ++ ) {
463- u = self -> samples [j ];
464- if (!present [j ]
465- || ((flags [u ] & TSK_NODE_IS_SAMPLE ) != 0
466- && self -> tree .parent [u ] == TSK_NULL
467- && left_child [u ] == TSK_NULL )) {
489+ if (!self -> sample_is_present [j ]) {
468490 genotypes [j ] = TSK_MISSING_DATA ;
469491 num_missing ++ ;
470492 }
471493 }
472494 }
495+
473496 return num_missing ;
474497}
475498
@@ -570,6 +593,10 @@ tsk_variant_decode(
570593 * mutations directly over samples should not be missing */
571594 num_missing = 0 ;
572595 if (!impute_missing ) {
596+ if (self -> alt_samples != NULL
597+ && self -> missingmess_cache_tree_index != self -> tree .index ) {
598+ tsk_variant_update_missing_cache (self );
599+ }
573600 num_missing = mark_missing (self );
574601 }
575602 for (j = 0 ; j < self -> site .mutations_length ; j ++ ) {
@@ -627,6 +654,7 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
627654 other -> alt_sample_index_map = NULL ;
628655 other -> sample_is_present = NULL ;
629656 other -> user_alleles_mem = NULL ;
657+ other -> missingmess_cache_tree_index = TSK_NULL ;
630658
631659 total_len = 0 ;
632660 for (j = 0 ; j < self -> num_alleles ; j ++ ) {
0 commit comments