Skip to content

Commit dc9e33b

Browse files
committed
Only calculate visited nodes per-tree
1 parent ff0e9f6 commit dc9e33b

File tree

2 files changed

+60
-31
lines changed

2 files changed

+60
-31
lines changed

c/tskit/genotypes.c

Lines changed: 59 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ tsk_variant_init(tsk_variant_t *self, const tsk_treeseq_t *tree_sequence,
134134
tsk_size_t num_samples_alloc;
135135

136136
tsk_memset(self, 0, sizeof(tsk_variant_t));
137+
self->missingmess_cache_tree_index = TSK_NULL;
137138

138139
/* Set site id to NULL to indicate the variant is not decoded */
139140
self->site.id = TSK_NULL;
@@ -412,6 +413,55 @@ tsk_variant_update_genotypes_traversal(
412413
return tsk_variant_traverse(self, node, derived, tsk_variant_visit);
413414
}
414415

416+
static void
417+
tsk_variant_update_missing_cache(tsk_variant_t *self)
418+
{
419+
const tsk_id_t *restrict left_child = self->tree.left_child;
420+
const tsk_id_t *restrict right_sib = self->tree.right_sib;
421+
const tsk_id_t *restrict sample_index_map = self->sample_index_map;
422+
const tsk_id_t N = self->tree.virtual_root;
423+
const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;
424+
bool *restrict present = self->sample_is_present;
425+
tsk_id_t *restrict stack = self->traversal_stack;
426+
tsk_id_t root, u, v, sample_index;
427+
tsk_size_t j;
428+
int stack_top;
429+
430+
tsk_bug_assert(self->alt_samples != NULL);
431+
tsk_bug_assert(self->tree.index != TSK_NULL);
432+
433+
tsk_memset(present, 0, self->num_samples * sizeof(*present));
434+
435+
stack_top = -1;
436+
for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) {
437+
stack_top++;
438+
stack[stack_top] = root;
439+
}
440+
441+
while (stack_top >= 0) {
442+
u = stack[stack_top];
443+
stack_top--;
444+
sample_index = sample_index_map[u];
445+
if (sample_index != TSK_NULL) {
446+
present[sample_index] = true;
447+
}
448+
for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {
449+
stack_top++;
450+
stack[stack_top] = v;
451+
}
452+
}
453+
454+
for (j = 0; j < self->num_samples; j++) {
455+
u = self->samples[j];
456+
present[j]
457+
= present[j]
458+
&& !((flags[u] & TSK_NODE_IS_SAMPLE) != 0
459+
&& self->tree.parent[u] == TSK_NULL && left_child[u] == TSK_NULL);
460+
}
461+
462+
self->missingmess_cache_tree_index = self->tree.index;
463+
}
464+
415465
static tsk_size_t
416466
tsk_variant_mark_missing(tsk_variant_t *self)
417467
{
@@ -421,12 +471,8 @@ tsk_variant_mark_missing(tsk_variant_t *self)
421471
const tsk_id_t *restrict sample_index_map = self->sample_index_map;
422472
const tsk_id_t N = self->tree.virtual_root;
423473
int32_t *restrict genotypes = self->genotypes;
424-
tsk_id_t root, sample_index, u, v;
425-
const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;
426-
bool *restrict present = self->sample_is_present;
427-
tsk_id_t *restrict stack = self->traversal_stack;
428-
int stack_top = -1;
429-
tsk_size_t j = 0;
474+
tsk_id_t root, sample_index;
475+
tsk_size_t j;
430476

431477
if (self->alt_samples == NULL) {
432478
for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) {
@@ -439,37 +485,14 @@ tsk_variant_mark_missing(tsk_variant_t *self)
439485
}
440486
}
441487
} else {
442-
tsk_memset(present, 0, self->num_samples * sizeof(*present));
443-
444-
for (root = left_child[N]; root != TSK_NULL; root = right_sib[root]) {
445-
stack_top++;
446-
stack[stack_top] = root;
447-
}
448-
449-
while (stack_top >= 0) {
450-
u = stack[stack_top];
451-
stack_top--;
452-
sample_index = sample_index_map[u];
453-
if (sample_index != TSK_NULL) {
454-
present[sample_index] = true;
455-
}
456-
for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {
457-
stack_top++;
458-
stack[stack_top] = v;
459-
}
460-
}
461-
462488
for (j = 0; j < self->num_samples; j++) {
463-
u = self->samples[j];
464-
if (!present[j]
465-
|| ((flags[u] & TSK_NODE_IS_SAMPLE) != 0
466-
&& self->tree.parent[u] == TSK_NULL
467-
&& left_child[u] == TSK_NULL)) {
489+
if (!self->sample_is_present[j]) {
468490
genotypes[j] = TSK_MISSING_DATA;
469491
num_missing++;
470492
}
471493
}
472494
}
495+
473496
return num_missing;
474497
}
475498

@@ -570,6 +593,10 @@ tsk_variant_decode(
570593
* mutations directly over samples should not be missing */
571594
num_missing = 0;
572595
if (!impute_missing) {
596+
if (self->alt_samples != NULL
597+
&& self->missingmess_cache_tree_index != self->tree.index) {
598+
tsk_variant_update_missing_cache(self);
599+
}
573600
num_missing = mark_missing(self);
574601
}
575602
for (j = 0; j < self->site.mutations_length; j++) {
@@ -627,6 +654,7 @@ tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
627654
other->alt_sample_index_map = NULL;
628655
other->sample_is_present = NULL;
629656
other->user_alleles_mem = NULL;
657+
other->missingmess_cache_tree_index = TSK_NULL;
630658

631659
total_len = 0;
632660
for (j = 0; j < self->num_alleles; j++) {

c/tskit/genotypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ typedef struct {
7777
tsk_flags_t options;
7878
tsk_id_t *alt_samples;
7979
tsk_id_t *alt_sample_index_map;
80+
tsk_id_t missingmess_cache_tree_index;
8081

8182
} tsk_variant_t;
8283

0 commit comments

Comments
 (0)