Skip to content

Commit af7f033

Browse files
committed
Comments
1 parent 22ae99a commit af7f033

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

c/tskit/genotypes.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737

3838
#include <tskit/genotypes.h>
3939

40+
// FIXME Tskit already has a bitset implementation that maybe we could use
41+
4042
static inline uint32_t
4143
tsk_haplotype_ctz64(uint64_t x)
4244
{
@@ -151,6 +153,7 @@ tsk_haplotype_find_next_uncovered(tsk_haplotype_t *self, tsk_size_t start,
151153
if (word >= self->num_bit_words) {
152154
return false;
153155
}
156+
// FIXME Horrendous logic here, needs jeromeifying.
154157
uint64_t start_mask = UINT64_MAX << (start & 63);
155158
for (; word <= last_word && word < self->num_bit_words; word++) {
156159
if (self->unresolved_counts[word] == 0) {
@@ -231,6 +234,8 @@ tsk_haplotype_reset_bitset(tsk_haplotype_t *self)
231234
}
232235
}
233236

237+
// FIXME We're building the whole index here, which is a bit sad when we're clipping a
238+
// region.
234239
static int
235240
tsk_haplotype_build_parent_index(tsk_haplotype_t *self)
236241
{
@@ -313,6 +318,7 @@ tsk_haplotype_build_parent_index(tsk_haplotype_t *self)
313318
return ret;
314319
}
315320

321+
// FIXME No point adding mutations who are above nodes we have no interest in.
316322
static int
317323
tsk_haplotype_build_mutation_index(tsk_haplotype_t *self)
318324
{
@@ -408,6 +414,7 @@ tsk_haplotype_build_mutation_index(tsk_haplotype_t *self)
408414
return ret;
409415
}
410416

417+
// FIXME Not sure this is even needed
411418
static int
412419
tsk_haplotype_build_ancestral_states(tsk_haplotype_t *self)
413420
{
@@ -659,13 +666,15 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
659666
edge_parent = edges->parent;
660667
bits = self->unresolved_bits;
661668

669+
// Create a bitset that tracks which sites are still unresolved
662670
for (idx = 0; idx < (tsk_size_t) self->num_sites; idx++) {
663671
haplotype[idx] = (int8_t) self->ancestral_states[idx];
664672
}
665673
tsk_haplotype_reset_bitset(self);
666674

667675
mut_start = self->node_mutation_offsets[node];
668676
mut_end = self->node_mutation_offsets[node + 1];
677+
// Apply mutations above this node
669678
for (int32_t m = mut_start; m < mut_end; m++) {
670679
int32_t site = self->node_mutation_sites[m];
671680
if (site >= 0 && site < self->num_sites
@@ -682,6 +691,8 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
682691
child_start = self->parent_index_range[range_offset];
683692
child_stop = self->parent_index_range[range_offset + 1];
684693
}
694+
// Push all edges from this node (that are still relavent to resolving sites) onto
695+
// the stack
685696
for (int32_t i = child_start; i < child_stop; i++) {
686697
tsk_id_t edge = self->parent_edge_index[i];
687698
int32_t start = self->edge_start_index[edge];
@@ -700,13 +711,15 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
700711
}
701712
}
702713

714+
// Now process the stack until we run out of edges or have resolved all sites
703715
while (stack_top > 0) {
704716
stack_top--;
705717
tsk_id_t edge = self->edge_stack[stack_top];
706718
tsk_id_t ancestor = edge_parent[edge];
707719
interval_start = self->stack_interval_start[stack_top];
708720
interval_end = self->stack_interval_end[stack_top];
709721

722+
// Apply mutations above this ancestor
710723
if (ancestor >= 0) {
711724
mut_start = self->node_mutation_offsets[ancestor];
712725
mut_end = self->node_mutation_offsets[ancestor + 1];
@@ -720,6 +733,8 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
720733
}
721734
}
722735

736+
// Going up the tree push all edges from this ancestor (that are still relavent
737+
// to resolving sites)
723738
parent_count = 0;
724739
if (ancestor >= 0 && self->parent_index_range != NULL) {
725740
int32_t range_offset = ancestor * 2;
@@ -756,6 +771,8 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
756771
child_stop = 0;
757772
}
758773

774+
// Clear out any sites that are still unresolved in this interval but not covered
775+
// by any parent edges
759776
tsk_size_t uncovered_idx;
760777
while (tsk_haplotype_find_next_uncovered(self, (tsk_size_t) interval_start,
761778
(tsk_size_t) interval_end, self->parent_interval_start,
@@ -766,6 +783,7 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
766783
}
767784
}
768785

786+
// Reset the bitset for next time
769787
for (tsk_size_t w = 0; w < self->num_bit_words; w++) {
770788
self->unresolved_bits[w] = 0;
771789
self->unresolved_counts[w] = 0;

python/tskit/trees.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5274,6 +5274,8 @@ def _haplotypes_array(
52745274
num_sites = stop_site - start_site
52755275
missing_int8 = ord(missing_data_character)
52765276

5277+
# FIXME! The low-level code doesn't support isolated_as_missing
5278+
# yet so we do this ugly check here
52775279
want_missing = (
52785280
True if isolated_as_missing is None else bool(isolated_as_missing)
52795281
)
@@ -5323,7 +5325,9 @@ def _haplotypes_array(
53235325
if num_samples == 0 or num_sites == 0:
53245326
return H, (start_site, stop_site - 1)
53255327

5326-
# For now deal with missing data using the variants iterator
5328+
# FIXME! The low-level code doesn't support isolated_as_missing
5329+
# yet so we do this ugly thing of using the variants code to find
5330+
# sites with missing data
53275331
missing_mask = None
53285332
if want_missing:
53295333
for var in self.variants(

0 commit comments

Comments
 (0)