3737
3838#include <tskit/genotypes.h>
3939
40+ // FIXME Tskit already has a bitset implementation that maybe we could use
41+
4042static inline uint32_t
4143tsk_haplotype_ctz64 (uint64_t x )
4244{
@@ -151,6 +153,7 @@ tsk_haplotype_find_next_uncovered(tsk_haplotype_t *self, tsk_size_t start,
151153 if (word >= self -> num_bit_words ) {
152154 return false;
153155 }
156+ // FIXME Horrendous logic here, needs jeromeifying.
154157 uint64_t start_mask = UINT64_MAX << (start & 63 );
155158 for (; word <= last_word && word < self -> num_bit_words ; word ++ ) {
156159 if (self -> unresolved_counts [word ] == 0 ) {
@@ -231,6 +234,8 @@ tsk_haplotype_reset_bitset(tsk_haplotype_t *self)
231234 }
232235}
233236
237+ // FIXME We're building the whole index here, which is a bit sad when we're clipping a
238+ // region.
234239static int
235240tsk_haplotype_build_parent_index (tsk_haplotype_t * self )
236241{
@@ -313,6 +318,7 @@ tsk_haplotype_build_parent_index(tsk_haplotype_t *self)
313318 return ret ;
314319}
315320
321+ // FIXME No point adding mutations who are above nodes we have no interest in.
316322static int
317323tsk_haplotype_build_mutation_index (tsk_haplotype_t * self )
318324{
@@ -408,6 +414,7 @@ tsk_haplotype_build_mutation_index(tsk_haplotype_t *self)
408414 return ret ;
409415}
410416
417+ // FIXME Not sure this is even needed
411418static int
412419tsk_haplotype_build_ancestral_states (tsk_haplotype_t * self )
413420{
@@ -659,13 +666,15 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
659666 edge_parent = edges -> parent ;
660667 bits = self -> unresolved_bits ;
661668
669+ // Create a bitset that tracks which sites are still unresolved
662670 for (idx = 0 ; idx < (tsk_size_t ) self -> num_sites ; idx ++ ) {
663671 haplotype [idx ] = (int8_t ) self -> ancestral_states [idx ];
664672 }
665673 tsk_haplotype_reset_bitset (self );
666674
667675 mut_start = self -> node_mutation_offsets [node ];
668676 mut_end = self -> node_mutation_offsets [node + 1 ];
677+ // Apply mutations above this node
669678 for (int32_t m = mut_start ; m < mut_end ; m ++ ) {
670679 int32_t site = self -> node_mutation_sites [m ];
671680 if (site >= 0 && site < self -> num_sites
@@ -682,6 +691,8 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
682691 child_start = self -> parent_index_range [range_offset ];
683692 child_stop = self -> parent_index_range [range_offset + 1 ];
684693 }
694+ // Push all edges from this node (that are still relavent to resolving sites) onto
695+ // the stack
685696 for (int32_t i = child_start ; i < child_stop ; i ++ ) {
686697 tsk_id_t edge = self -> parent_edge_index [i ];
687698 int32_t start = self -> edge_start_index [edge ];
@@ -700,13 +711,15 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
700711 }
701712 }
702713
714+ // Now process the stack until we run out of edges or have resolved all sites
703715 while (stack_top > 0 ) {
704716 stack_top -- ;
705717 tsk_id_t edge = self -> edge_stack [stack_top ];
706718 tsk_id_t ancestor = edge_parent [edge ];
707719 interval_start = self -> stack_interval_start [stack_top ];
708720 interval_end = self -> stack_interval_end [stack_top ];
709721
722+ // Apply mutations above this ancestor
710723 if (ancestor >= 0 ) {
711724 mut_start = self -> node_mutation_offsets [ancestor ];
712725 mut_end = self -> node_mutation_offsets [ancestor + 1 ];
@@ -720,6 +733,8 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
720733 }
721734 }
722735
736+ // Going up the tree push all edges from this ancestor (that are still relavent
737+ // to resolving sites)
723738 parent_count = 0 ;
724739 if (ancestor >= 0 && self -> parent_index_range != NULL ) {
725740 int32_t range_offset = ancestor * 2 ;
@@ -756,6 +771,8 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
756771 child_stop = 0 ;
757772 }
758773
774+ // Clear out any sites that are still unresolved in this interval but not covered
775+ // by any parent edges
759776 tsk_size_t uncovered_idx ;
760777 while (tsk_haplotype_find_next_uncovered (self , (tsk_size_t ) interval_start ,
761778 (tsk_size_t ) interval_end , self -> parent_interval_start ,
@@ -766,6 +783,7 @@ tsk_haplotype_decode(tsk_haplotype_t *self, tsk_id_t node, int8_t *haplotype)
766783 }
767784 }
768785
786+ // Reset the bitset for next time
769787 for (tsk_size_t w = 0 ; w < self -> num_bit_words ; w ++ ) {
770788 self -> unresolved_bits [w ] = 0 ;
771789 self -> unresolved_counts [w ] = 0 ;
0 commit comments