@@ -123,6 +123,21 @@ tsk_haplotype_bitset_next(
123123 return start < limit ? start : limit ;
124124}
125125
/*
 * Build a 64-bit mask with the bits in [start_offset, end_offset) set.
 *
 * Both offsets are bit positions within a single 64-bit word, with bit 0
 * being the least significant bit. An end_offset of 64 or more is treated
 * as the end of the word. An empty or inverted range, or a start_offset at
 * or beyond bit 64, yields 0.
 */
static inline uint64_t
tsk_haplotype_mask_from_offsets(uint32_t start_offset, uint32_t end_offset)
{
    uint64_t high_mask;
    uint64_t low_mask;

    /* Shifting a 64-bit value by 64 or more is undefined behaviour, so
     * reject out-of-word starts before any shift is evaluated. */
    if (start_offset >= end_offset || start_offset >= 64) {
        return 0;
    }
    /* All bits strictly below end_offset. */
    high_mask = end_offset >= 64 ? UINT64_MAX : ((UINT64_C(1) << end_offset) - 1);
    /* All bits strictly below start_offset; this is 0 when start_offset is 0. */
    low_mask = (UINT64_C(1) << start_offset) - 1;
    return high_mask & ~low_mask;
}
140+
126141static bool
127142tsk_haplotype_find_next_uncovered (tsk_haplotype_t * self , tsk_size_t start ,
128143 tsk_size_t end , const int32_t * interval_start , const int32_t * interval_end ,
@@ -139,6 +154,7 @@ tsk_haplotype_find_next_uncovered(tsk_haplotype_t *self, tsk_size_t start,
139154 uint64_t start_mask = UINT64_MAX << (start & 63 );
140155 for (; word <= last_word && word < self -> num_bit_words ; word ++ ) {
141156 if (self -> unresolved_counts [word ] == 0 ) {
157+ start_mask = UINT64_MAX ;
142158 continue ;
143159 }
144160 uint64_t word_bits = self -> unresolved_bits [word ];
@@ -149,26 +165,55 @@ tsk_haplotype_find_next_uncovered(tsk_haplotype_t *self, tsk_size_t start,
149165 uint64_t end_mask = UINT64_MAX >> (63 - ((end - 1 ) & 63 ));
150166 word_bits &= end_mask ;
151167 }
168+ if (word_bits == 0 ) {
169+ start_mask = UINT64_MAX ;
170+ continue ;
171+ }
172+ if (interval_count > 0 ) {
173+ int32_t word_left = (int32_t )(word << 6 );
174+ int32_t word_right = word_left + 64 ;
175+ uint64_t coverage_mask = 0 ;
176+ for (tsk_size_t p = 0 ; p < interval_count ; p ++ ) {
177+ int32_t interval_left = interval_start [p ];
178+ int32_t interval_right = interval_end [p ];
179+ if (interval_left >= interval_right ) {
180+ continue ;
181+ }
182+ if (interval_right <= word_left || interval_left >= word_right ) {
183+ continue ;
184+ }
185+ int32_t clipped_left
186+ = interval_left > word_left ? interval_left : word_left ;
187+ int32_t clipped_right
188+ = interval_right < word_right ? interval_right : word_right ;
189+ if ((int32_t ) start > clipped_left ) {
190+ clipped_left = (int32_t ) start ;
191+ }
192+ if ((int32_t ) end < clipped_right ) {
193+ clipped_right = (int32_t ) end ;
194+ }
195+ if (clipped_left >= clipped_right ) {
196+ continue ;
197+ }
198+ uint32_t start_offset = (uint32_t )(clipped_left - word_left );
199+ uint32_t end_offset = (uint32_t )(clipped_right - word_left );
200+ coverage_mask
201+ |= tsk_haplotype_mask_from_offsets (start_offset , end_offset );
202+ if (coverage_mask == UINT64_MAX ) {
203+ break ;
204+ }
205+ }
206+ word_bits &= ~coverage_mask ;
207+ }
152208 while (word_bits != 0 ) {
153- uint64_t lowest_bit = word_bits & (~word_bits + 1 );
154209 tsk_size_t bit = tsk_haplotype_ctz64 (word_bits );
155- word_bits ^= lowest_bit ;
210+ word_bits &= word_bits - 1 ;
156211 tsk_size_t bit_index = (word << 6 ) + bit ;
157212 if (bit_index >= end ) {
158213 break ;
159214 }
160- bool covered = false;
161- for (tsk_size_t p = 0 ; p < interval_count ; p ++ ) {
162- if (interval_start [p ] <= (int32_t ) bit_index
163- && (int32_t ) bit_index < interval_end [p ]) {
164- covered = true;
165- break ;
166- }
167- }
168- if (!covered ) {
169- * out_index = bit_index ;
170- return true;
171- }
215+ * out_index = bit_index ;
216+ return true;
172217 }
173218 start_mask = UINT64_MAX ;
174219 }
0 commit comments