@@ -278,15 +278,32 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
278278 assert np .all (np .isin (np .unique (ref_h ), [0 , 1 ]))
279279 assert np .all (np .isin (np .unique (query_h ), [- 1 , 0 , 1 ]))
280280 sm = np .zeros ((m , h ), dtype = np .float64 ) # HMM state probability matrix
281- fwd_hap_probs = np .zeros ((m , 2 ), dtype = np .float64 )
282- bwd_hap_probs = np .zeros ((m , 2 ), dtype = np .float64 )
281+
282+ def _get_ref_hap_seg (a , b ):
283+ """
284+ Assuming all biallelic sites, get the index of a reference haplotype segment (i)
285+ defined by the alleles a and b at two adjacent genotyped markers.
286+
287+ #i, a, b
288+ 0, 0, 0
289+ 1, 1, 0
290+ 2, 0, 1
291+ 3, 1, 1
292+
293+ See https://github.com/tskit-dev/tskit/issues/2802#issuecomment-1698767169
294+ """
295+ ref_hap_idx = a * 1 + b * 2
296+ return ref_hap_idx
297+
298+ fwd_hap_probs = np .zeros ((m , 4 ), dtype = np .float64 )
299+ bwd_hap_probs = np .zeros ((m , 4 ), dtype = np .float64 )
283300 for i in np .arange (m - 1 , 0 , - 1 ):
284301 for j in np .arange (h ):
285302 sm [i , j ] = fm [i , j ] * bm [i , j ]
286- ref_allele_mP1 = ref_h [i + 1 , j ]
287- ref_allele_m = ref_h [i , j ]
288- fwd_hap_probs [i , ref_allele_mP1 ] += sm [i , j ]
289- bwd_hap_probs [i , ref_allele_m ] += sm [i , j ]
303+ ref_hap_seg_mP1 = _get_ref_hap_seg ( ref_h [i , j ], ref_h [ i + 1 , j ])
304+ ref_hap_seg_m = _get_ref_hap_seg ( ref_h [i - 1 , j ], ref_h [ i , j ])
305+ fwd_hap_probs [i , ref_hap_seg_mP1 ] += sm [i , j ]
306+ bwd_hap_probs [i , ref_hap_seg_m ] += sm [i , j ]
290307 sum_fwd_hap_probs = np .sum (fwd_hap_probs [i , :])
291308 fwd_hap_probs [i , :] /= sum_fwd_hap_probs
292309 bwd_hap_probs [i , :] /= sum_fwd_hap_probs # Sum of forward hap probs
0 commit comments