@@ -292,7 +292,7 @@ def compute_state_probability_matrix(fm, bm, ref_h, query_h, rho, mu):
292292 return (sm , fwd_hap_probs , bwd_hap_probs )
293293
294294
295- def compute_interpolated_haplotype_matrix (
295+ def interpolate_haplotype_probability_matrix (
296296 fwd_hap_probs , bwd_hap_probs , genotyped_pos , imputed_pos
297297):
298298 """
@@ -365,47 +365,65 @@ def run_beagle(ref_h, query_h, pos):
365365 """
366366 Run the BEAGLE 4.1 imputation algorithm.
367367
368+ `ref_h` and `query_h` span all genotyped and imputed markers.
369+
368370 :param numpy.ndarray ref_h: Reference haplotypes.
369371 :param numpy.ndarray query_h: One query haplotype.
370- :param numpy.ndarray pos: Site positions.
372+ :param numpy.ndarray pos: Site positions of all the markers.
371373 :return: MAP alleles at imputed markers in the query haplotype.
372374 :rtype: numpy.ndarray
373375 """
374376 assert ref_h .shape [0 ] == len (pos )
375377 assert query_h .shape [0 ] == len (pos )
378+ # Index of genotyped markers in the query haplotype
376379 genotyped_pos_idx = np .where (query_h != - 1 )[0 ]
380+ # Index of imputed markers in the query haplotype
377381 imputed_pos_idx = np .where (query_h == - 1 )[0 ]
378382 assert len (genotyped_pos_idx ) > 0
379383 assert len (imputed_pos_idx ) > 0
384+ # Site positions of genotyped markers
380385 genotyped_pos = pos [genotyped_pos_idx ]
386+ # Site positions of imputed markers
381387 imputed_pos = pos [imputed_pos_idx ]
382388 m = len (genotyped_pos )
383389 x = len (imputed_pos )
384390 assert m + x == len (pos )
385391 h = ref_h .shape [1 ]
392+ # Subset the reference haplotypes to genotyped markers
386393 ref_h_genotyped = ref_h [genotyped_pos_idx , :]
387394 assert ref_h_genotyped .shape == (m , h )
395+ # Subset the query haplotype to genotyped markers
388396 query_h_genotyped = query_h [genotyped_pos_idx ]
389397 assert len (query_h_genotyped ) == m
398+ # Set mismatch probabilities at genotyped markers
390399 mu = get_mismatch_prob (genotyped_pos )
391400 assert len (mu ) == m
401+ # Set switch probabilities at genotyped markers
392402 rho = get_switch_prob (genotyped_pos , h , ne = 10 ) # Small ref. panel
393403 assert len (rho ) == m
404+ # Compute forward probability matrix over genotyped markers
394405 fm = compute_forward_probability_matrix (ref_h_genotyped , query_h_genotyped , rho , mu )
395406 assert fm .shape == (m , h )
407+ # Compute backward probability matrix over genotyped markers
396408 bm = compute_backward_probability_matrix (
397409 ref_h_genotyped , query_h_genotyped , rho , mu
398410 )
399411 assert bm .shape == (m , h )
400- _ , fwd_hap_probs , bwd_hap_probs = compute_state_probability_matrix (
412+ # Compute HMM state probability matrix over genotyped markers
413+ # and forward and backward haplotype probability matrices
414+ sm , fwd_hap_probs , bwd_hap_probs = compute_state_probability_matrix (
401415 fm , bm , ref_h_genotyped , query_h_genotyped , rho , mu
402416 )
417+ assert sm .shape == (m , h ) # sm not used further
403418 assert fwd_hap_probs .shape == (m , 2 )
404419 assert bwd_hap_probs .shape == (m , 2 )
405- i_hap_probs = compute_interpolated_haplotype_matrix (
420+ # Interpolate haplotype probabilities
421+ # from genotyped markers to imputed markers
422+ i_hap_probs = interpolate_haplotype_probability_matrix (
406423 fwd_hap_probs , bwd_hap_probs , genotyped_pos , imputed_pos
407424 )
408425 assert i_hap_probs .shape == (x , 2 )
426+ # Get MAP alleles at imputed markers
409427 imputed_alleles = get_map_alleles (i_hap_probs )
410428 assert len (imputed_alleles ) == x
411429 return imputed_alleles
0 commit comments