44import io
55
66import numpy as np
7- import pandas as pd
87
98import tskit
109
@@ -116,6 +115,9 @@ def get_toy_ts():
116115# bwd[m][h] = (unadj. bwd[m][h] + shift) * scale, otherwise
117116# where scale = (1 - switch prob.)/sum of unadj. bwd[m],
118117# and shift = switch prob./n.
118+ #
119+ # For each site, the sum of backward value over all haplotypes is calculated
120+ # before scaling and shifting.
119121
120122_fwd_matrix_text_1 = """
121123m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
@@ -139,10 +141,10 @@ def get_toy_ts():
139141
140142_bwd_matrix_text_1 = """
141143m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
142- 3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
143- 3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
144- 3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
145- 3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
144+ 3,0,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
145+ 3,1,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
146+ 3,2,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
147+ 3,3,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
1461482,0,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
1471492,1,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
1481502,2,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
@@ -179,10 +181,10 @@ def get_toy_ts():
179181
180182_bwd_matrix_text_2 = """
181183m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
182- 3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
183- 3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
184- 3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
185- 3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA ,1.000000
184+ 3,0,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
185+ 3,1,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
186+ 3,2,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
187+ 3,3,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,1.000000
1861882,0,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
1871892,1,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
1881902,2,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
@@ -198,28 +200,22 @@ def get_toy_ts():
198200"""
199201
200202
201- def convert_to_pd_df (matrix_text ):
203+ def convert_to_numpy (matrix_text ):
202204 """
203- Converts a matrix in text to a Pandas dataframe and returns it.
205+ Converts a matrix in text to numpy and returns it.
204206 """
205- df = pd . read_csv (io .StringIO (matrix_text ))
206- for i in np .arange (df .shape [0 ]):
207+ x = np . loadtxt (io .StringIO (matrix_text ), skiprows = 1 , delimiter = "," )
208+ for i in np .arange (x .shape [0 ]):
207209 # Check that switch and non-switch probabilities sum to 1
208- assert df . probRec [ i ] + df . probNoRec [ i ] == 1 or np . isnan ( df . probRec [ i ])
210+ assert ( x [ i , 2 ] + x [ i , 3 ]) == 1 or x [ i , 2 ] == - 1
209211 # Check that non-mismatch and mismatch probabilities sum to 1
210- assert df .noErrProb [i ] + df .errProb [i ] == 1 or np .isnan (df .noErrProb [i ])
211- matrix = df .val .to_numpy ().reshape (
212- (
213- 4 ,
214- 4 ,
215- )
216- ) # size (m, h)
217- return matrix
212+ assert (x [i , 4 ] + x [i , 5 ]) == 1 or x [i , 4 ] == - 1
213+ return x [:, - 1 ].reshape ((4 , 4 )) # size (m, h)
218214
219215
220216def get_forward_backward_matrices ():
221- fwd_matrix_1 = convert_to_pd_df (_fwd_matrix_text_1 )
222- bwd_matrix_1 = convert_to_pd_df (_bwd_matrix_text_1 )
223- fwd_matrix_2 = convert_to_pd_df (_fwd_matrix_text_2 )
224- bwd_matrix_2 = convert_to_pd_df (_bwd_matrix_text_2 )
217+ fwd_matrix_1 = convert_to_numpy (_fwd_matrix_text_1 )
218+ bwd_matrix_1 = convert_to_numpy (_bwd_matrix_text_1 )
219+ fwd_matrix_2 = convert_to_numpy (_fwd_matrix_text_2 )
220+ bwd_matrix_2 = convert_to_numpy (_bwd_matrix_text_2 )
225221 return [fwd_matrix_1 , bwd_matrix_1 , fwd_matrix_2 , bwd_matrix_2 ]
0 commit comments