Replace use of pandas with numpy

szhan · szhan · commit 10543bd485d8 · 2023-08-08T07:18:14.000+01:00
diff --git a/python/tests/test_imputation.py b/python/tests/test_imputation.py
@@ -4,7 +4,6 @@
 import io
 
 import numpy as np
-import pandas as pd
 
 import tskit
 
@@ -116,6 +115,9 @@ def get_toy_ts():
 #    bwd[m][h] = (unadj. bwd[m][h] + shift) * scale, otherwise
 #    where scale = (1 - switch prob.)/sum of unadj. bwd[m],
 #        and shift = switch prob./n.
+#
+# For each site, the sum of backward values over all haplotypes is calculated
+# before scaling and shifting.
 
 _fwd_matrix_text_1 = """
 m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
@@ -139,10 +141,10 @@ def get_toy_ts():
 
 _bwd_matrix_text_1 = """
 m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
-3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
-3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
-3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
-3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
+3,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
+3,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
+3,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
+3,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
 2,0,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
 2,1,1.000000,0.000000,0.999900,0.000100,0,0,0.000000,0.250000,0.250050,0.250000
 2,2,1.000000,0.000000,0.999900,0.000100,1,0,0.000000,0.250000,0.250050,0.250000
@@ -179,10 +181,10 @@ def get_toy_ts():
 
 _bwd_matrix_text_2 = """
 m,h,probRec,probNoRec,noErrProb,errProb,refAl,queryAl,shift,scale,sum,val
-3,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
-3,1,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
-3,2,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
-3,3,NA,NA,NA,NA,NA,NA,NA,NA,NA,1.000000
+3,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
+3,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
+3,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
+3,3,-1,-1,-1,-1,-1,-1,-1,-1,-1,1.000000
 2,0,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
 2,1,1.000000,0.000000,0.999900,0.000100,0,1,0.000000,0.250000,0.749950,0.250000
 2,2,1.000000,0.000000,0.999900,0.000100,1,1,0.000000,0.250000,0.749950,0.250000
@@ -198,28 +200,22 @@ def get_toy_ts():
 """
 
 
-def convert_to_pd_df(matrix_text):
+def convert_to_numpy(matrix_text):
     """
-    Converts a matrix in text to a Pandas dataframe and returns it.
+    Converts a matrix in text to a numpy array and returns it.
     """
-    df = pd.read_csv(io.StringIO(matrix_text))
-    for i in np.arange(df.shape[0]):
+    x = np.loadtxt(io.StringIO(matrix_text), skiprows=1, delimiter=",")
+    for i in np.arange(x.shape[0]):
         # Check that switch and non-switch probabilities sum to 1
-        assert df.probRec[i] + df.probNoRec[i] == 1 or np.isnan(df.probRec[i])
+        assert (x[i, 2] + x[i, 3]) == 1 or x[i, 2] == -1
         # Check that non-mismatch and mismatch probabilities sum to 1
-        assert df.noErrProb[i] + df.errProb[i] == 1 or np.isnan(df.noErrProb[i])
-    matrix = df.val.to_numpy().reshape(
-        (
-            4,
-            4,
-        )
-    )  # size (m, h)
-    return matrix
+        assert (x[i, 4] + x[i, 5]) == 1 or x[i, 4] == -1
+    return x[:, -1].reshape((4, 4))  # size (m, h)
 
 
 def get_forward_backward_matrices():
-    fwd_matrix_1 = convert_to_pd_df(_fwd_matrix_text_1)
-    bwd_matrix_1 = convert_to_pd_df(_bwd_matrix_text_1)
-    fwd_matrix_2 = convert_to_pd_df(_fwd_matrix_text_2)
-    bwd_matrix_2 = convert_to_pd_df(_bwd_matrix_text_2)
+    fwd_matrix_1 = convert_to_numpy(_fwd_matrix_text_1)
+    bwd_matrix_1 = convert_to_numpy(_bwd_matrix_text_1)
+    fwd_matrix_2 = convert_to_numpy(_fwd_matrix_text_2)
+    bwd_matrix_2 = convert_to_numpy(_bwd_matrix_text_2)
     return [fwd_matrix_1, bwd_matrix_1, fwd_matrix_2, bwd_matrix_2]