@@ -57,6 +57,7 @@ def __init__(
5757 verbosity = 0 ,
5858 internal_checks = False ,
5959 centre = True ,
60+ span_normalise = True ,
6061 ):
6162 self .sample_weights = np .asarray (sample_weights , dtype = np .float64 )
6263 self .num_weights = self .sample_weights .shape [1 ]
@@ -80,6 +81,7 @@ def __init__(
8081 self .verbosity = verbosity
8182 self .internal_checks = internal_checks
8283 self .centre = centre
84+ self .span_normalise = span_normalise
8385
8486 if self .centre :
8587 self .sample_weights -= np .mean (self .sample_weights , axis = 0 )
@@ -285,6 +287,12 @@ def run(self):
285287 if self .centre :
286288 for m in range (num_windows ):
287289 out [m ] -= np .mean (out [m ], axis = 0 )
290+
291+ if self .span_normalise :
292+ window_lengths = np .diff (self .windows )
293+ for m in range (num_windows ):
294+ out [m ] /= window_lengths [m ]
295+
288296 return out
289297
290298
@@ -326,7 +334,7 @@ def relatedness_vector(ts, sample_weights, windows=None, nodes=None, **kwargs):
326334 return out
327335
328336
329- def relatedness_matrix (ts , windows , centre , nodes = None ):
337+ def relatedness_matrix (ts , windows , centre , nodes = None , span_normalise = True ):
330338 if nodes is None :
331339 keep_rows = np .arange (ts .num_samples )
332340 keep_cols = np .arange (ts .num_samples )
@@ -356,7 +364,7 @@ def relatedness_matrix(ts, windows, centre, nodes=None):
356364 indexes = [(i , j ) for i in range (ts .num_samples ) for j in range (ts .num_samples )],
357365 windows = use_windows ,
358366 mode = "branch" ,
359- span_normalise = False ,
367+ span_normalise = span_normalise ,
360368 proportion = False ,
361369 centre = centre ,
362370 )
@@ -375,7 +383,15 @@ def relatedness_matrix(ts, windows, centre, nodes=None):
375383
376384
377385def verify_relatedness_vector (
378- ts , w , windows , * , internal_checks = False , verbosity = 0 , centre = True , nodes = None
386+ ts ,
387+ w ,
388+ windows ,
389+ * ,
390+ internal_checks = False ,
391+ verbosity = 0 ,
392+ centre = True ,
393+ nodes = None ,
394+ span_normalise = True ,
379395):
380396 R1 = relatedness_vector (
381397 ts ,
@@ -385,18 +401,26 @@ def verify_relatedness_vector(
385401 verbosity = verbosity ,
386402 centre = centre ,
387403 nodes = nodes ,
404+ span_normalise = span_normalise ,
388405 )
389406 nrows = ts .num_samples if nodes is None else len (nodes )
390407 wvec = w if len (w .shape ) > 1 else w [:, np .newaxis ]
391- Sigma = relatedness_matrix (ts , windows = windows , centre = centre , nodes = nodes )
408+ Sigma = relatedness_matrix (
409+ ts , windows = windows , centre = centre , nodes = nodes , span_normalise = span_normalise
410+ )
392411 if windows is None :
393412 R2 = Sigma .dot (wvec )
394413 else :
395414 R2 = np .zeros ((len (windows ) - 1 , nrows , wvec .shape [1 ]))
396415 for k in range (len (windows ) - 1 ):
397416 R2 [k ] = Sigma [k ].dot (wvec )
398417 R3 = ts .genetic_relatedness_vector (
399- w , windows = windows , mode = "branch" , centre = centre , nodes = nodes
418+ w ,
419+ windows = windows ,
420+ mode = "branch" ,
421+ centre = centre ,
422+ nodes = nodes ,
423+ span_normalise = span_normalise ,
400424 )
401425 if verbosity > 0 :
402426 print (ts .draw_text ())
@@ -425,6 +449,7 @@ def check_relatedness_vector(
425449 verbosity = 0 ,
426450 seed = 123 ,
427451 centre = True ,
452+ span_normalise = True ,
428453 do_nodes = True ,
429454):
430455 rng = np .random .default_rng (seed = seed )
@@ -456,6 +481,7 @@ def check_relatedness_vector(
456481 verbosity = verbosity ,
457482 centre = centre ,
458483 nodes = nodes ,
484+ span_normalise = span_normalise ,
459485 )
460486 return R
461487
@@ -568,8 +594,9 @@ def test_good_nodes(self):
568594 @pytest .mark .parametrize ("n" , [2 , 3 , 5 ])
569595 @pytest .mark .parametrize ("seed" , range (1 , 4 ))
570596 @pytest .mark .parametrize ("centre" , (True , False ))
597+ @pytest .mark .parametrize ("span_normalise" , (True , False ))
571598 @pytest .mark .parametrize ("num_windows" , (0 , 1 , 2 , 3 ))
572- def test_small_internal_checks (self , n , seed , centre , num_windows ):
599+ def test_small_internal_checks (self , n , seed , centre , span_normalise , num_windows ):
573600 ts = msprime .sim_ancestry (
574601 n ,
575602 ploidy = 1 ,
@@ -579,14 +606,19 @@ def test_small_internal_checks(self, n, seed, centre, num_windows):
579606 )
580607 assert ts .num_trees >= 2
581608 check_relatedness_vector (
582- ts , num_windows = num_windows , internal_checks = True , centre = centre
609+ ts ,
610+ num_windows = num_windows ,
611+ internal_checks = True ,
612+ centre = centre ,
613+ span_normalise = span_normalise ,
583614 )
584615
585616 @pytest .mark .parametrize ("n" , [2 , 3 , 5 , 15 ])
586617 @pytest .mark .parametrize ("seed" , range (1 , 5 ))
587618 @pytest .mark .parametrize ("centre" , (True , False ))
619+ @pytest .mark .parametrize ("span_normalise" , (True , False ))
588620 @pytest .mark .parametrize ("num_windows" , (0 , 1 , 2 , 3 ))
589- def test_simple_sims (self , n , seed , centre , num_windows ):
621+ def test_simple_sims (self , n , seed , centre , span_normalise , num_windows ):
590622 ts = msprime .sim_ancestry (
591623 n ,
592624 ploidy = 1 ,
@@ -597,7 +629,11 @@ def test_simple_sims(self, n, seed, centre, num_windows):
597629 )
598630 assert ts .num_trees >= 2
599631 check_relatedness_vector (
600- ts , num_windows = num_windows , centre = centre , verbosity = 0
632+ ts ,
633+ num_windows = num_windows ,
634+ centre = centre ,
635+ verbosity = 0 ,
636+ span_normalise = span_normalise ,
601637 )
602638
603639 def test_simple_sims_windows (self ):
@@ -613,18 +649,42 @@ def test_simple_sims_windows(self):
613649 assert ts .num_trees >= 2
614650 W = np .linspace (0 , 1 , 2 * ts .num_samples ).reshape ((ts .num_samples , 2 ))
615651 kwargs = {"centre" : False , "mode" : "branch" }
616- total = ts .genetic_relatedness_vector (W , ** kwargs )
652+ total = ts .genetic_relatedness_vector (W , span_normalise = False , ** kwargs )
617653 for windows in [[0 , L ], [0 , L / 3 , L / 2 , L ]]:
618- pieces = ts .genetic_relatedness_vector (W , windows = windows , ** kwargs )
654+ pieces = ts .genetic_relatedness_vector (
655+ W , windows = windows , span_normalise = False , ** kwargs
656+ )
619657 np .testing .assert_allclose (total , pieces .sum (axis = 0 ), atol = 1e-13 )
620658 assert len (pieces ) == len (windows ) - 1
621659 for k in range (len (pieces )):
622660 piece = ts .genetic_relatedness_vector (
623- W , windows = windows [k : k + 2 ], ** kwargs
661+ W , windows = windows [k : k + 2 ], span_normalise = False , ** kwargs
624662 )
625663 assert piece .shape [0 ] == 1
626664 np .testing .assert_allclose (piece [0 ], pieces [k ], atol = 1e-13 )
627665
666+ @pytest .mark .parametrize ("mode" , ("branch" ,))
667+ def test_simple_span_normalise (self , mode ):
668+ ts = msprime .sim_ancestry (
669+ 4 ,
670+ ploidy = 1 ,
671+ population_size = 20 ,
672+ sequence_length = 100 ,
673+ recombination_rate = 0.01 ,
674+ random_seed = 123 ,
675+ )
676+ assert ts .num_trees >= 2
677+ x = np .linspace (0 , 1 , ts .num_samples )
678+ w = np .linspace (0 , ts .sequence_length , 11 )[[0 , 3 , 4 , - 1 ]]
679+ a = ts .genetic_relatedness_vector (x , windows = w , mode = mode ) # default is True
680+ ax = ts .genetic_relatedness_vector (x , windows = w , mode = mode , span_normalise = True )
681+ assert np .all (ax == a )
682+ b = ts .genetic_relatedness_vector (x , windows = w , span_normalise = False , mode = mode )
683+ assert a .shape == b .shape
684+ dw = np .diff (w )
685+ for aa , bb , ww in zip (a , b , dw , strict = True ):
686+ assert np .allclose (aa * ww , bb )
687+
628688 @pytest .mark .parametrize ("n" , [2 , 3 , 5 , 15 ])
629689 @pytest .mark .parametrize ("centre" , (True , False ))
630690 def test_single_balanced_tree (self , n , centre ):
@@ -680,10 +740,16 @@ def test_missing_flanks(self, centre, num_windows):
680740
681741 @pytest .mark .parametrize ("ts" , get_example_tree_sequences ())
682742 @pytest .mark .parametrize ("centre" , (True , False ))
683- def test_suite_examples (self , ts , centre ):
743+ def test_suite_examples_centre (self , ts , centre ):
684744 if ts .num_samples > 0 :
685745 check_relatedness_vector (ts , centre = centre )
686746
747+ @pytest .mark .parametrize ("ts" , get_example_tree_sequences ())
748+ @pytest .mark .parametrize ("span_normalise" , (True , False ))
749+ def test_suite_examples_span_normalise (self , ts , span_normalise ):
750+ if ts .num_samples > 0 :
751+ check_relatedness_vector (ts , span_normalise = span_normalise )
752+
687753 @pytest .mark .parametrize ("n" , [2 , 3 , 10 ])
688754 def test_dangling_on_samples (self , n ):
689755 # Adding non sample branches below the samples does not alter
@@ -750,21 +816,21 @@ def pca(ts, windows, centre, samples=None, individuals=None, time_windows=None):
750816 if drop_dimension :
751817 windows = [0 , ts .sequence_length ]
752818 if time_windows is None :
753- Sigma = relatedness_matrix (ts = ts , windows = windows , centre = False )[:, ii , :][
754- :, :, ii
755- ]
819+ Sigma = relatedness_matrix (
820+ ts = ts , windows = windows , centre = False , span_normalise = False
821+ )[:, ii , :][:, :, ii ]
756822 else :
757823 assert time_windows [0 ] < time_windows [1 ]
758824 ts_low , ts_high = (
759825 ts .decapitate (time_windows [0 ]),
760826 ts .decapitate (time_windows [1 ]),
761827 )
762- Sigma_low = relatedness_matrix (ts = ts_low , windows = windows , centre = False )[
763- :, ii , :
764- ][:, :, ii ]
765- Sigma_high = relatedness_matrix (ts = ts_high , windows = windows , centre = False )[
766- :, ii , :
767- ][:, :, ii ]
828+ Sigma_low = relatedness_matrix (
829+ ts = ts_low , windows = windows , centre = False , span_normalise = False
830+ )[:, ii , : ][:, :, ii ]
831+ Sigma_high = relatedness_matrix (
832+ ts = ts_high , windows = windows , centre = False , span_normalise = False
833+ )[:, ii , : ][:, :, ii ]
768834 Sigma = Sigma_high - Sigma_low
769835 if individuals is not None :
770836 ni = len (individuals )
0 commit comments