Skip to content

Commit d387952

Browse files
petrelharpbenjeffery
authored andcommitted
enable span_normalise for genetic_relatedness_vector; closes #3241
1 parent f43ab1f commit d387952

File tree

7 files changed

+110
-23
lines changed

7 files changed

+110
-23
lines changed

c/tests/test_stats.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2068,7 +2068,7 @@ test_empty_genetic_relatedness_vector(void)
20682068
int ret;
20692069
tsk_treeseq_t ts;
20702070
tsk_size_t num_samples;
2071-
double *weights, *result;
2071+
double *weights, *result, *result2;
20722072
tsk_size_t j;
20732073
tsk_size_t num_weights = 2;
20742074
double windows[] = { 0, 0 };
@@ -2079,6 +2079,7 @@ test_empty_genetic_relatedness_vector(void)
20792079
windows[1] = tsk_treeseq_get_sequence_length(&ts);
20802080
weights = tsk_malloc(num_weights * num_samples * sizeof(double));
20812081
result = tsk_malloc(num_weights * num_samples * sizeof(double));
2082+
result2 = tsk_malloc(num_weights * num_samples * sizeof(double));
20822083
for (j = 0; j < num_samples; j++) {
20832084
weights[j] = 1.0;
20842085
}
@@ -2099,10 +2100,17 @@ test_empty_genetic_relatedness_vector(void)
20992100
ret = tsk_treeseq_genetic_relatedness_vector(
21002101
&ts, num_weights, weights, 1, windows, num_samples, ts.samples, result, 0);
21012102
CU_ASSERT_EQUAL_FATAL(ret, 0);
2103+
ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 1, windows,
2104+
num_samples, ts.samples, result2, TSK_STAT_SPAN_NORMALISE);
2105+
CU_ASSERT_EQUAL_FATAL(ret, 0);
2106+
for (j = 0; j < num_samples * num_weights; j++) {
2107+
CU_ASSERT_EQUAL_FATAL(result[j] / (windows[1] - windows[0]), result2[j]);
2108+
}
21022109

21032110
tsk_treeseq_free(&ts);
21042111
free(weights);
21052112
free(result);
2113+
free(result2);
21062114
}
21072115

21082116
static void

c/tskit/trees.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10771,6 +10771,9 @@ tsk_matvec_calculator_run(tsk_matvec_calculator_t *self)
1077110771
tsk_matvec_calculator_print_state(self, tsk_get_debug_stream());
1077210772
}
1077310773
}
10774+
if (!!(self->options & TSK_STAT_SPAN_NORMALISE)) {
10775+
span_normalise(self->num_windows, windows, out_size, self->result);
10776+
}
1077410777

1077510778
/* out: */
1077610779
return ret;

docs/python-api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,7 @@ Single site
322322
TreeSequence.genetic_relatedness
323323
TreeSequence.genetic_relatedness_weighted
324324
TreeSequence.genetic_relatedness_vector
325+
TreeSequence.genetic_relatedness_matrix
325326
TreeSequence.general_stat
326327
TreeSequence.segregating_sites
327328
TreeSequence.sample_count_stat

docs/stats.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ appears beside the listed method.
7272
* {meth}`~TreeSequence.divergence`
7373
* {meth}`~TreeSequence.genetic_relatedness`
7474
{meth}`~TreeSequence.genetic_relatedness_weighted`
75+
{meth}`~TreeSequence.genetic_relatedness_vector`
76+
{meth}`~TreeSequence.genetic_relatedness_matrix`
7577
* {meth}`~TreeSequence.f4`
7678
{meth}`~TreeSequence.f3`
7779
{meth}`~TreeSequence.f2`

python/CHANGELOG.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@
5959
(ensuring that parent mutations occur before children), then node, then
6060
their original order in the tables. (:user:`benjeffery`, :pr:`3257`, :issue:`3253`)
6161

62+
- Fix bug in ``TreeSequence.genetic_relatedness_vector`` that previously ignored
63+
``span_normalise``: previously, ``span_normalise`` was always set to ``False``;
64+
now the default is ``True`` in agreement with other statistics, so the returned
65+
values will change. (:user:`petrelharp`, :pr:`3300`, :issue:`3241`)
66+
6267
- Fix bug in ``TreeSequence.pair_coalescence_counts`` when ``span_normalise=True``
6368
and a window breakpoint falls within an internal missing interval.
6469
(:user:`nspope`, :pr:`3176`, :issue:`3175`)

python/tests/test_relatedness_vector.py

Lines changed: 88 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def __init__(
5757
verbosity=0,
5858
internal_checks=False,
5959
centre=True,
60+
span_normalise=True,
6061
):
6162
self.sample_weights = np.asarray(sample_weights, dtype=np.float64)
6263
self.num_weights = self.sample_weights.shape[1]
@@ -80,6 +81,7 @@ def __init__(
8081
self.verbosity = verbosity
8182
self.internal_checks = internal_checks
8283
self.centre = centre
84+
self.span_normalise = span_normalise
8385

8486
if self.centre:
8587
self.sample_weights -= np.mean(self.sample_weights, axis=0)
@@ -285,6 +287,12 @@ def run(self):
285287
if self.centre:
286288
for m in range(num_windows):
287289
out[m] -= np.mean(out[m], axis=0)
290+
291+
if self.span_normalise:
292+
window_lengths = np.diff(self.windows)
293+
for m in range(num_windows):
294+
out[m] /= window_lengths[m]
295+
288296
return out
289297

290298

@@ -326,7 +334,7 @@ def relatedness_vector(ts, sample_weights, windows=None, nodes=None, **kwargs):
326334
return out
327335

328336

329-
def relatedness_matrix(ts, windows, centre, nodes=None):
337+
def relatedness_matrix(ts, windows, centre, nodes=None, span_normalise=True):
330338
if nodes is None:
331339
keep_rows = np.arange(ts.num_samples)
332340
keep_cols = np.arange(ts.num_samples)
@@ -356,7 +364,7 @@ def relatedness_matrix(ts, windows, centre, nodes=None):
356364
indexes=[(i, j) for i in range(ts.num_samples) for j in range(ts.num_samples)],
357365
windows=use_windows,
358366
mode="branch",
359-
span_normalise=False,
367+
span_normalise=span_normalise,
360368
proportion=False,
361369
centre=centre,
362370
)
@@ -375,7 +383,15 @@ def relatedness_matrix(ts, windows, centre, nodes=None):
375383

376384

377385
def verify_relatedness_vector(
378-
ts, w, windows, *, internal_checks=False, verbosity=0, centre=True, nodes=None
386+
ts,
387+
w,
388+
windows,
389+
*,
390+
internal_checks=False,
391+
verbosity=0,
392+
centre=True,
393+
nodes=None,
394+
span_normalise=True,
379395
):
380396
R1 = relatedness_vector(
381397
ts,
@@ -385,18 +401,26 @@ def verify_relatedness_vector(
385401
verbosity=verbosity,
386402
centre=centre,
387403
nodes=nodes,
404+
span_normalise=span_normalise,
388405
)
389406
nrows = ts.num_samples if nodes is None else len(nodes)
390407
wvec = w if len(w.shape) > 1 else w[:, np.newaxis]
391-
Sigma = relatedness_matrix(ts, windows=windows, centre=centre, nodes=nodes)
408+
Sigma = relatedness_matrix(
409+
ts, windows=windows, centre=centre, nodes=nodes, span_normalise=span_normalise
410+
)
392411
if windows is None:
393412
R2 = Sigma.dot(wvec)
394413
else:
395414
R2 = np.zeros((len(windows) - 1, nrows, wvec.shape[1]))
396415
for k in range(len(windows) - 1):
397416
R2[k] = Sigma[k].dot(wvec)
398417
R3 = ts.genetic_relatedness_vector(
399-
w, windows=windows, mode="branch", centre=centre, nodes=nodes
418+
w,
419+
windows=windows,
420+
mode="branch",
421+
centre=centre,
422+
nodes=nodes,
423+
span_normalise=span_normalise,
400424
)
401425
if verbosity > 0:
402426
print(ts.draw_text())
@@ -425,6 +449,7 @@ def check_relatedness_vector(
425449
verbosity=0,
426450
seed=123,
427451
centre=True,
452+
span_normalise=True,
428453
do_nodes=True,
429454
):
430455
rng = np.random.default_rng(seed=seed)
@@ -456,6 +481,7 @@ def check_relatedness_vector(
456481
verbosity=verbosity,
457482
centre=centre,
458483
nodes=nodes,
484+
span_normalise=span_normalise,
459485
)
460486
return R
461487

@@ -568,8 +594,9 @@ def test_good_nodes(self):
568594
@pytest.mark.parametrize("n", [2, 3, 5])
569595
@pytest.mark.parametrize("seed", range(1, 4))
570596
@pytest.mark.parametrize("centre", (True, False))
597+
@pytest.mark.parametrize("span_normalise", (True, False))
571598
@pytest.mark.parametrize("num_windows", (0, 1, 2, 3))
572-
def test_small_internal_checks(self, n, seed, centre, num_windows):
599+
def test_small_internal_checks(self, n, seed, centre, span_normalise, num_windows):
573600
ts = msprime.sim_ancestry(
574601
n,
575602
ploidy=1,
@@ -579,14 +606,19 @@ def test_small_internal_checks(self, n, seed, centre, num_windows):
579606
)
580607
assert ts.num_trees >= 2
581608
check_relatedness_vector(
582-
ts, num_windows=num_windows, internal_checks=True, centre=centre
609+
ts,
610+
num_windows=num_windows,
611+
internal_checks=True,
612+
centre=centre,
613+
span_normalise=span_normalise,
583614
)
584615

585616
@pytest.mark.parametrize("n", [2, 3, 5, 15])
586617
@pytest.mark.parametrize("seed", range(1, 5))
587618
@pytest.mark.parametrize("centre", (True, False))
619+
@pytest.mark.parametrize("span_normalise", (True, False))
588620
@pytest.mark.parametrize("num_windows", (0, 1, 2, 3))
589-
def test_simple_sims(self, n, seed, centre, num_windows):
621+
def test_simple_sims(self, n, seed, centre, span_normalise, num_windows):
590622
ts = msprime.sim_ancestry(
591623
n,
592624
ploidy=1,
@@ -597,7 +629,11 @@ def test_simple_sims(self, n, seed, centre, num_windows):
597629
)
598630
assert ts.num_trees >= 2
599631
check_relatedness_vector(
600-
ts, num_windows=num_windows, centre=centre, verbosity=0
632+
ts,
633+
num_windows=num_windows,
634+
centre=centre,
635+
verbosity=0,
636+
span_normalise=span_normalise,
601637
)
602638

603639
def test_simple_sims_windows(self):
@@ -613,18 +649,42 @@ def test_simple_sims_windows(self):
613649
assert ts.num_trees >= 2
614650
W = np.linspace(0, 1, 2 * ts.num_samples).reshape((ts.num_samples, 2))
615651
kwargs = {"centre": False, "mode": "branch"}
616-
total = ts.genetic_relatedness_vector(W, **kwargs)
652+
total = ts.genetic_relatedness_vector(W, span_normalise=False, **kwargs)
617653
for windows in [[0, L], [0, L / 3, L / 2, L]]:
618-
pieces = ts.genetic_relatedness_vector(W, windows=windows, **kwargs)
654+
pieces = ts.genetic_relatedness_vector(
655+
W, windows=windows, span_normalise=False, **kwargs
656+
)
619657
np.testing.assert_allclose(total, pieces.sum(axis=0), atol=1e-13)
620658
assert len(pieces) == len(windows) - 1
621659
for k in range(len(pieces)):
622660
piece = ts.genetic_relatedness_vector(
623-
W, windows=windows[k : k + 2], **kwargs
661+
W, windows=windows[k : k + 2], span_normalise=False, **kwargs
624662
)
625663
assert piece.shape[0] == 1
626664
np.testing.assert_allclose(piece[0], pieces[k], atol=1e-13)
627665

666+
@pytest.mark.parametrize("mode", ("branch",))
667+
def test_simple_span_normalise(self, mode):
668+
ts = msprime.sim_ancestry(
669+
4,
670+
ploidy=1,
671+
population_size=20,
672+
sequence_length=100,
673+
recombination_rate=0.01,
674+
random_seed=123,
675+
)
676+
assert ts.num_trees >= 2
677+
x = np.linspace(0, 1, ts.num_samples)
678+
w = np.linspace(0, ts.sequence_length, 11)[[0, 3, 4, -1]]
679+
a = ts.genetic_relatedness_vector(x, windows=w, mode=mode) # default is True
680+
ax = ts.genetic_relatedness_vector(x, windows=w, mode=mode, span_normalise=True)
681+
assert np.all(ax == a)
682+
b = ts.genetic_relatedness_vector(x, windows=w, span_normalise=False, mode=mode)
683+
assert a.shape == b.shape
684+
dw = np.diff(w)
685+
for aa, bb, ww in zip(a, b, dw, strict=True):
686+
assert np.allclose(aa * ww, bb)
687+
628688
@pytest.mark.parametrize("n", [2, 3, 5, 15])
629689
@pytest.mark.parametrize("centre", (True, False))
630690
def test_single_balanced_tree(self, n, centre):
@@ -680,10 +740,16 @@ def test_missing_flanks(self, centre, num_windows):
680740

681741
@pytest.mark.parametrize("ts", get_example_tree_sequences())
682742
@pytest.mark.parametrize("centre", (True, False))
683-
def test_suite_examples(self, ts, centre):
743+
def test_suite_examples_centre(self, ts, centre):
684744
if ts.num_samples > 0:
685745
check_relatedness_vector(ts, centre=centre)
686746

747+
@pytest.mark.parametrize("ts", get_example_tree_sequences())
748+
@pytest.mark.parametrize("span_normalise", (True, False))
749+
def test_suite_examples_span_normalise(self, ts, span_normalise):
750+
if ts.num_samples > 0:
751+
check_relatedness_vector(ts, span_normalise=span_normalise)
752+
687753
@pytest.mark.parametrize("n", [2, 3, 10])
688754
def test_dangling_on_samples(self, n):
689755
# Adding non sample branches below the samples does not alter
@@ -750,21 +816,21 @@ def pca(ts, windows, centre, samples=None, individuals=None, time_windows=None):
750816
if drop_dimension:
751817
windows = [0, ts.sequence_length]
752818
if time_windows is None:
753-
Sigma = relatedness_matrix(ts=ts, windows=windows, centre=False)[:, ii, :][
754-
:, :, ii
755-
]
819+
Sigma = relatedness_matrix(
820+
ts=ts, windows=windows, centre=False, span_normalise=False
821+
)[:, ii, :][:, :, ii]
756822
else:
757823
assert time_windows[0] < time_windows[1]
758824
ts_low, ts_high = (
759825
ts.decapitate(time_windows[0]),
760826
ts.decapitate(time_windows[1]),
761827
)
762-
Sigma_low = relatedness_matrix(ts=ts_low, windows=windows, centre=False)[
763-
:, ii, :
764-
][:, :, ii]
765-
Sigma_high = relatedness_matrix(ts=ts_high, windows=windows, centre=False)[
766-
:, ii, :
767-
][:, :, ii]
828+
Sigma_low = relatedness_matrix(
829+
ts=ts_low, windows=windows, centre=False, span_normalise=False
830+
)[:, ii, :][:, :, ii]
831+
Sigma_high = relatedness_matrix(
832+
ts=ts_high, windows=windows, centre=False, span_normalise=False
833+
)[:, ii, :][:, :, ii]
768834
Sigma = Sigma_high - Sigma_low
769835
if individuals is not None:
770836
ni = len(individuals)

python/tskit/trees.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9088,6 +9088,7 @@ def _genetic_relatedness_vector_node(
90889088
mode=mode,
90899089
centre=False,
90909090
nodes=indices,
9091+
span_normalise=False, # <- non-default!
90919092
)[0]
90929093
x = x - x.mean(axis=0) if centre else x
90939094

@@ -9118,6 +9119,7 @@ def _genetic_relatedness_vector_individual(
91189119
mode=mode,
91199120
centre=False,
91209121
nodes=samples,
9122+
span_normalise=False, # <- non-default!
91219123
)[0]
91229124

91239125
def bincount_fn(w):

0 commit comments

Comments
 (0)