@@ -5596,6 +5596,7 @@ def alignments(
55965596 samples = None ,
55975597 left = None ,
55985598 right = None ,
5599+ isolated_as_missing = None ,
55995600 ):
56005601 """
56015602 Returns an iterator over the full sequence alignments for the defined samples
@@ -5660,7 +5661,9 @@ def alignments(
56605661 currently supported by this method and it will raise a ValueError
56615662 if called on tree sequences containing isolated samples.
56625663 See https://github.com/tskit-dev/tskit/issues/1896 for more
5663- information.
5664+ information. If you wish to include non-sample nodes (e.g., internal
5665+ ARG nodes) in the output, set ``isolated_as_missing=False`` to opt out
5666+ of this guard and impute missing data as the ancestral state.
56645667
56655668 See also the :meth:`.variants` iterator for site-centric access
56665669 to sample genotypes and :meth:`.haplotypes` for access to sample sequences
@@ -5679,6 +5682,11 @@ def alignments(
56795682 (default) alignments start at 0.
56805683 :param int right: Alignments will stop before this genomic position. If ``None``
56815684 (default) alignments will continue until the end of the tree sequence.
5685+ :param bool isolated_as_missing: If True (default), isolated samples without
5686+ mutations are treated as missing data and this method raises an error
5687+ when any are detected. If False, missing data are imputed with the
5688+ ancestral state, which also permits alignments to be generated for
5689+ non-sample nodes such as internal ARG nodes.
56825690 :return: An iterator over the alignment strings for specified samples in
56835691 this tree sequence, in the order given in ``samples``.
56845692 :rtype: collections.abc.Iterable
@@ -5693,6 +5701,8 @@ def alignments(
56935701 missing_data_character = (
56945702 "N" if missing_data_character is None else missing_data_character
56955703 )
5704+ if isolated_as_missing is None :
5705+ isolated_as_missing = True
56965706
56975707 L = interval .span
56985708 a = np .empty (L , dtype = np .int8 )
@@ -5730,7 +5740,9 @@ def alignments(
57305740 # incorrectly if have a sample isolated over the region (a, b],
57315741 # and if we have sites at each position from a to b, and at
57325742 # each site there is a mutation over the isolated sample.
5733- if any (tree ._has_isolated_samples () for tree in self .trees ()):
5743+ if isolated_as_missing and any (
5744+ tree ._has_isolated_samples () for tree in self .trees ()
5745+ ):
57345746 raise ValueError (
57355747 "Missing data not currently supported in alignments; see "
57365748 "https://github.com/tskit-dev/tskit/issues/1896 for details."
@@ -5741,6 +5753,7 @@ def alignments(
57415753 interval = interval ,
57425754 missing_data_character = missing_data_character ,
57435755 samples = samples ,
5756+ isolated_as_missing = isolated_as_missing ,
57445757 )
57455758 site_pos = self .sites_position .astype (np .int64 )[
57465759 first_site_id : last_site_id + 1
0 commit comments