Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
:return: A numpy array mapping node IDs in the input tables to their
corresponding node IDs in the output tables.
:rtype: numpy.ndarray (dtype=np.int32)
"""
if filter_zero_mutation_sites is not None:
# Deprecated in 0.6.1.
warnings.warn(
"filter_zero_mutation_sites is deprecated; use filter_sites instead",
DeprecationWarning)
filter_sites = filter_zero_mutation_sites
if samples is None:
flags = self.nodes.flags
samples = np.where(
np.bitwise_and(flags, _tskit.NODE_IS_SAMPLE) != 0)[0].astype(np.int32)
else:
samples = util.safe_np_int_cast(samples, np.int32)
return self.ll_tables.simplify(
samples, filter_sites=filter_sites,
filter_individuals=filter_individuals,
filter_populations=filter_populations,
reduce_to_site_topology=reduce_to_site_topology,
keep_unary=keep_unary)
def add_ancestor(self, start, end, time, focal_sites, haplotype):
    """
    Adds an ancestor carrying the specified haplotype over the site interval
    [start:end]. The ancestor is associated with the given timepoint and has
    new mutations at the listed focal sites. Ancestors should be added in
    time order, with the oldest first. The id of the added ancestor is
    returned.

    :param start: Index of the first site covered by the haplotype; must be
        >= 0 and strictly less than ``end``.
    :param end: Index one past the last site covered; must be <= the number
        of sites.
    :param time: The timepoint of the ancestor; must be > 0.
    :param focal_sites: Site indexes (cast to int32) at which this ancestor
        has new mutations. Each must satisfy ``start <= site < end``.
    :param haplotype: Allelic states (cast to int8), one per site in
        [start:end]. Each value must be less than the number of alleles at
        the corresponding site.
    :raises ValueError: If any of the constraints above is violated.
    """
    self._check_build_mode()
    # Work on private, correctly-typed copies of the caller's arrays.
    as_int_array = tskit.util.safe_np_int_cast
    haplotype = as_int_array(haplotype, dtype=np.int8, copy=True)
    focal_sites = as_int_array(focal_sites, dtype=np.int32, copy=True)

    # Interval checks come first so that the slices below are well defined.
    if start < 0:
        raise ValueError("Start must be >= 0")
    if end > self.num_sites:
        raise ValueError("end must be <= num_sites")
    if start >= end:
        raise ValueError("start must be < end")

    expected_shape = (end - start,)
    if haplotype.shape != expected_shape:
        raise ValueError("haplotypes incorrect shape.")
    allele_limits = self._num_alleles[start:end]
    if np.any(haplotype >= allele_limits):
        raise ValueError("haplotype values must be < num_alleles.")

    outside_interval = (focal_sites < start) | (focal_sites >= end)
    if np.any(outside_interval):
        raise ValueError("focal sites must be between start and end")
    if time <= 0:
        raise ValueError("time must be > 0")
    # NOTE(review): only the argument validation is visible in this excerpt;
    # the code that stores the ancestor and returns its id is not shown here.
def delete_sites(self, site_ids, record_provenance=True):
    """
    Remove the specified sites entirely from the sites and mutations tables
    in this collection. This is identical to
    :meth:`TreeSequence.delete_sites` but acts *in place* to alter the data
    in this :class:`TableCollection`.

    :param list[int] site_ids: A list of site IDs specifying the sites to
        remove. Every ID must lie in ``[0, len(self.sites))``.
    :param bool record_provenance: If ``True``, add details of this
        operation to the provenance table in this TableCollection.
        (Default: ``True``).
    :raises ValueError: If any site ID is negative or not less than the
        number of sites.
    """
    num_sites = len(self.sites)
    site_ids = util.safe_np_int_cast(site_ids, np.int32)
    if np.any(site_ids < 0) or np.any(site_ids >= num_sites):
        raise ValueError("Site ID out of bounds")

    # Boolean mask over the current sites: True means the row is retained.
    keep_sites = np.ones(num_sites, dtype=bool)
    keep_sites[site_ids] = False

    # The ragged columns are filtered from the *current* table contents, so
    # both must be computed before the table is overwritten below.
    kept_state, kept_state_offset = keep_with_offset(
        keep_sites,
        self.sites.ancestral_state,
        self.sites.ancestral_state_offset,
    )
    kept_metadata, kept_metadata_offset = keep_with_offset(
        keep_sites,
        self.sites.metadata,
        self.sites.metadata_offset,
    )
    self.sites.set_columns(
        position=self.sites.position[keep_sites],
        ancestral_state=kept_state,
        ancestral_state_offset=kept_state_offset,
        metadata=kept_metadata,
        metadata_offset=kept_metadata_offset,
    )

    # We also need to adjust the mutations table, as it references into sites
    keep_mutations = keep_sites[self.mutations.site]
    # NOTE(review): the visible excerpt ends here; the code that applies
    # keep_mutations to the mutations table is not shown.
alleles cannot have ``inference`` (below) set to ``True``. If not
specified or None, defaults to ["0", "1"].
:param dict metadata: A JSON encodable dict-like object containing
metadata that is to be associated with this site.
:param float time: The time of occurrence (pastwards) of the mutation to the
derived state at this site. If not specified or None, the frequency of the
derived alleles (i.e., the proportion of non-zero values in the genotypes,
out of all the non-missing values) will be used in inference. For
biallelic sites this frequency should provide a reasonable estimate
of the relative time, as used to order ancestral haplotypes during the
inference process. For sites not used in inference, such as singletons or
sites with more than two alleles, the value is unused. Defaults to None.
:return: The ID of the newly added site.
:rtype: int
"""
genotypes = tskit.util.safe_np_int_cast(genotypes, dtype=np.int8)
self._check_build_mode()
if self._build_state == self.ADDING_POPULATIONS:
if genotypes.shape[0] == 0:
# We could just raise an error here but we set the state
# here so that we can raise the same error as other
# similar conditions.
self._build_state = self.ADDING_SAMPLES
else:
# Add in the default haploid samples.
for _ in range(genotypes.shape[0]):
self.add_individual()
if self._build_state == self.ADDING_SAMPLES:
self._individuals_writer.flush()
self._samples_writer.flush()
self._alloc_site_writer()
self._build_state = self.ADDING_SITES