Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a larger test body. The enclosing `def` and the
# setup of `ts`, `tables`, `edges` and `node_times` lie outside this view, and
# the original indentation has been lost, so loop extents are presumed.
#
# Presumed loop body (through the second `edges.append`): for every edge of
# `ts`, add a node 1e-14 time units younger than the edge's parent and replace
# the edge by two edges routed through it: new node -> child, and
# parent -> new node.
for e in ts.edges():
node = ts.node(e.parent)
t = node.time - 1e-14 # Arbitrary small value.
# Presumably the ID that the row added on the next line will receive.
next_node = len(tables.nodes)
tables.nodes.add_row(time=t, population=node.population)
edges.append(msprime.Edge(
left=e.left, right=e.right, parent=next_node, child=e.child))
node_times[next_node] = t
edges.append(msprime.Edge(
left=e.left, right=e.right, parent=e.parent, child=next_node))
# Order the collected edges by the recorded time of their parent node.
edges.sort(key=lambda e: node_times[e.parent])
# Replace the edge table's contents with the re-ordered edge list.
tables.edges.reset()
for e in edges:
tables.edges.add_row(
left=e.left, right=e.right, child=e.child, parent=e.parent)
ts_new = msprime.load_tables(**tables.asdict())
# The rebuilt tree sequence has more edges but must yield the same
# haplotypes and variants as the original.
self.assertGreater(ts_new.num_edges, ts.num_edges)
self.assert_haplotypes_equal(ts, ts_new)
self.assert_variants_equal(ts, ts_new)
# Simplifying is expected to give back exactly the original records.
ts_simplified = ts_new.simplify()
self.assertEqual(list(ts_simplified.records()), list(ts.records()))
self.assert_haplotypes_equal(ts, ts_simplified)
self.assert_variants_equal(ts, ts_simplified)
# There must be exactly one edge diff per tree.
self.assertEqual(len(list(ts.edge_diffs())), ts.num_trees)
# NOTE(review): fragment of a test body; the enclosing `def` is outside this
# view and the original indentation has been lost, so loop extents are
# presumed.
#
# Simulate 15 samples with recombination and mutation, and require more than
# two trees and more than two mutations before going on.
ts = msprime.simulate(
15, recombination_rate=5, random_seed=self.random_seed, mutation_rate=5)
self.assertGreater(ts.num_trees, 2)
self.assertGreater(ts.num_mutations, 2)
tables = ts.dump_tables()
# New node IDs are handed out starting from the current node count.
next_node = ts.num_nodes
tables.edges.reset()
# Presumed loop body (through `next_node += 1`): re-add each original edge,
# plus an edge over the same interval from the same parent to a fresh node
# added at time 0.
for e in ts.edges():
tables.edges.add_row(e.left, e.right, e.parent, e.child)
tables.edges.add_row(e.left, e.right, e.parent, next_node)
tables.nodes.add_row(time=0)
next_node += 1
# Sort the tables before loading them as a tree sequence.
msprime.sort_tables(
nodes=tables.nodes, edges=tables.edges, sites=tables.sites,
mutations=tables.mutations)
ts_new = msprime.load_tables(
nodes=tables.nodes, edges=tables.edges, sites=tables.sites,
mutations=tables.mutations)
# The extra nodes are present, but the sample size, haplotypes and variants
# must be unchanged.
self.assertEqual(ts_new.num_nodes, next_node)
self.assertEqual(ts_new.sample_size, ts.sample_size)
self.assert_haplotypes_equal(ts, ts_new)
self.assert_variants_equal(ts, ts_new)
# Simplifying is expected to remove the added nodes and give back the
# original records.
ts_simplified = ts_new.simplify()
self.assertEqual(ts_simplified.num_nodes, ts.num_nodes)
self.assertEqual(ts_simplified.sample_size, ts.sample_size)
self.assertEqual(list(ts_simplified.records()), list(ts.records()))
self.assert_haplotypes_equal(ts, ts_simplified)
self.assert_variants_equal(ts, ts_simplified)
# NOTE(review): tail of a helper whose `def` line, and the binding of `edges`,
# `child`, `u` and `tables`, lie outside this view. The original indentation
# has been lost, so block extents are presumed.
#
# Collapse the intervals of `edges` into maximal contiguous runs: a new run
# starts wherever one interval's left end differs from the previous
# interval's right end.
# NOTE(review): this only merges correctly if `edges` is ordered by left
# coordinate -- confirm against the code that builds `edges`.
lefts = [r.left for r in edges]
rights = [r.right for r in edges]
do_lefts = [lefts[0]]
do_rights = []
for k in range(len(lefts)-1):
# Presumably both appends below belong to this `if` (a gap closes the
# current run and opens the next one).
if lefts[k+1] != rights[k]:
do_lefts.append(lefts[k+1])
do_rights.append(rights[k])
# Close the final run.
do_rights.append(rights[-1])
# Debugging output.
print(child, " :: ", do_lefts, " ---- ", do_rights)
assert len(do_lefts) == len(do_rights)
# Add one edgeset row per merged run, with `child` as the only child of `u`.
for k in range(len(do_lefts)):
tables.edgesets.add_row(
left=do_lefts[k], right=do_rights[k], children=(child,), parent=u)
# Debugging output.
print(tables.edgesets)
new_ts = msprime.load_tables(**tables._asdict())
return new_ts
def test_small_tree_mutations_over_root(self):
    """
    Adds one site (position 0.25, ancestral state "0") carrying a single
    mutation on node 8 to the small example tree, then checks that
    simplifying down to samples 0 and 1 keeps that site and mutation,
    both with and without filtering of zero-mutation sites.
    """
    node_text = six.StringIO(self.small_tree_ex_nodes)
    edge_text = six.StringIO(self.small_tree_ex_edges)
    base_ts = msprime.load_text(nodes=node_text, edges=edge_text, strict=False)

    # Attach the site and its mutation to a copy of the tables.
    tables = base_ts.dump_tables()
    tables.sites.add_row(position=0.25, ancestral_state="0")
    tables.mutations.add_row(site=0, node=8, derived_state="1")
    mutated_ts = msprime.load_tables(
        nodes=tables.nodes, edges=tables.edges, sites=tables.sites,
        mutations=tables.mutations)
    self.assertEqual(mutated_ts.num_sites, 1)
    self.assertEqual(mutated_ts.num_mutations, 1)

    # The site and mutation must be kept whichever way the filter is set.
    for filter_sites in (True, False):
        simplified, _node_map = self.do_simplify(
            mutated_ts, [0, 1], filter_zero_mutation_sites=filter_sites)
        self.assertEqual(simplified.num_sites, 1)
        self.assertEqual(simplified.num_mutations, 1)
def test_break_single_tree(self):
    """
    Takes a single largish tree from msprime and drops the last row of its
    edge table (the oldest record, per the original comment), which breaks
    the tree into two subtrees. Checks that sample size, tree count,
    haplotypes and variants are unchanged, and that the tree has exactly
    two roots, matching the ones it reports.
    """
    original = msprime.simulate(20, random_seed=self.random_seed, mutation_rate=4)
    self.assertGreater(original.num_mutations, 5)

    # Rewrite every edge column without its final entry.
    tables = original.dump_tables()
    trimmed_columns = {
        column: getattr(tables.edges, column)[:-1]
        for column in ("left", "right", "parent", "child")}
    tables.edges.set_columns(**trimmed_columns)
    broken = msprime.load_tables(**tables.asdict())

    self.assertEqual(original.sample_size, broken.sample_size)
    self.assertEqual(original.num_edges, broken.num_edges + 1)
    self.assertEqual(original.num_trees, broken.num_trees)
    self.assert_haplotypes_equal(original, broken)
    self.assert_variants_equal(original, broken)

    first_tree = next(broken.trees())

    def topmost_ancestor(node):
        # Walk parent pointers until the node has no parent.
        while first_tree.parent(node) != msprime.NULL_NODE:
            node = first_tree.parent(node)
        return node

    found_roots = {topmost_ancestor(sample) for sample in broken.samples()}
    self.assertEqual(len(found_roots), 2)
    self.assertEqual(sorted(found_roots), sorted(first_tree.roots))
"""
Verifies that if we run simplify on the specified input we get the
required output.
"""
# NOTE(review): the `def` line that owns this docstring is outside this view,
# the fragment is cut off mid-call on its last line, and the original
# indentation has been lost. The names `nodes_before`, `edges_before`,
# `sites_before`, `mutations_before`, `samples`, `filter_zero_mutation_sites`,
# `nodes_after`, `edges_after`, `sites_after` and `mutations_after` are
# presumably parameters of that definition.
#
# Parse the "before" tables from text; sites and mutations are optional and
# fall back to empty tables.
b_nodes = msprime.parse_nodes(six.StringIO(nodes_before), strict=False)
b_edges = msprime.parse_edges(six.StringIO(edges_before), strict=False)
if sites_before is not None:
b_sites = msprime.parse_sites(six.StringIO(sites_before), strict=False)
else:
b_sites = msprime.SiteTable()
if mutations_before is not None:
b_mutations = msprime.parse_mutations(
six.StringIO(mutations_before), strict=False)
else:
b_mutations = msprime.MutationTable()
ts = msprime.load_tables(
nodes=b_nodes, edges=b_edges, sites=b_sites, mutations=b_mutations)
# Make sure it's a valid topology. We want to be sure we evaluate the
# whole iterator
for t in ts.trees():
self.assertTrue(t is not None)
# Simplify the "before" tables with respect to `samples`.
# NOTE(review): no return value is captured, so the tables are presumably
# modified in place -- confirm against the msprime documentation.
msprime.simplify_tables(
samples=samples, nodes=b_nodes, edges=b_edges, sites=b_sites,
mutations=b_mutations, filter_zero_mutation_sites=filter_zero_mutation_sites)
# Parse the expected "after" tables the same way as the "before" tables.
a_nodes = msprime.parse_nodes(six.StringIO(nodes_after), strict=False)
a_edges = msprime.parse_edges(six.StringIO(edges_after), strict=False)
if sites_after is not None:
a_sites = msprime.parse_sites(six.StringIO(sites_after), strict=False)
else:
a_sites = msprime.SiteTable()
if mutations_after is not None:
a_mutations = msprime.parse_mutations(
n_used, Ne_used, recombination_map=recombination_map, mutation_rate=mutation_rate,
random_seed=sim_seed, **kwargs)
# NOTE(review): this fragment starts in the middle of a call. The enclosing
# `def`, the `if` that this `else` pairs with, and the binding of `n_used`,
# `Ne_used`, `recombination_map`, `sim_seed`, `mut_seed` and `kwargs` lie
# outside this view. The original indentation has been lost, so block extents
# are presumed.
else:
#run with no mutations (should give same result regardless of mutation rate)
ts = msprime.simulate(
n_used, Ne_used, recombination_map=recombination_map, mutation_rate=0,
random_seed=sim_seed, **kwargs)
#now add the mutations
# A separate random generator seeded with `mut_seed` drives mutation placement.
rng2 = msprime.RandomGenerator(mut_seed)
muts = msprime.MutationTable()
tables = ts.dump_tables()
mutgen = msprime.MutationGenerator(rng2, mutation_rate)
mutgen.generate(tables.nodes, tables.edges, tables.sites, muts)
msprime.sort_tables(
nodes=tables.nodes, edges=tables.edges, sites=tables.sites, mutations=muts)
# NOTE(review): `nodes`, `edges` and `sites` are not bound anywhere in the
# visible code; the tables generated into and sorted above are `tables.nodes`,
# `tables.edges` and `tables.sites`. Unless those bare names are defined
# earlier in the function, this line raises NameError -- confirm and fix.
ts = msprime.load_tables(nodes=nodes, edges=edges, sites=sites, mutations=muts)
logging.info(
"Neutral simulation done; {} sites, {} trees".format(ts.num_sites, ts.num_trees))
# Build the output file name from the simulation parameters and write the
# tree sequence to "<sim_fn>.trees".
sim_fn = mk_sim_name(sample_size, Ne, length, recombination_rate, mutation_rate, seed, mut_seed, self.simulations_dir)
logging.debug("writing {}.trees".format(sim_fn))
ts.dump(sim_fn+".trees")
# Make sure that there is *some* information in this simulation that can be used
# to infer a ts, otherwise it's pointless
if ts.get_num_mutations() == 0:
raise ValueError("No mutations present")
if ts_has_non_singleton_variants(ts) == False:
# The message reports how many variants have genotypes summing to exactly 1.
raise ValueError("No non-singleton variants present ({} singletons)".format(
sum([np.sum(v.genotypes)==1 for v in ts.variants()])))
return ts, sim_fn
# NOTE(review): tail of a helper whose `def` line, and the binding of
# `tree_sequence_builder`, `site`, `node`, `derived_state`,
# `derived_state_length`, `nodes`, `edgesets` and `sites`, lie outside this
# view.
#
# Presumably fills the pre-allocated `site`, `node` and `derived_state` arrays
# in place, since no return value is captured -- confirm.
tree_sequence_builder.dump_mutations(
site=site, node=node, derived_state=derived_state)
# Convert from 0/1 to '0'/'1' chars
derived_state += ord('0')
mutations = msprime.MutationTable()
mutations.set_columns(
site=site, node=node, derived_state=derived_state,
derived_state_length=derived_state_length)
# Drop the local references to the column arrays once they have been handed
# to the mutation table.
del site, node, derived_state, derived_state_length
# Leftover debugging output, kept disabled.
# print(nodes)
# print(edgesets)
# print(sites)
# print(mutations)
ts = msprime.load_tables(
nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations)
# Leftover, disabled simplification step with its debugging output.
# simplified = ts.simplify()
# simplified.dump_tables(nodes=nodes, edgesets=edgesets)
# print("Simplified")
# print(nodes)
# print(edgesets)
return ts