Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def validate(self, ts):
    """
    Check how ``file_uuid`` behaves when ``ts`` is dumped and reloaded.

    The checks performed, in order:

    * ``ts`` itself has no ``file_uuid``.
    * After dumping to ``self.temp_file`` and loading, the loaded tree
      sequence has a 36 character ``file_uuid`` that parses as a UUID.
    * Loading the same file twice gives the same ``file_uuid`` and tables
      equal to those of ``ts``.
    * Dumping again (either ``ts`` or a loaded copy) results in a
      different ``file_uuid``.

    :param ts: The tree sequence to dump; must not have a ``file_uuid``.
    """
    self.assertIsNone(ts.file_uuid)
    ts.dump(self.temp_file)
    other_ts = tskit.load(self.temp_file)
    self.assertIsNotNone(other_ts.file_uuid)
    # assertTrue(x, 36) would treat 36 as the failure message and pass for
    # any non-empty string; assertEqual actually checks the length.
    self.assertEqual(len(other_ts.file_uuid), 36)
    uuid = other_ts.file_uuid
    other_ts = tskit.load(self.temp_file)
    self.assertEqual(other_ts.file_uuid, uuid)
    self.assertEqual(ts.tables, other_ts.tables)
    # Check that the UUID is well-formed.
    parsed = _uuid.UUID("{" + uuid + "}")
    self.assertEqual(str(parsed), uuid)
    # Save the same tree sequence to the file. We should get a different UUID.
    ts.dump(self.temp_file)
    other_ts = tskit.load(self.temp_file)
    self.assertIsNotNone(other_ts.file_uuid)
    self.assertNotEqual(other_ts.file_uuid, uuid)
    # Even saving a ts that has a UUID to another file changes the UUID
    old_uuid = other_ts.file_uuid
    other_ts.dump(self.temp_file)
    # Reload so the claim above is actually verified against old_uuid.
    other_ts = tskit.load(self.temp_file)
    self.assertIsNotNone(other_ts.file_uuid)
    self.assertNotEqual(other_ts.file_uuid, old_uuid)
def verify_output(self, output_path):
    """
    Check that the tree sequence stored at ``output_path`` matches
    ``self.input_ts``.

    The loaded tree sequence must have the same number of samples, the same
    sequence length, the same number of sites (and more than one site), and
    a genotype matrix equal to that of ``self.input_ts``.

    :param output_path: Path of the file to load with ``tskit.load``.
    """
    output_trees = tskit.load(output_path)
    self.assertEqual(output_trees.num_samples, self.input_ts.num_samples)
    self.assertEqual(output_trees.sequence_length, self.input_ts.sequence_length)
    self.assertEqual(output_trees.num_sites, self.input_ts.num_sites)
    self.assertGreater(output_trees.num_sites, 1)
    # The original call was missing the parenthesis that closes assertTrue.
    self.assertTrue(
        np.array_equal(
            output_trees.genotype_matrix(), self.input_ts.genotype_matrix()
        )
    )
def get_0_3_slim_examples(self):
    """
    Yield the tree sequences loaded from the two ``v3.3.1`` example files.

    Each file is loaded inside ``self.assertWarns(Warning)``. Because the
    ``yield`` sits inside that context, the context is only exited when the
    consumer asks for the next item.
    """
    example_paths = (
        'tests/examples/recipe_WF.v3.3.1.trees',
        'tests/examples/recipe_nonWF.v3.3.1.trees',
    )
    for example_path in example_paths:
        with self.assertWarns(Warning):
            yield tskit.load(example_path)
# NOTE(review): this is the interior of a method whose ``def`` line is not
# visible here; ``ts`` and ``self`` come from that enclosing scope.
# Write the tree sequence to the temporary file and confirm that a
# non-empty file now exists at that path.
ts.dump(self.temp_file)
self.assertTrue(os.path.exists(self.temp_file))
self.assertGreater(os.path.getsize(self.temp_file), 0)
# verify_keys is defined outside this fragment.
self.verify_keys(ts)
# Open the same file with kastore so the stored arrays can be read by key.
store = kastore.load(self.temp_file)
# Check the basic root attributes
format_name = store['format/name']
# 'format/name' must equal the bytes of "tskit.trees" as an int8 array.
self.assertTrue(np.array_equal(
np.array(bytearray(b"tskit.trees"), dtype=np.int8), format_name))
format_version = store['format/version']
# First entry is compared with CURRENT_FILE_MAJOR, second entry with 0.
self.assertEqual(format_version[0], CURRENT_FILE_MAJOR)
self.assertEqual(format_version[1], 0)
self.assertEqual(ts.sequence_length, store['sequence_length'][0])
# Load another copy from file so we can check the uuid.
other_ts = tskit.load(self.temp_file)
# The stored "uuid" array is decoded to a string and handed to verify_uuid
# (defined outside this fragment) together with the reloaded tree sequence.
self.verify_uuid(other_ts, store["uuid"].tobytes().decode())
tables = ts.tables
# Each column of the individuals table must equal the array stored under
# the matching "individuals/<column>" key.
self.assertTrue(np.array_equal(
tables.individuals.flags, store["individuals/flags"]))
self.assertTrue(np.array_equal(
tables.individuals.location, store["individuals/location"]))
self.assertTrue(np.array_equal(
tables.individuals.location_offset, store["individuals/location_offset"]))
self.assertTrue(np.array_equal(
tables.individuals.metadata, store["individuals/metadata"]))
self.assertTrue(np.array_equal(
tables.individuals.metadata_offset, store["individuals/metadata_offset"]))
# Same comparison for the flags column of the nodes table.
self.assertTrue(np.array_equal(tables.nodes.flags, store["nodes/flags"]))
def run_simplify(args):
    """
    Load the tree sequence at ``args.input``, simplify it, and write the
    result to ``args.output``.

    :param args: Object providing the ``input`` and ``output`` attributes.
    """
    simplified = tskit.load(args.input).simplify()
    simplified.dump(args.output)
def run_dump_provenances(args):
    """
    Print the provenance records of the tree sequence at
    ``args.tree_sequence``.

    When ``args.human`` is true, each record is parsed as JSON and printed
    with its id and timestamp, re-serialised with a four-space indent.
    Otherwise the provenances are written to standard output through
    ``dump_text``.

    :param args: Object providing the ``tree_sequence`` and ``human``
        attributes.
    """
    ts = tskit.load(args.tree_sequence)
    if not args.human:
        ts.dump_text(provenances=sys.stdout)
        return
    for prov in ts.provenances():
        record = json.loads(prov.record)
        line = "id={}, timestamp={}, record={}".format(
            prov.id, prov.timestamp, json.dumps(record, indent=4))
        print(line)
def run_match_samples(args):
    """
    Match the samples in ``args.samples`` against an ancestors tree sequence
    and write the resulting tree sequence to disk.

    The input and output paths are resolved with
    ``get_ancestors_trees_path`` and ``get_output_trees_path``. Path
    compression and simplification are enabled unless the corresponding
    ``no_path_compression`` / ``no_simplify`` attribute is set.

    :param args: Object providing ``samples``, ``ancestors_trees``,
        ``output_trees``, ``progress``, ``num_threads``,
        ``no_path_compression`` and ``no_simplify``.
    """
    setup_logging(args)
    sample_data = tsinfer.SampleData.load(args.samples)

    # Resolve both paths before any loading, as the original did.
    ancestors_path = get_ancestors_trees_path(args.ancestors_trees, args.samples)
    output_path = get_output_trees_path(args.output_trees, args.samples)

    logger.info("Loading ancestral genealogies from {}".format(ancestors_path))
    ancestors_ts = tskit.load(ancestors_path)

    monitor = ProgressMonitor(enabled=args.progress, match_samples=True)
    matched_ts = tsinfer.match_samples(
        sample_data,
        ancestors_ts,
        num_threads=args.num_threads,
        path_compression=not args.no_path_compression,
        simplify=not args.no_simplify,
        progress_monitor=monitor,
    )

    logger.info("Writing output tree sequence to {}".format(output_path))
    matched_ts.dump(output_path)
    summarise_usage()
def get_1kg_sample_edges():
    """
    Load ``1kg_chr20.nosimplify.trees`` from ``data_prefix``, print its
    sample edge statistics, and collect per-population metadata.

    NOTE(review): the visible part of this function only initialises its
    result lists and returns nothing; it looks truncated, so confirm against
    the full source.
    """
    ts = tskit.load(os.path.join(data_prefix, "1kg_chr20.nosimplify.trees"))
    print("TGP")
    print_sample_edge_stats(ts)

    # Population metadata is stored as JSON-encoded bytes.
    population_metadata = [
        json.loads(pop.metadata.decode()) for pop in ts.populations()
    ]
    population_name = [md["name"] for md in population_metadata]
    population_region = [md["super_population"] for md in population_metadata]

    tables = ts.tables
    # Number of edges in which each node id appears as the child.
    child_counts = np.bincount(tables.edges.child)

    # Five separate, initially empty, accumulators.
    datasets, samples, strands, populations, regions = [], [], [], [], []
def load(cls, path):
    '''
    Load a :class:`SlimTreeSequence` from a .trees file on disk.

    The file is read twice: once with ``tskit.load`` for the tree sequence
    and once with ``kastore.load`` to look for a stored reference sequence
    under the ``reference_sequence/data`` key.

    :param string path: The path to a .trees file.
    :return: ``cls(ts, reference_sequence)``, where ``reference_sequence``
        is the ASCII-decoded stored data, or ``None`` if the key is absent.
    :rtype: SlimTreeSequence
    '''
    ts = tskit.load(path)
    # extract the reference sequence from the kastore
    kas = kastore.load(path)
    if 'reference_sequence/data' in kas:
        int_rs = kas['reference_sequence/data']
        # tobytes() returns the same bytes as the deprecated tostring()
        # alias, which newer NumPy releases no longer provide.
        reference_sequence = int_rs.tobytes().decode('ascii')
    else:
        reference_sequence = None
    return cls(ts, reference_sequence)
def run_list(args):
    """
    Print a summary of the file at ``args.path``.

    The file is first tried as a tskit tree sequence. If tskit raises
    ``FileFormatError``, it is loaded with ``tsinfer.load`` instead and
    either its ``info`` (when ``args.storage`` is set) or the object itself
    is printed.

    :param args: Object providing the ``path`` and ``storage`` attributes.
    """
    setup_logging(args)
    # First try to load with tskit.
    try:
        ts = tskit.load(args.path)
    except tskit.FileFormatError:
        ts = None

    if ts is not None:
        summarise_tree_sequence(args.path, ts)
        return

    tsinfer_file = tsinfer.load(args.path)
    print(tsinfer_file.info if args.storage else tsinfer_file)