Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_omit_gap_pos2(self):
    """omit_gap_pos with allowed_gap_frac=0 drops any column holding a gap"""
    gapped = make_aligned_seqs(
        data={
            "seq1": "--ACGT--GT---",
            "seq2": "--ACGTA-GT---",
            "seq3": "--ACGTA-GT---",
        }
    )
    ungapped = gapped.omit_gap_pos(allowed_gap_frac=0)
    # the column where only seq1 is gapped is removed from every sequence
    expected = dict.fromkeys(("seq1", "seq2", "seq3"), "ACGTGT")
    self.assertEqual(ungapped.to_dict(), expected)

    # here every column has a gap in at least one sequence
    mostly_gaps = make_aligned_seqs(
        data={"seq1": "ACGT", "seq2": "----", "seq3": "----"}
    )
    self.assertEqual(mostly_gaps.omit_gap_pos(allowed_gap_frac=0), None)
def test_sample_with_replacement(self):
    """sampling with replacement can return more columns than the source has"""
    alignment = make_aligned_seqs(data={"seq1": "gatc", "seq2": "gatc"})
    sample = alignment.sample(1000, with_replacement=True)
    # 1000 columns are requested from a 4 column alignment, so columns must
    # be reused; check the result has the requested length rather than only
    # checking that the call does not raise
    self.assertEqual(len(sample), 1000)
def test_position_specific_mprobs(self):
    """correctly compute likelihood when positions have distinct
    probabilities"""
    # split each sequence into its even-indexed and odd-indexed columns
    posn1 = []
    posn2 = []
    for name, seq in self.aln.to_dict().items():
        posn1.append([name, seq[::2]])
        posn2.append([name, seq[1::2]])

    # the position specific alignments
    posn1 = make_aligned_seqs(data=posn1)
    posn2 = make_aligned_seqs(data=posn2)

    # a newQ dinucleotide model
    sm = TimeReversibleNucleotide(motif_length=2, mprob_model="monomer")
    lf = sm.make_likelihood_function(self.tree)
    lf.set_alignment(posn1)
    posn1_lnL = lf.get_log_likelihood()
    lf.set_alignment(posn2)
    posn2_lnL = lf.get_log_likelihood()
    expect_lnL = posn1_lnL + posn2_lnL

    # the joint model
    lf.set_alignment(self.aln)
    aln_lnL = lf.get_log_likelihood()

    # setting the full alignment, which has different motif probs, should
    # produce a different lnL
    self.assertNotAlmostEqual(expect_lnL, aln_lnL)
def test_alt_hyp_fail_error(self):
    """the NotCompleted result has origin 'model' when the alt fails"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGA",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGA",
        "Opossum": "TGACCAGTGAAAGTGGCGGCGGTGGCTGA",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")

    # both models share the same three taxon tree
    tree = "(Mouse,Human,Opossum)"
    null = evo_app.model("F81", tree=tree)
    alt = evo_app.model("MG94HKY", tree=tree)

    result = evo_app.hypothesis(null, alt)(aln)
    self.assertEqual(result.origin, "model")
def test_paralinear_distance(self):
    """hand calculation of the paralinear distance for two sequences"""
    seq1 = "GGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT"
    seq2 = "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC"
    data = [("seq1", seq1), ("seq2", seq2)]
    aln = make_aligned_seqs(data=data, moltype=DNA)
    paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)
    paralinear_calc.run(show_progress=False)

    # joint counts of aligned bases, rows from seq1 and columns from seq2
    base_index = {base: i for i, base in enumerate("ACGT")}
    J = numpy.zeros((4, 4))
    for base1, base2 in zip(seq1, seq2):
        J[base_index[base1], base_index[base2]] += 1

    # empty diagonal cells get 0.5 added before normalising
    for i in range(4):
        if J[i, i] == 0:
            J[i, i] += 0.5

    J /= J.sum()
    M = numpy.linalg.inv(J)
    row_totals, col_totals = J.sum(1), J.sum(0)
    dist = -0.25 * numpy.log(
        numpy.linalg.det(J) / numpy.sqrt(row_totals.prod() * col_totals.prod())
    )
    # NOTE(review): M and dist are computed here but never compared with
    # paralinear_calc in the visible code -- confirm the intended assertion
def test_model_result_setitem(self):
    """setting an arbitrary dict, an int or an alignment raises TypeError"""
    seqs = {
        "Human": "ATGCGGCTCGCGGAGGCCGCGCTCGCGGAG",
        "Mouse": "ATGCCCGGCGCCAAGGCAGCGCTGGCGGAG",
        "Opossum": "ATGCCAGTGAAAGTGGCGGCGGTGGCTGAG",
    }
    aln = make_aligned_seqs(data=seqs, moltype="dna")
    result = model_result(name="one", source="two")

    # each of these values must be rejected on assignment
    for invalid in ({"type": "arbitrary"}, 4, aln):
        with self.assertRaises(TypeError):
            result["name"] = invalid
def test_simulate_alignment2(self):
    """simulating from a dinucleotide model gives an alignment of length 6"""
    tree = make_tree("(a,c);")
    observed = make_aligned_seqs(data={"a": "ggaatt", "c": "cctaat"})
    model = substitution_model.TimeReversibleDinucleotide(mprob_model="tuple")

    lf = model.make_likelihood_function(tree)
    lf.set_alignment(observed)

    # the observed alignment has 6 columns and so should the simulated one
    self.assertEqual(len(lf.simulate_alignment()), 6)
def test_slice_align(self):
    """slicing an alignment slices the columns of every sequence"""
    names = ("seq1", "seq2", "seq3")
    alignment = make_aligned_seqs(data=dict.fromkeys(names, "ACGTACGT"))

    # (column slice, expected sequence); the second slice stops beyond the
    # last column of the 8 column alignment
    cases = [
        (slice(2, 5), "GTA"),
        (slice(5, 20), "CGT"),
    ]
    for columns, expected in cases:
        sub_align = alignment[columns]
        self.assertEqual(len(sub_align), 3)
        self.assertEqual(len(sub_align.names), 3)
        self.assertEqual(sub_align.to_dict(), dict.fromkeys(names, expected))
for merged_name, orig_name in list(aln1_name_map.items()):
result[merged_name] = alignment1.get_gapped_seq(
orig_name
) + alignment2.get_gapped_seq(aln2_name_map[merged_name])
except ValueError: # Differing MolTypes
for merged_name, orig_name in list(aln1_name_map.items()):
result[merged_name] = Sequence(
alignment1.get_gapped_seq(orig_name)
) + Sequence(alignment2.get_gapped_seq(aln2_name_map[merged_name]))
except KeyError as e:
raise KeyError(
"A sequence identifier is in alignment2 "
+ "but not alignment1 -- did you filter out sequences identifiers"
+ " not common to both alignments?"
)
return make_aligned_seqs(result, array_align=True)
def get_alignment(self):
    """Return the result of passing self.aligneds to make_aligned_seqs."""
    # imported when the method is called rather than at module load
    from cogent3 import make_aligned_seqs as _build_alignment

    return _build_alignment(self.aligneds)