Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
fasta.set_sequences(file2, seq_dict)
seq_dict2 = fasta.get_sequences(file2)
assert seq_dict == seq_dict2
file3 = fasta.FastaFile()
fasta.set_sequence(file3, seq.NucleotideSequence("AACCTTGG"))
assert file3["sequence"] == "AACCTTGG"
path = os.path.join(data_dir("sequence"), "prot.fasta")
file4 = fasta.FastaFile.read(path)
assert seq.ProteinSequence("YAHGFRTGS") == fasta.get_sequence(file4)
path = os.path.join(data_dir("sequence"), "invalid.fasta")
file5 = fasta.FastaFile.read(path)
with pytest.raises(ValueError):
seq.NucleotideSequence(fasta.get_sequence(file5))
def test_encoding():
    """
    Check that a nucleotide string converted into a
    `NucleotideSequence` and back into a `str` is unchanged.
    """
    original = "AATGCGTTA"
    round_tripped = str(seq.NucleotideSequence(original))
    assert original == round_tripped
def test_concatenation():
    """
    Check that adding two `NucleotideSequence` objects gives a sequence
    whose string form is the concatenation of both input strings.

    Three operand combinations are checked, including an ``"NNN"``
    sequence on either side of the operator.
    """
    str1 = "AAGTTA"
    str2 = "CGA"
    str3 = "NNN"
    # Same pairs and same order as the individual checks they replace
    operand_pairs = [(str1, str2), (str1, str3), (str3, str1)]
    for left, right in operand_pairs:
        concat_seq = (
            seq.NucleotideSequence(left) + seq.NucleotideSequence(right)
        )
        assert left + right == str(concat_seq)
def test_align_optimal_simple(local, term, gap_penalty,
                              input1, input2, expect):
    """
    Test `align_optimal()` function using constructed test cases.

    Each returned alignment must have a string representation that is
    contained in `expect`, and the separate `align.score()` function
    must give the same score that is stored in the alignment.

    NOTE(review): the arguments are presumably supplied by a pytest
    parametrization that is not visible in this excerpt - confirm.
    """
    first = seq.NucleotideSequence(input1)
    second = seq.NucleotideSequence(input2)
    subst_matrix = align.SubstitutionMatrix.std_nucleotide_matrix()
    # Both the alignment and the scoring call must use the same penalties
    penalties = {"gap_penalty": gap_penalty, "terminal_penalty": term}

    # Test alignment function
    alignments = align.align_optimal(
        first, second, subst_matrix, local=local, **penalties
    )
    for alignment in alignments:
        assert str(alignment) in expect

    # Test if separate score function calculates the same score
    for alignment in alignments:
        recomputed = align.score(alignment, subst_matrix, **penalties)
        assert recomputed == alignment.score
def test_access():
    """
    Check indexing, iteration and slicing of a `NucleotideSequence`
    against the same operations on the plain string it was built from.
    """
    text = "AATGCGTTA"
    dna = seq.NucleotideSequence(text)
    # Single symbol access
    assert text[2] == dna[2]
    # Iterating over the sequence yields its symbols in order
    assert text == "".join(dna)
    # Slicing gives a sequence again
    trimmed = dna[3:-2]
    assert "GCGT" == str(trimmed)
def test_align_ungapped():
    """
    Test `align_ungapped()` function.

    Two sequences of equal length are aligned with the standard
    nucleotide substitution matrix; the expected score and the
    expected string representation are checked.
    """
    upper = seq.NucleotideSequence("ACCTGA")
    lower = seq.NucleotideSequence("ACTGGT")
    subst_matrix = align.SubstitutionMatrix.std_nucleotide_matrix()
    alignment = align.align_ungapped(upper, lower, subst_matrix)
    assert alignment.score == 3
    assert str(alignment) == "ACCTGA\nACTGGT"
def test_translation_met_start():
    """
    Test whether the start amino acid is replaced by methionine,
    i.e. the correct function of the 'met_start' parameter.

    The default codon table is modified so that ``"AAA"`` is the start
    codon, then the translated proteins are compared with the expected
    strings.
    """
    table = seq.CodonTable.default_table().with_start_codons("AAA")
    dna = seq.NucleotideSequence("GAAACTGAAATAAGAAC")
    # The second return value is not needed for this check
    proteins, _ = dna.translate(codon_table=table, met_start=True)
    translated = list(map(str, proteins))
    assert translated == ["MLK*", "M*"]
# Map the amino acid to the codon with maximum frequency
opt_codons[amino_acid_code] = best_codon_code
# Build a DNA sequence for streptavidin from the per-amino-acid codons
# stored in `opt_codons` and display it in FASTA format.
#
# Fetch the streptavidin protein sequence from Streptomyces avidinii
# NOTE(review): the `None` argument presumably means that no target file
# is written and the download is kept in memory - confirm against the
# `entrez.fetch()` documentation.
fasta_file = fasta.FastaFile.read(
entrez.fetch("P22629", None, "fasta", "protein", "fasta")
)
strep_prot_seq = fasta.get_sequence(fasta_file)
# Create a DNA sequence from the protein sequence
# using the optimal codons:
# Start with an empty sequence and set its code directly to the
# concatenated codon codes, one `opt_codons` entry per amino acid code
strep_dna_seq = seq.NucleotideSequence()
strep_dna_seq.code = np.concatenate(
[opt_codons[amino_acid_code] for amino_acid_code in strep_prot_seq.code]
)
# Add stop codon
strep_dna_seq += seq.NucleotideSequence("TAA")
# Put the DNA sequence into a FASTA file
# (the name `fasta_file` is rebound here, the downloaded file is no
# longer referenced)
fasta_file = fasta.FastaFile()
fasta_file["Codon optimized streptavidin"] = str(strep_dna_seq)
# Print the contents of the created FASTA file
print(fasta_file)
# In a real application it would be written onto the hard drive
# instead of being printed, e.g. via ``fasta_file.write(<path>)``.
# Show the first 60 symbols of the CDS sequence
print(cds_seq[:60], "...")
########################################################################
# Awesome.
# Now we can translate the sequence and compare it with the translation
# given by the CDS feature.
# But before we can do that, we have to prepare the data:
# The DNA sequence uses an ambiguous alphabet due to the nasty
# ``'M'`` at position 28 of the original sequence.
# Furthermore, we have to remove the stop symbol after translation,
# and we need to remove the whitespace characters in the translation
# given by the CDS feature.
# To make the alphabet unambiguous, we create a new NucleotideSequence
# containing only the CDS portion, which is unambiguous.
# Thus, the resulting NucleotideSequence has an unambiguous alphabet.
cds_seq = seq.NucleotideSequence(cds_seq)
# Now we can translate the unambiguous sequence.
prot_seq = cds_seq.translate(complete=True)
# Show the first 60 symbols of the translated sequence
print(prot_seq[:60], "...")
print(
"Are the translated sequences equal?",
# Remove stops of our translation
(str(prot_seq.remove_stops()) ==
# Remove whitespace characters from translation given by CDS feature
cds_feature.qual["translation"].replace(" ", ""))
)
########################################################################
# Phylogenetic and guide trees
# ----------------------------
#
# .. currentmodule:: biotite.sequence.phylo
import numpy as np
import matplotlib.pyplot as plt
import biotite.sequence as seq
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
# The list of Anderson promoters
# NOTE(review): the first two entries are identical, and so are the
# 5th and the 14th entry - verify against the original promoter
# collection whether these duplicates are intended.
seqs = [
    seq.NucleotideSequence(promoter)
    for promoter in (
        "ttgacagctagctcagtcctaggtataatgctagc",
        "ttgacagctagctcagtcctaggtataatgctagc",
        "tttacagctagctcagtcctaggtattatgctagc",
        "ttgacagctagctcagtcctaggtactgtgctagc",
        "ctgatagctagctcagtcctagggattatgctagc",
        "ttgacagctagctcagtcctaggtattgtgctagc",
        "tttacggctagctcagtcctaggtactatgctagc",
        "tttacggctagctcagtcctaggtatagtgctagc",
        "tttacggctagctcagccctaggtattatgctagc",
        "ctgacagctagctcagtcctaggtataatgctagc",
        "tttacagctagctcagtcctagggactgtgctagc",
        "tttacggctagctcagtcctaggtacaatgctagc",
        "ttgacggctagctcagtcctaggtatagtgctagc",
        "ctgatagctagctcagtcctagggattatgctagc",
        "ctgatggctagctcagtcctagggattatgctagc",
        "tttatggctagctcagtcctaggtacaatgctagc",
        "tttatagctagctcagcccttggtacaatgctagc",
        "ttgacagctagctcagtcctagggactatgctagc",
        "ttgacagctagctcagtcctagggattgtgctagc",
        "ttgacggctagctcagtcctaggtattgtgctagc",
    )
]
# Sequences do not need to be aligned
# -> Create alignment with trivial trace
# [[0 0 0 ...]
# [1 1 1 ...]