How to use the biotite.sequence.io.fasta.FastaFile.read function in biotite

To help you get started, we’ve selected a few biotite examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

biotite-dev / biotite / tests / sequence / test_align.py View on Github

def sequences():
    """
    Provide 10 Cas9 sequences.

    The sequences are read from ``cas9.fasta`` in the ``sequence``
    test data directory.

    Returns
    -------
    list of ProteinSequence
        One protein sequence for each entry of the FASTA file.
    """
    fasta_path = join(data_dir("sequence"), "cas9.fasta")
    cas9_file = fasta.FastaFile.read(fasta_path)
    return [seq.ProteinSequence(seq_str) for seq_str in cas9_file.values()]

biotite-dev / biotite / tests / database / test_entrez.py View on Github

def test_fetch(common_name, as_file_like):
    """
    Fetch the entry ``1L2Y_A`` in FASTA format from Entrez and parse
    the result as FASTA file.

    Parameters
    ----------
    common_name
        If truthy, the database is addressed as ``"Protein"``,
        otherwise as ``"protein"``.
    as_file_like
        If truthy, ``None`` is passed as target path instead of the
        temporary directory.
        NOTE(review): presumably ``fetch()`` returns a file-like
        object instead of a file path in this case -- confirm.
    """
    # No target directory is given in the file-like case
    path = None if as_file_like else biotite.temp_dir()
    db_name = "Protein" if common_name else "protein"
    file = entrez.fetch("1L2Y_A", path, "fa", db_name,
                        "fasta", overwrite=True)
    # The fetched result must be readable as FASTA file
    fasta_file = fasta.FastaFile.read(file)
    prot_seq = fasta.get_sequence(fasta_file)

biotite-dev / biotite / tests / sequence / test_fasta.py View on Github

def test_access():
    """
    Check the dictionary-like interface of a FASTA file.

    The entries of ``nuc.fasta`` are read via their headers and via
    ``items()``. Afterwards one entry is overwritten, one is deleted
    and a new one is added, and the resulting content is compared
    again.
    """
    fasta_path = os.path.join(data_dir("sequence"), "nuc.fasta")
    fasta_file = fasta.FastaFile.read(fasta_path)

    initial_entries = {
        "dna sequence": "ACGCTACGT",
        "another dna sequence": "A",
        "third dna sequence": "ACGT",
    }
    # Each entry must be accessible via its header ...
    for header, sequence in initial_entries.items():
        assert fasta_file[header] == sequence
    # ... and the file must contain exactly these entries
    assert dict(fasta_file.items()) == initial_entries

    # Overwrite an existing entry, delete one and append a new one
    fasta_file["another dna sequence"] = "AA"
    del fasta_file["dna sequence"]
    fasta_file["yet another sequence"] = "ACGT"
    modified_entries = {
        "another dna sequence": "AA",
        "third dna sequence": "ACGT",
        "yet another sequence": "ACGT",
    }
    assert dict(fasta_file.items()) == modified_entries

biotite-dev / biotite / tests / sequence / test_fasta.py View on Github

path = os.path.join(data_dir("sequence"), "nuc.fasta")
    file = fasta.FastaFile.read(path)
    assert seq.NucleotideSequence("ACGCTACGT") == fasta.get_sequence(file)
    
    seq_dict = fasta.get_sequences(file)
    file2 = fasta.FastaFile()
    fasta.set_sequences(file2, seq_dict)
    seq_dict2 = fasta.get_sequences(file2)
    assert seq_dict == seq_dict2
    
    file3 = fasta.FastaFile()
    fasta.set_sequence(file3, seq.NucleotideSequence("AACCTTGG"))
    assert file3["sequence"] == "AACCTTGG"
    
    path = os.path.join(data_dir("sequence"), "prot.fasta")
    file4 = fasta.FastaFile.read(path)
    assert seq.ProteinSequence("YAHGFRTGS") == fasta.get_sequence(file4)
    
    path = os.path.join(data_dir("sequence"), "invalid.fasta")
    file5 = fasta.FastaFile.read(path)
    with pytest.raises(ValueError):
        seq.NucleotideSequence(fasta.get_sequence(file5))

biotite-dev / biotite / doc / examples / scripts / sequence / luxa_comparison.py View on Github

# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt
import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.database.entrez as entrez


# Search for protein products of luxA gene in UniProtKB/Swiss-Prot database
# Both conditions are combined with `&`
query =   entrez.SimpleQuery("luxA", "Gene Name") \
        & entrez.SimpleQuery("srcdb_swiss-prot", "Properties")
uids = entrez.search(query, db_name="protein")
# Fetch all search hits as a single FASTA file
fasta_file = fasta.FastaFile.read(entrez.fetch_single_file(
    uids, None, db_name="protein", ret_type="fasta"
))

# `ids` and `sequences` are filled in parallel:
# the identifier at index i belongs to the sequence at index i
ids = []
sequences = []
for header, seq_str in fasta_file.items():
    # Extract the UniProt Entry name from header:
    # it is the first whitespace-delimited token
    # of the last '|'-separated field
    identifier = header.split("|")[-1].split()[0]
    ids.append(identifier)
    sequences.append(seq.ProteinSequence(seq_str))

matrix = align.SubstitutionMatrix.std_protein_matrix()
alignment, order, tree, distances = align.align_multiple(
    sequences, matrix, gap_penalty=(-10,-1), terminal_penalty=False
)
# Order alignment according to the guide tree

biotite-dev / biotite / doc / examples / scripts / sequence / pi3k_alignment.py View on Github

import warnings
import numpy as np
import matplotlib.pyplot as plt
import biotite
import biotite.database.entrez as entrez
import biotite.sequence as seq
import biotite.sequence.align as align
import biotite.sequence.graphics as graphics
import biotite.sequence.io.fasta as fasta
import biotite.application.clustalo as clustalo

# Entries to be fetched from the protein database
# NOTE(review): `names` presumably holds the display name for the UID
# at the same index -- confirm against the code using `names`
uids  = ["5JHB_A", "5LUQ_A",   "5FLC_B", "5YZ0_A", "5NP0_A", "4FUL_A"]
names = ["PI3K",   "DNA-PKcs", "mTOR",   "ATR",    "ATM",    "hSMG-1"]

# Fetch all entries as a single FASTA file
file = fasta.FastaFile.read(entrez.fetch_single_file(
    uids, None, db_name="protein", ret_type="fasta"
))
# Only the sequence strings are required, the headers are not used
sequences = [seq.ProteinSequence(seq_str) for seq_str in file.values()]

alignment = clustalo.ClustalOmegaApp.align(sequences)

########################################################################
# Since the sequences are relatively long, displaying the entire
# alignment with :func:`plot_alignment` would result in a figure that
# is too big.
# Instead we use a heat map, which highlights the similarity in each
# alignment column using a color map.
#
# Like the :class:`LetterSimilarityPlotter`, we will use the
# *average normalized similarity* as the measure.

biotite-dev / biotite / doc / examples / scripts / sequence / orf_identification.py View on Github

Since we want to perform a six-frame translation we have to look at
the complementary strand of the genome as well.
"""

# Code source: Patrick Kunzmann
# License: BSD 3 clause

import biotite.sequence as seq
import biotite.sequence.io.fasta as fasta
import biotite.database.entrez as entrez
import matplotlib.pyplot as plt

# Download Porcine circovirus genome
file = entrez.fetch("KP282147", None, "fa", "nuccore", "fasta")
fasta_file = fasta.FastaFile.read(file)
genome = fasta.get_sequence(fasta_file)
# Perform translation for forward strand
proteins, positions = genome.translate()
print("Forward strand:")
# Each protein belongs to the position entry at the same index
# NOTE(review): the two printed position values are presumably the
# start and stop of the respective ORF -- confirm
for protein, position in zip(proteins, positions):
    print("{:4d} - {:4d}:   {:}"
          .format(position[0], position[1], str(protein)))
print("\n")
# Perform translation for complementary strand
genome_rev = genome.reverse().complement()
proteins, positions = genome_rev.translate()
print("Reverse strand:")
for protein, position in zip(proteins, positions):
    print("{:5d} - {:5d}:   {:}"
          .format(position[0], position[1], str(protein)))

biotite-dev / biotite / doc / examples / scripts / sequence / bionigma_alignment.py View on Github

for ax in (axes, twin):
        ax.set_yticklabels(ax.get_yticklabels(), fontdict={"color":"white"})
    axes.get_figure().patch.set_facecolor("#181818")




# Using cyclotide sequences as example
# In Python `&` binds tighter than `^`, hence the query is evaluated as
# (Cyclotide & cter & srcdb_swiss-prot) ^ Precursor
# NOTE(review): presumably `^` excludes the entries matching
# "Precursor" -- confirm against the entrez query documentation
query = (
    entrez.SimpleQuery("Cyclotide") &
    entrez.SimpleQuery("cter") &
    entrez.SimpleQuery("srcdb_swiss-prot", field="Properties") ^
    entrez.SimpleQuery("Precursor")
)
uids = entrez.search(query, "protein")
# Fetch all search hits as a single FASTA file
fasta_file = fasta.FastaFile.read(
    entrez.fetch_single_file(uids, None, "protein", "fasta")
)
sequence_dict = fasta.get_sequences(fasta_file)
# Headers and sequences are kept in two lists with corresponding order
headers = list(sequence_dict.keys())
sequences = list(sequence_dict.values())
# Each sequence is labeled with the last character of its header
labels = [header[-1] for header in headers]

# Perform a multiple sequence alignment
matrix = align.SubstitutionMatrix.std_protein_matrix()
alignment, order, _, _ = align.align_multiple(sequences, matrix)
# Order alignment according to guide tree
alignment = alignment[:, order.tolist()]
# Bring the labels into the same order as the alignment columns
labels = [labels[i] for i in order]

# Visualize the alignment using the new alignment plotter
fig = plt.figure(figsize=(8.0, 3.7))

biotite-dev / biotite / doc / examples / scripts / sequence / codon_usage.py View on Github

# Again, symbol codes are used here
# Maps each amino acid code to the code of its most frequent codon
opt_codons = {}
# NOTE(review): 20 presumably covers the codes of the standard
# amino acids -- confirm against the alphabet in use
for amino_acid_code in range(20):
    codon_codes_for_aa = table[amino_acid_code]
    # Find codon with maximum frequency
    max_freq = 0
    # Stays `None` if no codon of this amino acid has a count above zero
    best_codon_code = None
    for codon_code in codon_codes_for_aa:
        # Strict comparison: on a tie the codon found first is kept
        if codon_counter[codon_code] > max_freq:
            max_freq = codon_counter[codon_code]
            best_codon_code = codon_code
    # Map the amino acid to the codon with maximum frequency
    opt_codons[amino_acid_code] = best_codon_code

# Fetch the streptavidin protein sequence from Streptomyces avidinii
fasta_file = fasta.FastaFile.read(
    entrez.fetch("P22629", None, "fasta", "protein", "fasta")
)
strep_prot_seq = fasta.get_sequence(fasta_file)
# Create a DNA sequence from the protein sequence
# using the optimal codons
strep_dna_seq = seq.NucleotideSequence()
# The sequence code is assembled directly from the codon codes
# NOTE(review): assumes every entry of `opt_codons` is an array-like
# of nucleotide symbol codes -- confirm
strep_dna_seq.code = np.concatenate(
    [opt_codons[amino_acid_code] for amino_acid_code in strep_prot_seq.code]
)
# Add stop codon
strep_dna_seq += seq.NucleotideSequence("TAA")
# Put the DNA sequence into a FASTA file
# The name `fasta_file` is reused for a new, empty file
fasta_file = fasta.FastaFile()
fasta_file["Codon optimized streptavidin"] = str(strep_dna_seq)
# Print the contents of the created FASTA file
print(fasta_file)

How to use the biotite.sequence.io.fasta.FastaFile.read function in biotite

To help you get started, we’ve selected a few biotite examples, based on popular ways it is used in public projects.

biotite

Package Health Score

Popular biotite functions

Similar packages