Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_array_conversion(path, single_model):
model = 1 if single_model else None
mmtf_file = mmtf.MMTFFile.read(path)
a1 = mmtf.get_structure(mmtf_file, model=model, include_bonds=True)
mmtf_file = mmtf.MMTFFile()
mmtf.set_structure(mmtf_file, a1)
temp = TemporaryFile("w+b")
mmtf_file.write(temp)
temp.seek(0)
mmtf_file = mmtf.MMTFFile.read(temp)
temp.close()
a2 = mmtf.get_structure(mmtf_file, model=model, include_bonds=True)
for category in a1.get_annotation_categories():
assert a1.get_annotation(category).tolist() == \
a2.get_annotation(category).tolist()
assert a1.coord.flatten().tolist() == \
def test_pdbx_consistency(path, single_model):
model = None if single_model else 1
cif_path = splitext(path)[0] + ".cif"
mmtf_file = mmtf.MMTFFile.read(path)
a1 = mmtf.get_structure(mmtf_file, model=model)
pdbx_file = pdbx.PDBxFile.read(cif_path)
a2 = pdbx.get_structure(pdbx_file, model=model)
# Sometimes mmCIF files can have 'cell' entry
# but corresponding MMTF file has not 'unitCell' entry
# -> Do not assert for dummy entry in mmCIF file
# (all vector elements = {0, 1})
if a2.box is not None and not ((a2.box == 0) | (a2.box == 1)).all():
assert np.allclose(a1.box, a2.box)
for category in a1.get_annotation_categories():
assert a1.get_annotation(category).tolist() == \
a2.get_annotation(category).tolist()
assert a1.coord.flatten().tolist() == \
approx(a2.coord.flatten().tolist(), abs=1e-3)
def test_array_conversion(path, single_model):
model = 1 if single_model else None
mmtf_file = mmtf.MMTFFile.read(path)
a1 = mmtf.get_structure(mmtf_file, model=model, include_bonds=True)
mmtf_file = mmtf.MMTFFile()
mmtf.set_structure(mmtf_file, a1)
temp = TemporaryFile("w+b")
mmtf_file.write(temp)
temp.seek(0)
mmtf_file = mmtf.MMTFFile.read(temp)
temp.close()
a2 = mmtf.get_structure(mmtf_file, model=model, include_bonds=True)
for category in a1.get_annotation_categories():
assert a1.get_annotation(category).tolist() == \
a2.get_annotation(category).tolist()
assert a1.coord.flatten().tolist() == \
approx(a2.coord.flatten().tolist(), abs=1e-3)
assert a1.bonds == a2.bonds
if a1.box is not None:
assert np.allclose(a1.box, a2.box)
def test_dssp(path):
sec_struct_codes = {0 : "I",
1 : "S",
2 : "H",
3 : "E",
4 : "G",
5 : "B",
6 : "T",
7 : "C"}
mmtf_file = mmtf.MMTFFile.read(path)
array = mmtf.get_structure(mmtf_file, model=1)
array = array[array.hetero == False]
first_chain_id = array.chain_id[0]
chain = array[array.chain_id == first_chain_id]
n_residues = struc.get_residue_count(chain)
# Secondary structure annotation in PDB use also DSSP
# -> compare PDB and local DSSP
sse = mmtf_file["secStructList"]
sse = sse[:n_residues]
if (sse == -1).all():
# First chain is not a polypeptide chain (presumably DNA/RNA)
# DSSP not applicable -> return
return
sse = np.array([sec_struct_codes[code] for code in sse],
dtype="U1")
def test_fetch(format, as_file_like):
path = None if as_file_like else tempfile.gettempdir()
file_path_or_obj = rcsb.fetch("1l2y", format, path, overwrite=True)
if format == "pdb":
file = pdb.PDBFile.read(file_path_or_obj)
pdb.get_structure(file)
elif format == "pdbx":
file = pdbx.PDBxFile.read(file_path_or_obj)
pdbx.get_structure(file)
elif format == "mmtf":
file = mmtf.MMTFFile.read(file_path_or_obj)
mmtf.get_structure(file)
elif format == "fasta":
file = fasta.FastaFile.read(file_path_or_obj)
# Test if the file contains any sequences
assert len(fasta.get_sequences(file)) > 0
#########################################################################
# Now that the raw data is prepared, we can load a protein structure for
# which we will display the glycosylation.
# Here we choose the glycosylated peroxidase *4CUO*, as it contains a
# lot of glycans.
#
# The resulting plot makes only sense for a single protein chain.
# In this case the peroxidase structure has only one chain, but since
# this script should also work for any other structure, we filter out
# a single one.
PDB_ID = "4CUO"
CHAIN_ID = "A"
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch(PDB_ID, "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1, include_bonds=True)
structure = structure[structure.chain_id == CHAIN_ID]
# We will need these later:
# An array containing all residue IDs belonging to amino acids
amino_acid_res_ids = np.unique(structure.res_id[~structure.hetero])
# A dictionary mapping residue IDs to their residue names
ids_to_names = {res_id : res_name for res_id, res_name
in zip(structure.res_id, structure.res_name)}
########################################################################
# To determine which residues (including the saccharides) are connected
# with each other, we will use a graph representation:
# The nodes are residues, identified by their respective residue IDs,
# and the edges indicate which residues are connected via covalent
# bonds.
# In this case you probably might want to use MMTF files.
# MMTF files describe structures just like PDB and mmCIF files,
# but they are binary!
# This circumstance increases the downloading and parsing speed by
# several multiples.
# The usage is similar to :class:`PDBxFile`: The :class:`MMTFFile` class
# decodes the file and makes it raw information accessible.
# Via :func:`get_structure()` the data can be loaded into an atom array
# (stack) and :func:`set_structure()` is used to save it back into a
# MMTF file.
import numpy as np
import biotite.structure.io.mmtf as mmtf
mmtf_file_path = rcsb.fetch("1l2y", "mmtf", gettempdir())
mmtf_file = mmtf.MMTFFile.read(mmtf_file_path)
stack = mmtf.get_structure(mmtf_file)
array = mmtf.get_structure(mmtf_file, model=1)
# Do some fancy stuff
mmtf.set_structure(mmtf_file, array)
########################################################################
# A more low level access to MMTF files is also possible:
# An MMTF file is structured as dictionary, with each key being a
# structural feature like the coordinates, the residue ID or the
# secondary structure.
# Most of the fields are encoded to reduce to size of the file,
# but the whole decoding process is handled automatically by
# the :class:`MMTFFile` class:
# If a field is encoded the decoded
# :class:`ndarray` is returned, otherwise the value is directly
# returned.
########################################################################
# As test case a structure of a *cysteine knot* protein is used,
# specifically the squash trypsin inhibitor *EETI-II*
# (PDB: `2IT7 `_).
# This motif is famous for its three characteristic disulfide bridges
# forming a 'knot'.
# However, the loaded MMTF file already has information about the
# covalent bonds - including the disulfide bridges.
# To have a proper test case, all disulfide bonds are removed from the
# structure and we pretend that the structure never had information
# about the disulfide bonds.
# For later verification that the implemented function wroks correctly,
# the disulfide bonds, that are removed, are printed out.
mmtf_file = mmtf.MMTFFile.read(
rcsb.fetch("2IT7", "mmtf", biotite.temp_dir())
)
knottin = mmtf.get_structure(mmtf_file, include_bonds=True, model=1)
sulfide_indices = np.where(
(knottin.res_name == "CYS") & (knottin.atom_name == "SG")
)[0]
for i, j, _ in knottin.bonds.as_array():
if i in sulfide_indices and j in sulfide_indices:
print(knottin[i])
print(knottin[j])
print()
knottin.bonds.remove_bond(i,j)
########################################################################
# Now the sanitized structure is put into the disulfide detection
# function.
[-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
[-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
])
# Fetch animal lysoyzme structures
lyso_files = rcsb.fetch(
["1REX", "1AKI", "1DKJ", "1GD6"],
format="mmtf", target_path=biotite.temp_dir()
)
organisms = ["H. sapiens", "G. gallus", "C. viginianus", "B. mori"]
# Create a PB sequence from each structure
pb_seqs = []
for file_name in lyso_files:
file = mmtf.MMTFFile.read(file_name)
# Take only the first model into account
array = mmtf.get_structure(file, model=1)
# Remove everything but the first protein chain
array = array[struc.filter_amino_acids(array)]
array = array[array.chain_id == array.chain_id[0]]
# Calculate backbone dihedral angles,
# as the PBs are determined from them
phi, psi, omega = struc.dihedral_backbone(array)
# A PB requires the 8 phi/psi angles of 5 amino acids,
# centered on the amino acid to calculate the PB for
# Hence, the PBs are not defined for the two amino acids
# at each terminus
pb_angles = np.full((len(phi)-4, 8), np.nan)
pb_angles[:, 0] = psi[ : -4]
pb_angles[:, 1] = phi[1 : -3]