Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
At first we will check, which assemblies are available to us.
"""
# Code source: Patrick Kunzmann
# License: BSD 3 clause
from tempfile import NamedTemporaryFile
import numpy as np
import biotite.structure as struc
import biotite.structure.io.pdbx as pdbx
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb
pdbx_file = pdbx.PDBxFile.read(rcsb.fetch("3J31", "mmcif"))
assemblies = pdbx.list_assemblies(pdbx_file)
print("ID name")
print()
for assembly_id, name in assemblies.items():
print(f"{assembly_id:2} {name}")
########################################################################
# ``'complete icosahedral assembly'`` sounds good.
# In fact, often the first assembly is the complete one.
# Hence, the :func:`get_assembly()` function builds the first assembly
# by default.
# Since we know the ID we want (``'1'``), we will provide it to this
# function anyway.
# It returns the chosen assembly as :class:`AtomArray`.
# Note that the assembly ID is a string, not an integer.
"""
# Code source: Patrick Kunzmann
# License: BSD 3 clause
from tempfile import gettempdir
import biotite
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
file_name = rcsb.fetch("1aki", "mmtf", gettempdir())
array = strucio.load_structure(file_name)
# We only consider CA atoms
ca = array[array.atom_name == "CA"]
# 7 Angstrom adjacency threshold
threshold = 7
# Create cell list of the CA atom array
# for efficient measurement of adjacency
cell_list = struc.CellList(ca, cell_size=threshold)
adjacency_matrix = cell_list.create_adjacency_matrix(threshold)
figure = plt.figure()
ax = figure.add_subplot(111)
cmap = ListedColormap(["white", biotite.colors["dimgreen"]])
#ax.matshow(adjacency_matrix, cmap=cmap, origin="lower")
ax.pcolormesh(ca.res_id, ca.res_id, adjacency_matrix, cmap=cmap)
ax.set_aspect("equal")
[116.40, -105.53, 129.32, -96.68, 140.72, -74.19, -26.65, -94.51],
[ 0.40, -81.83, 4.91, -100.59, 85.50, -71.65, 130.78, 84.98],
[119.14, -102.58, 130.83, -67.91, 121.55, 76.25, -2.95, -90.88],
[130.68, -56.92, 119.26, 77.85, 10.42, -99.43, 141.40, -98.01],
[114.32, -121.47, 118.14, 82.88, -150.05, -83.81, 23.35, -85.82],
[117.16, -95.41, 140.40, -59.35, -29.23, -72.39, -25.08, -76.16],
[139.20, -55.96, -32.70, -68.51, -26.09, -74.44, -22.60, -71.74],
[-39.62, -64.73, -39.52, -65.54, -38.88, -66.89, -37.76, -70.19],
[-35.34, -65.03, -38.12, -66.34, -29.51, -89.10, -2.91, 77.90],
[-45.29, -67.44, -27.72, -87.27, 5.13, 77.49, 30.71, -93.23],
[-27.09, -86.14, 0.30, 59.85, 21.51, -96.30, 132.67, -92.91],
])
# Fetch animal lysoyzme structures
lyso_files = rcsb.fetch(
["1REX", "1AKI", "1DKJ", "1GD6"],
format="mmtf", target_path=biotite.temp_dir()
)
organisms = ["H. sapiens", "G. gallus", "C. viginianus", "B. mori"]
# Create a PB sequence from each structure
pb_seqs = []
for file_name in lyso_files:
file = mmtf.MMTFFile.read(file_name)
# Take only the first model into account
array = mmtf.get_structure(file, model=1)
# Remove everything but the first protein chain
array = array[struc.filter_amino_acids(array)]
array = array[array.chain_id == array.chain_id[0]]
# Calculate backbone dihedral angles,
# Code source: Patrick Kunzmann
# License: BSD 3 clause
import numpy as np
import biotite.structure as struc
import biotite.structure.io.mmtf as mmtf
import biotite.database.rcsb as rcsb
# The maximum distance between an atom in the repressor and an atom in
# the DNA for them to be considered 'in contact'
THRESHOLD_DISTANCE = 4.0
# Fetch and load structure
mmtf_file = mmtf.MMTFFile.read(rcsb.fetch("2or1", "mmtf"))
structure = mmtf.get_structure(mmtf_file, model=1)
# Separate structure into the DNA and the two identical protein chains
dna = structure[
np.isin(structure.chain_id, ["A", "B"]) & (structure.hetero == False)
]
protein_l = structure[
(structure.chain_id == "L") & (structure.hetero == False)
]
protein_r = structure[
(structure.chain_id == "R") & (structure.hetero == False)
]
# Quick check if the two protein chains are really identical
assert len(struc.get_residues(protein_l)) == len(struc.get_residues(protein_r))