Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""SeqProp: Sequence set to represent this protein"""
# Structures
self.structures = DictList()
"""DictList: Stored protein structures which are related to this protein"""
self.representative_structure = None
"""StructProp: Structure set to represent this protein, usually in monomeric form"""
self.representative_chain = None
"""str: Chain ID in the representative structure which best represents a sequence"""
self.representative_chain_seq_coverage = 0
"""float: Percent identity of sequence coverage for the representative chain"""
# Alignments
self.sequence_alignments = DictList()
"""DictList: Pairwise or multiple sequence alignments stored as ``Bio.Align.MultipleSeqAlignment`` objects"""
self.structure_alignments = DictList()
"""DictList: Pairwise or multiple structure alignments - currently a placeholder"""
def __init__(self, ident, description=None, chains=None, mapped_chains=None,
             is_experimental=False, structure_path=None, file_type=None):
    """Initialize the identity, chain bookkeeping and file-type attributes of a structure.

    Args:
        ident: Identifier, handed to ``Object.__init__`` as ``id``
        description: Optional description, handed to ``Object.__init__``
        chains: Optional chain IDs; when truthy they are passed to ``add_chain_ids``
        mapped_chains: Optional chain IDs; when truthy they are passed to ``add_mapped_chain_ids``
        is_experimental (bool): Stored unchanged on ``self.is_experimental``
        structure_path: Not referenced by the statements visible here
            (NOTE(review): presumably consumed further down in the full method - confirm)
        file_type: Stored unchanged on ``self.file_type``

    """
    Object.__init__(self, id=ident, description=description)
    self.is_experimental = is_experimental
    """bool: Flag to note if this structure is an experimental model or a homology model"""
    # Chain information
    # chains is a DictList of ChainProp objects
    # If you run self.parse_structure(), all chains will be parsed and stored here
    # Use mapped_chains below to keep track of chains you are interested in
    # The container is created before add_chain_ids() is called below
    self.chains = DictList()
    """DictList: A DictList of chains have their sequence stored in them, along with residue-specific"""
    if chains:
        self.add_chain_ids(chains)
    # mapped_chains is an ordered list of mapped chain IDs which would come from BLAST or the best_structures API
    # As with chains, the empty list is created before add_mapped_chain_ids() is called below
    self.mapped_chains = []
    """list: A simple list of chain IDs (strings) that will be used to subset analyses"""
    if mapped_chains:
        self.add_mapped_chain_ids(mapped_chains)
    # A new instance always starts out flagged as not parsed
    self.parsed = False
    """bool: Simple flag to track if this structure has had its structure + chain sequences parsed"""
    # XTODO: rename to sequence_parsed or something similar
    # File information
    self.file_type = file_type
    """str: Type of structure file"""
def get_experimental_structures(self):
    """DictList: Return the experimental structures in self.structures, excluding the representative one.

    A structure is kept when its ``is_experimental`` attribute is truthy and its ``id`` differs from the ``id`` of
    ``self.representative_structure``. When no representative structure has been set (the attribute is ``None``),
    there is nothing to exclude and every experimental structure is returned.

    Returns:
        DictList: experimental structures other than the representative structure

    """
    representative = self.representative_structure

    # representative_structure is None until one is chosen; reading ``.id`` on it would raise AttributeError
    if representative is None:
        return DictList(x for x in self.structures if x.is_experimental)

    return DictList(x for x in self.structures if x.is_experimental and x.id != representative.id)
def filter_out_spontaneous_genes(genes, custom_spont_id=None):
    """Build a new DictList that holds only the non-spontaneous genes of a model.

    Args:
        genes (DictList): Genes DictList to screen
        custom_spont_id (str): Optional custom spontaneous ID, forwarded to ``is_spontaneous`` as ``custom_id``, for
            the case where it does not match the regular expression ``[Ss](_|)0001``

    Returns:
        DictList: the genes for which ``is_spontaneous`` is false, in their original order

    """
    kept = (candidate for candidate in genes if not is_spontaneous(candidate, custom_id=custom_spont_id))
    return DictList(kept)
def genes_with_structures(self):
    """DictList: Genes in ``self.genes`` whose protein reports ``num_structures`` greater than zero."""
    mapped = (gene for gene in self.genes if gene.protein.num_structures > 0)
    return DictList(mapped)
def functional_genes(self):
    """DictList: Genes in ``self.genes`` whose ``functional`` attribute is truthy."""
    flagged = (gene for gene in self.genes if gene.functional)
    return DictList(flagged)
"""str: ``pdb``, ``pdb.gz``, ``mmcif``, ``cif``, ``cif.gz``, ``xml.gz``, ``mmtf``, ``mmtf.gz`` - choose a file
type for files downloaded from the PDB"""
# Create directories
self._root_dir = None
if root_dir:
self.root_dir = root_dir
# Sequences
self.sequences = DictList()
"""DictList: Stored protein sequences which are related to this protein"""
self.representative_sequence = None
"""SeqProp: Sequence set to represent this protein"""
# Structures
self.structures = DictList()
"""DictList: Stored protein structures which are related to this protein"""
self.representative_structure = None
"""StructProp: Structure set to represent this protein, usually in monomeric form"""
self.representative_chain = None
"""str: Chain ID in the representative structure which best represents a sequence"""
self.representative_chain_seq_coverage = 0
"""float: Percent identity of sequence coverage for the representative chain"""
# Alignments
self.sequence_alignments = DictList()
"""DictList: Pairwise or multiple sequence alignments stored as ``Bio.Align.MultipleSeqAlignment`` objects"""
self.structure_alignments = DictList()
"""DictList: Pairwise or multiple structure alignments - currently a placeholder"""