How to use protmapper - 9 common examples

To help you get started, we’ve selected a few protmapper examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github sorgerlab / indra / indra / sources / hprd / processor.py View on Github external
def _get_seq_motif(self, refseq_id, residue, pos_str):
        """Return the motif around a site, tolerating an off-by-one position.

        The sequence for `refseq_id` is taken from `self.seq_dict` and the
        reported 1-indexed position is checked against `residue`. If the
        residue is not found there, the site is recorded in
        `self.invalid_site_pos` and the next position is tried; a hit at the
        next position is additionally recorded in `self.off_by_one`.

        Parameters
        ----------
        refseq_id : str
            Key into `self.seq_dict` identifying the sequence.
        residue : str
            The residue expected at the given position.
        pos_str : str
            The 1-indexed position of the residue, as a string.

        Returns
        -------
        dict
            A dict with a single `site_motif` entry holding the `motif`,
            `respos` and `off_by_one` values, or an empty dict if the residue
            is found neither at the given position nor at the one after it
            (including when the position lies outside the sequence).
        """
        seq = self.seq_dict[refseq_id]
        pos_1ix = int(pos_str)
        pos_0ix = pos_1ix - 1
        seq_len = len(seq)

        # The residue is exactly where it was reported to be. The explicit
        # range check keeps a position of 0 (or below) from silently wrapping
        # around to the end of the sequence via negative indexing.
        if 0 <= pos_0ix < seq_len and seq[pos_0ix] == residue:
            motif, respos = ProtMapper.motif_from_position_seq(
                seq, pos_1ix, self.motif_window)
            return {'site_motif': {'motif': motif, 'respos': respos,
                                   'off_by_one': False}}

        # Anything else is an invalid site position
        self.invalid_site_pos.append((refseq_id, residue, pos_str))

        # Check the following position, but only if it exists: when the
        # reported site is the last residue there is nothing after it, and
        # indexing past the end would raise an IndexError.
        next_0ix = pos_0ix + 1
        if 0 <= next_0ix < seq_len and seq[next_0ix] == residue:
            self.off_by_one.append((refseq_id, residue, pos_str))
            motif, respos = ProtMapper.motif_from_position_seq(
                seq, pos_1ix + 1, self.motif_window)
            return {'site_motif': {'motif': motif, 'respos': respos,
                                   'off_by_one': True}}
        return {}
github sorgerlab / indra / indra / sources / hprd / processor.py View on Github external
pos_1ix = int(pos_str)
        pos_0ix = pos_1ix - 1
        if seq[pos_0ix] != residue:
            self.invalid_site_pos.append((refseq_id, residue, pos_str))
            if seq[pos_0ix + 1] == residue:
                self.off_by_one.append((refseq_id, residue, pos_str))
                motif, respos = \
                   ProtMapper.motif_from_position_seq(seq, pos_1ix + 1,
                                                      self.motif_window)
                return {'site_motif': {'motif': motif, 'respos': respos,
                                       'off_by_one': True}}
            else:
                return {}
        else:
            # The index of the residue at the start of the window
            motif, respos = ProtMapper.motif_from_position_seq(seq, pos_1ix,
                                                             self.motif_window)
            return {'site_motif': {'motif': motif, 'respos': respos,
                                   'off_by_one': False}}
github sorgerlab / indra / indra / sources / hprd / processor.py View on Github external
assert hgnc_name is not None
        # See if we can get a Uniprot ID from the HGNC symbol--if there is
        # a RefSeq ID we will also try to use it to get an isoform specific
        # UP ID, but we will have this one to fall back on. But if we can't
        # get one here, then we skip the Statement
        up_id_from_hgnc = hgnc_client.get_uniprot_id(hgnc_id)
        if not up_id_from_hgnc:
            self.no_up_for_hgnc.append((egid, hgnc_name, hgnc_id))
            return None
        # If we have provided the RefSeq ID, it's because we need to make
        # sure that we are getting the right isoform-specific ID (for sequence
        # positions of PTMs). Here we try to get the Uniprot ID from the
        # Refseq->UP mappings in the protmapper.uniprot_client.
        if refseq_id is not None:
            # Get the Uniprot IDs from the uniprot client
            up_ids = uniprot_client.get_ids_from_refseq(refseq_id,
                                                        reviewed_only=True)
            # Nothing for this RefSeq ID (quite likely because the RefSeq ID
            # is obsolete); take the UP ID from HGNC
            if len(up_ids) == 0:
                self.no_up_for_refseq.append(refseq_id)
                up_id = up_id_from_hgnc
            # More than one reviewed entry--no thanks, we'll take the one from
            # HGNC instead
            elif len(up_ids) > 1:
                self.many_ups_for_refseq.append(refseq_id)
                up_id = up_id_from_hgnc
            # We got a unique, reviewed UP entry for the RefSeq ID
            else:
                up_id = up_ids[0]
                # If it's the canonical isoform, strip off the '-1'
                if up_id.endswith('-1'):
github pybel / pybel / src / pybel / grounding.py View on Github external
concept[NAMESPACE] = _mapped[NAMESPACE]
        concept[IDENTIFIER] = _mapped[IDENTIFIER]
        concept[NAME] = _mapped[NAME]
        return True
    elif prefix == 'bel':
        logger.warning('could not figure out how to map bel ! %s', name)
        return False

    if prefix == 'uniprot':
        # assume identifier given as name
        identifier = get_id_from_mnemonic(name)
        if identifier is not None:
            concept[IDENTIFIER] = identifier
            return True

        mnemomic = get_mnemonic(name, web_fallback=True)
        if mnemomic is not None:
            concept[IDENTIFIER] = name
            concept[NAME] = mnemomic
            return True

        logger.warning('could not interpret uniprot name: %s', name)
        return False

    try:
        id_name_mapping = get_name_id_mapping(prefix)
    except (NoOboFoundry, MissingOboBuild) as e:
        logger.warning('could not get namespace %s - %s', prefix, e)
        return False

    if id_name_mapping is None:
        logger.warning('unhandled namespace in %s ! %s', prefix, name)
github pybel / pybel / src / pybel / grounding.py View on Github external
def _handle_identifier_not_name(*, concept, prefix, identifier) -> bool:
    """Fill in ``concept[NAME]`` for a concept that only has an identifier.

    The concept mapping is updated in place.

    :param concept: The mapping describing the concept to update.
    :param prefix: The namespace prefix the identifier belongs to.
    :param identifier: The identifier whose name should be looked up.
    :return: ``True`` if ``concept[NAME]`` was set; ``False`` if the prefix
        is skipped, its identifier-to-name mapping is unavailable, or the
        identifier has no entry in that mapping.
    """
    # Some namespaces are just too much of a problem at the moment to look up
    if prefix in SKIP:
        return False

    # For these namespaces the identifier is reused as the name
    if prefix in NO_NAMES:
        concept[NAME] = concept[IDENTIFIER]
        return True

    if prefix == 'uniprot':
        # NOTE(review): the result of get_mnemonic is stored without a None
        # check -- confirm that it cannot return None for this call.
        concept[NAME] = get_mnemonic(identifier)
        return True

    try:
        id_name_mapping = get_id_name_mapping(prefix)
    except (NoOboFoundry, MissingOboBuild):
        return False

    if id_name_mapping is None:
        logger.warning('could not get names for prefix %s', prefix)
        return False
    name = id_name_mapping.get(identifier)
    if name is None:
        logger.warning('could not get name for %s:%s', prefix, identifier)
        return False
    concept[NAME] = name
    # Report success explicitly; falling off the end would return None, which
    # is falsy and contradicts the declared ``bool`` return type.
    return True
github pybel / pybel / src / pybel / grounding.py View on Github external
concept[IDENTIFIER] = _mapped[IDENTIFIER]
        concept[NAME] = _mapped[NAME]
        return True
    elif prefix == 'bel' and name in compartment_mapping:
        _mapped = compartment_mapping[name]
        concept[NAMESPACE] = _mapped[NAMESPACE]
        concept[IDENTIFIER] = _mapped[IDENTIFIER]
        concept[NAME] = _mapped[NAME]
        return True
    elif prefix == 'bel':
        logger.warning('could not figure out how to map bel ! %s', name)
        return False

    if prefix == 'uniprot':
        # assume identifier given as name
        identifier = get_id_from_mnemonic(name)
        if identifier is not None:
            concept[IDENTIFIER] = identifier
            return True

        mnemomic = get_mnemonic(name, web_fallback=True)
        if mnemomic is not None:
            concept[IDENTIFIER] = name
            concept[NAME] = mnemomic
            return True

        logger.warning('could not interpret uniprot name: %s', name)
        return False

    try:
        id_name_mapping = get_name_id_mapping(prefix)
    except (NoOboFoundry, MissingOboBuild) as e:
github sorgerlab / indra / indra / preassembler / sitemapper.py View on Github external
mm_ws = '\n' + (' ' * 17)
            mm_str = mm_ws.join([str(mm) for mm in self.mapped_mods])

        summary = textwrap.dedent("""
            MappedStatement:
                original_stmt: {0}
                mapped_mods: {1}
                mapped_stmt: {2}
            """)
        return summary.format(self.original_stmt, mm_str, self.mapped_stmt)

    def __repr__(self):
        """Return the same text that ``str()`` produces for this object."""
        rendered = str(self)
        return rendered


class SiteMapper(ProtMapper):
    """
    Use site information to fix modification sites in Statements.

    This is a wrapper around the protmapper package's ProtMapper class and adds
    all the additional functionality to handle INDRA Statements and Agents.

    Parameters
    ----------
    site_map : dict (as returned by :py:func:`load_site_map`)
        A dict mapping tuples of the form `(gene, orig_res, orig_pos)` to a
        tuple of the form `(correct_res, correct_pos, comment)`, where `gene`
        is the string name of the gene (canonicalized to HGNC); `orig_res` and
        `orig_pos` are the residue and position to be mapped; `correct_res` and
        `correct_pos` are the corrected residue and position, and `comment` is
        a string describing the reason for the mapping (species error, isoform
        error, wrong residue name, etc.).
github sorgerlab / indra / indra / preassembler / sitemapper.py View on Github external
return None
        # If no site information for this residue, skip
        if mod_condition.position is None or mod_condition.residue is None:
            return None
        # Otherwise, try to map it and return the mapped site
        mapped_site = \
            self.map_to_human_ref(up_id, 'uniprot',
                mod_condition.residue,
                mod_condition.position,
                do_methionine_offset=self.do_methionine_offset,
                do_orthology_mapping=self.do_orthology_mapping,
                do_isoform_mapping=self.do_isoform_mapping)
        return mapped_site


# Module-level SiteMapper instance constructed from default_site_map.
default_mapper = SiteMapper(default_site_map)


# TODO: determine if this should be done in the protmapper or if this is the
# preferred place
@lru_cache(maxsize=10000)
def _get_uniprot_id(agent):
    """Return the UniProt ID for an agent, looking up in HGNC if necessary.

    If the UniProt ID is a list then return the first ID by default.
    """
    up_id = agent.db_refs.get('UP')
    hgnc_id = agent.db_refs.get('HGNC')
    if up_id is None:
        if hgnc_id is None:
            # If both UniProt and HGNC refs are missing we can't
            # sequence check and so don't report a failure.
github sorgerlab / indra / indra / sources / hprd / api.py View on Github external
'ppi_file must be given.')
    if ptm_file and not seq_file:
        raise ValueError('If ptm_file is given, seq_file must also be given.')
    # Load complexes into dataframe
    cplx_df = None
    if complexes_file:
        cplx_df = pd.read_csv(complexes_file, delimiter='\t', names=_cplx_cols,
                              dtype='str', na_values=['-', 'None'])
    # Load ptm data into dataframe
    ptm_df = None
    seq_dict = None
    if ptm_file:
        ptm_df = pd.read_csv(ptm_file, delimiter='\t', names=_ptm_cols,
                             dtype='str', na_values='-')
        # Load protein sequences as a dict keyed by RefSeq ID
        seq_dict = load_fasta_sequences(seq_file, id_index=2)
    # Load the PPI data into dataframe
    ppi_df = None
    if ppi_file:
        ppi_df = pd.read_csv(ppi_file, delimiter='\t', names=_ppi_cols,
                             dtype='str')
    # Create the processor
    return HprdProcessor(id_df, cplx_df, ptm_df, ppi_df, seq_dict, motif_window)

protmapper

Map protein sites to human reference sequence.

BSD-2-Clause
Latest version published 10 months ago

Package Health Score

51 / 100
Full package analysis

Similar packages