How to use the snorkel.models.StableLabel class in snorkel

To help you get started, we’ve selected a few snorkel examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github snorkel-team / snorkel / snorkel / db_helpers.py View on Github external
def reload_annotator_labels(session, candidate_class, annotator_name, split, filter_label_split=True, create_missing_cands=False):
    """Reloads stable annotator labels into the AnnotatorLabel table

    NOTE(review): this excerpt is truncated right after the Context lookup.
    The candidate lookup/creation announced by the trailing comments is not
    shown, so only the visible part is documented here.

    Visible behaviour:
      * Looks up the GoldLabelKey whose name equals ``annotator_name`` and,
        if there is none, creates one and commits it immediately.
      * Selects the StableLabel rows of that annotator, optionally restricted
        to ``split``.
      * For every selected row, resolves each stable id in its
        ``context_stable_ids`` string to a Context; rows with at least one
        unresolved id are collected in ``missed`` and skipped.

    :param session: database session; ``query``, ``add`` and ``commit`` are
        called on it.
    :param candidate_class: not used in the visible part of the function
        (presumably used by the truncated candidate lookup -- TODO confirm).
    :param annotator_name: name used both for the GoldLabelKey and to filter
        StableLabel rows by ``annotator_name``.
    :param split: value compared against ``StableLabel.split``; only applied
        when ``filter_label_split`` is truthy.
    :param filter_label_split: if falsy, StableLabel rows of every split are
        processed and ``split`` is ignored by the query.
    :param create_missing_cands: not used in the visible part of the function.
    """
    # Sets up the AnnotatorLabelKey to use
    # (get-or-create: the key is committed right away when it is new)
    ak = session.query(GoldLabelKey).filter(GoldLabelKey.name == annotator_name).first()
    if ak is None:
        ak = GoldLabelKey(name=annotator_name)
        session.add(ak)
        session.commit()

    # `labels` is not filled anywhere in the visible excerpt; `missed` collects
    # the StableLabel rows whose contexts could not all be resolved.
    labels = []
    missed = []
    sl_query = session.query(StableLabel).filter(StableLabel.annotator_name == annotator_name)
    sl_query = sl_query.filter(StableLabel.split == split) if filter_label_split else sl_query
    for sl in sl_query.all():
        # The stable ids of a label are stored as one '~~'-delimited string
        context_stable_ids = sl.context_stable_ids.split('~~')

        # Check for labeled Contexts
        # TODO: Does not create the Contexts if they do not yet exist!
        # One query is issued per stable id; ids without a matching Context
        # are simply left out of `contexts`.
        contexts = []
        for stable_id in context_stable_ids:
            context = session.query(Context).filter(Context.stable_id == stable_id).first()
            if context:
                contexts.append(context)
        # Fewer contexts than ids means at least one id was not found:
        # remember the label as missed and move on to the next one.
        if len(contexts) < len(context_stable_ids):
            missed.append(sl)
            continue

        # Check for Candidate
        # Assemble candidate arguments
        # NOTE(review): the excerpt ends here; the remaining body is not shown.
github snorkel-team / snorkel / tutorials / workshop / lib / util.py View on Github external
gold_labels = pd.read_csv(FPATH, sep="\t")
    for index, row in gold_labels.iterrows():    

        # We check if the label already exists, in case this cell was already executed
        context_stable_ids = "~~".join([row['person1'], row['person2']])
        query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
        query = query.filter(StableLabel.annotator_name == annotator_name)
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator_name,
                value=row['label']))
                    
        # Because it's a symmetric relation, load both directions...
        context_stable_ids = "~~".join([row['person2'], row['person1']])
        query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
        query = query.filter(StableLabel.annotator_name == annotator_name)
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator_name,
                value=row['label']))

    # Commit session
    session.commit()

    # Reload annotator labels
    reload_annotator_labels(session, candidate_class, annotator_name, split=1, filter_label_split=False)
    reload_annotator_labels(session, candidate_class, annotator_name, split=2, filter_label_split=False)
github snorkel-team / snorkel / snorkel / viewer / __init__.py View on Github external
if self.annotations[cid].value != value:
                    self.annotations[cid].value        = value
                    self.annotations_stable[cid].value = value
                    self.session.commit()

            # Otherwise, create an AnnotatorLabel *and a StableLabel*
            else:
                candidate = self.candidates[cid]

                # Create AnnotatorLabel
                self.annotations[cid] = GoldLabel(key=self.annotator, candidate=candidate, value=value)
                self.session.add(self.annotations[cid])

                # Create StableLabel
                context_stable_ids           = '~~'.join([c.stable_id for c in candidate.get_contexts()])
                self.annotations_stable[cid] = StableLabel(context_stable_ids=context_stable_ids,\
                                                           annotator_name=self.annotator.name,\
                                                           value=value,\
                                                           split=candidate.split)
                self.session.add(self.annotations_stable[cid])
                self.session.commit()

        elif content.get('event', '') == 'delete_label':
            cid = content.get('cid', None)
            self.session.delete(self.annotations[cid])
            self.annotations[cid] = None
            self.session.delete(self.annotations_stable[cid])
            self.annotations_stable[cid] = None
            self.session.commit()
github snorkel-team / snorkel / snorkel / viewer / __init__.py View on Github external
.filter(GoldLabel.candidate == candidate) \
                .first()
            if existing_annotation is not None:
                self.annotations[i] = existing_annotation
                if existing_annotation.value == 1:
                    value_string = 'true'
                elif existing_annotation.value == -1:
                    value_string = 'false'
                else:
                    raise ValueError(str(existing_annotation) +
                                     ' has value not in {1, -1}, which Viewer does not support.')
                init_labels_serialized.append(str(i) + '~~' + value_string)

                # If the annotator label is in the main table, also get its stable version
                context_stable_ids = '~~'.join([c.stable_id for c in candidate.get_contexts()])
                existing_annotation_stable = self.session.query(StableLabel) \
                                                 .filter(StableLabel.context_stable_ids == context_stable_ids)\
                                                 .filter(StableLabel.annotator_name == name).one_or_none()

                # If stable version is not available, create it here
                # NOTE: This is for versioning issues, should be removed?
                if existing_annotation_stable is None:
                    context_stable_ids         = '~~'.join([c.stable_id for c in candidate.get_contexts()])
                    existing_annotation_stable = StableLabel(context_stable_ids=context_stable_ids,\
                                                             annotator_name=self.annotator.name,\
                                                             split=candidate.split,\
                                                             value=existing_annotation.value)
                    self.session.add(existing_annotation_stable)
                    self.session.commit()

                self.annotations_stable[i] = existing_annotation_stable
github snorkel-team / snorkel / tutorials / cdr / load_external_annotations.py View on Github external
def load_external_labels(session, candidate_class, split, annotator='gold',
    label_fname='data/cdr_relations_gold.pkl', id_fname='data/doc_ids.pkl'):
    """Create StableLabel rows for the candidates of one split from a label file.

    The relation annotations are read from ``label_fname`` and each candidate
    of ``candidate_class`` whose ``split`` equals ``split`` is labelled +1 if
    its ``get_cids()`` value is contained in the annotations of its document,
    and -1 otherwise. A StableLabel is only added when none exists yet for
    the same ``context_stable_ids`` / ``annotator`` pair, so running the
    function again does not add duplicates. All additions are committed once,
    after the loop.

    :param session: database session; ``query``, ``add`` and ``commit`` are
        called on it.
    :param candidate_class: candidate class to query; must expose ``split``.
    :param split: split value used to select the candidates.
    :param annotator: value stored in (and matched against)
        ``StableLabel.annotator_name``.
    :param label_fname: path of the annotation file, opened in binary mode.
    :param id_fname: not used in the visible part of the function.

    NOTE(review): ``load`` is not defined in this excerpt -- presumably a
    pickle loader given the ``.pkl`` default; confirm against the module's
    imports. The created StableLabel rows are not given a ``split`` here.
    """
    # Load document-level relation annotations
    with open(label_fname, 'rb') as f:
        relations = load(f)
    # Get split candidates
    candidates = session.query(candidate_class).filter(
        candidate_class.split == split
    ).all()
    for c in candidates:
        # Get the label by mapping document annotations to mentions
        # (the lookup key is the name of the candidate's grandparent --
        # presumably the document; documents without annotations yield an
        # empty set)
        doc_relations = relations.get(c.get_parent().get_parent().name, set())
        # Maps membership True/False to +1/-1
        label = 2 * int(c.get_cids() in doc_relations) - 1        
        # Get stable ids and check to see if label already exists
        context_stable_ids = '~~'.join(x.get_stable_id() for x in c)
        query = session.query(StableLabel).filter(
            StableLabel.context_stable_ids == context_stable_ids
        )
        query = query.filter(StableLabel.annotator_name == annotator)
        # If does not already exist, add label
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator,
                value=label
            ))

    # Commit session
    session.commit()

    # Reload annotator labels
    reload_annotator_labels(session, candidate_class, annotator,
github snorkel-team / snorkel / snorkel / contrib / brat / tools.py View on Github external
rela_type, arg1, arg2 = annotations[name][key]
                    rela = sorted([[annotations[name][arg1]["entity_type"], spans[arg1]],
                                    [annotations[name][arg2]["entity_type"],spans[arg2]]])
                    stable_labels_by_type[rela_type].append("~~".join(zip(*rela)[1]))

        # create stable labels
        # NOTE: we store each label class type in a different split so that it is compatible with
        # the current version of 'reload_annotator_labels', where we create candidates by split id
        for i, class_type in enumerate(stable_labels_by_type):

            for context_stable_id in stable_labels_by_type[class_type]:
                query = self.session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_id)
                query = query.filter(StableLabel.annotator_name == annotator_name)
                if query.count() != 0:
                    continue
                self.session.add(StableLabel(context_stable_ids=context_stable_id, split=i,
                                             annotator_name=annotator_name, value=1))

        abs_offsets = {}
        entity_types = defaultdict(list)
        for i, class_type in enumerate(stable_labels_by_type):

            if class_type in self.subclasses:
                class_name = self.subclasses[class_type]
            else:
                class_name = self.subclasses[self._get_normed_rela_name(class_type)]

            for et in stable_labels_by_type[class_type]:
                contexts = et.split('~~')
                spans = []

                for c,et in zip(contexts,class_name.__argnames__):
github snorkel-team / snorkel / tutorials / cdr / load_external_annotations.py View on Github external
with open(label_fname, 'rb') as f:
        relations = load(f)
    # Get split candidates
    candidates = session.query(candidate_class).filter(
        candidate_class.split == split
    ).all()
    for c in candidates:
        # Get the label by mapping document annotations to mentions
        doc_relations = relations.get(c.get_parent().get_parent().name, set())
        label = 2 * int(c.get_cids() in doc_relations) - 1        
        # Get stable ids and check to see if label already exists
        context_stable_ids = '~~'.join(x.get_stable_id() for x in c)
        query = session.query(StableLabel).filter(
            StableLabel.context_stable_ids == context_stable_ids
        )
        query = query.filter(StableLabel.annotator_name == annotator)
        # If does not already exist, add label
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator,
                value=label
            ))

    # Commit session
    session.commit()

    # Reload annotator labels
    reload_annotator_labels(session, candidate_class, annotator,
                            split=split, filter_label_split=False)
github snorkel-team / snorkel / tutorials / intro / util.py View on Github external
# We check if the label already exists, in case this cell was already executed
        context_stable_ids = "~~".join([row['person1'], row['person2']])
        query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
        query = query.filter(StableLabel.annotator_name == annotator_name)
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator_name,
                value=row['label']))
                    
        # Because it's a symmetric relation, load both directions...
        context_stable_ids = "~~".join([row['person2'], row['person1']])
        query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
        query = query.filter(StableLabel.annotator_name == annotator_name)
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator_name,
                value=row['label']))

    # Commit session
    session.commit()

    # Reload annotator labels
    reload_annotator_labels(session, candidate_class, annotator_name, split=1, filter_label_split=False)
    reload_annotator_labels(session, candidate_class, annotator_name, split=2, filter_label_split=False)
github snorkel-team / snorkel / snorkel / db_helpers.py View on Github external
def reload_annotator_labels(session, candidate_class, annotator_name, split, filter_label_split=True, create_missing_cands=False):
    """Reloads stable annotator labels into the AnnotatorLabel table

    NOTE(review): this excerpt stops at the "Check for Candidate" comment;
    the rest of the function is not shown, so only the visible part is
    documented here.

    Visible behaviour:
      * Fetches the GoldLabelKey named ``annotator_name``, creating and
        committing it when it does not exist yet.
      * Queries the StableLabel rows with that ``annotator_name``; the query
        is additionally restricted to ``split`` only if
        ``filter_label_split`` is truthy.
      * Splits each row's ``context_stable_ids`` on ``'~~'`` and looks up a
        Context for every id; a row with any id that has no Context is
        appended to ``missed`` and skipped.

    :param session: database session; ``query``, ``add`` and ``commit`` are
        called on it.
    :param candidate_class: not referenced in the visible part.
    :param annotator_name: name of the GoldLabelKey and filter value for
        ``StableLabel.annotator_name``.
    :param split: compared with ``StableLabel.split`` when
        ``filter_label_split`` is truthy.
    :param filter_label_split: toggles the split restriction of the query.
    :param create_missing_cands: not referenced in the visible part.
    """
    # Sets up the AnnotatorLabelKey to use
    # (created and committed on first use for this annotator name)
    ak = session.query(GoldLabelKey).filter(GoldLabelKey.name == annotator_name).first()
    if ak is None:
        ak = GoldLabelKey(name=annotator_name)
        session.add(ak)
        session.commit()

    # `labels` stays empty in the visible excerpt; `missed` gathers the
    # StableLabel rows that reference at least one unknown Context.
    labels = []
    missed = []
    sl_query = session.query(StableLabel).filter(StableLabel.annotator_name == annotator_name)
    sl_query = sl_query.filter(StableLabel.split == split) if filter_label_split else sl_query
    for sl in sl_query.all():
        # Stable ids are stored joined by '~~' in a single string
        context_stable_ids = sl.context_stable_ids.split('~~')

        # Check for labeled Contexts
        # TODO: Does not create the Contexts if they do not yet exist!
        # Each stable id is resolved with its own query; unresolved ids are
        # dropped, which the length check below detects.
        contexts = []
        for stable_id in context_stable_ids:
            context = session.query(Context).filter(Context.stable_id == stable_id).first()
            if context:
                contexts.append(context)
        # At least one Context is missing: record the label and skip it
        if len(contexts) < len(context_stable_ids):
            missed.append(sl)
            continue

        # Check for Candidate
        # NOTE(review): the excerpt ends here; the remaining body is not shown.
github snorkel-team / snorkel / tutorials / workshop / lib / util.py View on Github external
def load_external_labels(session, candidate_class, annotator_name='gold'):
    gold_labels = pd.read_csv(FPATH, sep="\t")
    for index, row in gold_labels.iterrows():    

        # We check if the label already exists, in case this cell was already executed
        context_stable_ids = "~~".join([row['person1'], row['person2']])
        query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
        query = query.filter(StableLabel.annotator_name == annotator_name)
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator_name,
                value=row['label']))
                    
        # Because it's a symmetric relation, load both directions...
        context_stable_ids = "~~".join([row['person2'], row['person1']])
        query = session.query(StableLabel).filter(StableLabel.context_stable_ids == context_stable_ids)
        query = query.filter(StableLabel.annotator_name == annotator_name)
        if query.count() == 0:
            session.add(StableLabel(
                context_stable_ids=context_stable_ids,
                annotator_name=annotator_name,
                value=row['label']))