How to use the allennlp.data.fields.MetadataField class in allennlp

To help you get started, we’ve selected a few examples showing how MetadataField is used in popular public allennlp projects.

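At its core, MetadataField wraps an arbitrary Python object and carries it through the data pipeline untouched: it is never indexed against a vocabulary, never padded, and batching simply collects the per-instance values into a list. A minimal sketch of that behavior (the field contents are made up for illustration):

from allennlp.data import Instance
from allennlp.data.fields import MetadataField

# Any Python object can be stored; it is handed back unchanged at tensor time.
metadata = MetadataField({"original_text": "The cat sat.", "id": "example-0"})
instance = Instance({"metadata": metadata})

# There is nothing to pad, and as_tensor() returns the wrapped value as-is.
assert metadata.get_padding_lengths() == {}
assert metadata.as_tensor({})["id"] == "example-0"

# "Batching" several metadata values just collects them into a list.
batched = metadata.batch_tensors([metadata.as_tensor({}), metadata.as_tensor({})])
assert isinstance(batched, list) and len(batched) == 2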

github allenai / allennlp-bert-qa-wrapper / pretrained_bert / dataset_reader.py View on Github external
        # Zero-pad up to the sequence length.
        while len(input_ids) < self._max_sequence_length:
            input_ids.append(0)
            input_mask.append(0)
            segment_ids.append(0)

        assert len(input_ids) == self._max_sequence_length
        assert len(input_mask) == self._max_sequence_length
        assert len(segment_ids) == self._max_sequence_length
        input_ids_tensor = torch.tensor(input_ids, dtype=torch.long)
        input_mask_tensor = torch.tensor(input_mask, dtype=torch.long)
        segment_ids_tensor = torch.tensor(segment_ids, dtype=torch.long)
        instance = Instance({"input_ids": MetadataField(input_ids_tensor),
                             "token_type_ids": MetadataField(segment_ids_tensor),
                             "attention_mask": MetadataField(input_mask_tensor),
                             "tokens": MetadataField(tokens),
                             "document_tokens": MetadataField(doc_tokens),
                             "token_to_original_map": MetadataField(token_to_orig_map),
                             "token_is_max_context": MetadataField(token_is_max_context)})
        # We truncate the original doc to the defined max_sequence_length.
        # Here we only process the first part of doc_spans and return the result.
        return instance
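Note that this wrapper stores already-padded torch tensors inside MetadataFields, so they bypass AllenNLP's own indexing and padding and reach the model exactly as they were built, one list entry per instance. A small self-contained sketch of that batching behavior (assuming allennlp >= 1.0, where Batch lives in allennlp.data.batch; the field name simply mirrors the example above):

import torch
from allennlp.data import Instance
from allennlp.data.batch import Batch
from allennlp.data.fields import MetadataField

def make_instance(ids):
    # Mimic the reader above: a pre-padded tensor wrapped in a MetadataField.
    ids_tensor = torch.tensor(ids, dtype=torch.long)
    return Instance({"input_ids": MetadataField(ids_tensor)})

batch = Batch([make_instance([1, 2, 3]), make_instance([4, 5, 6])])
tensor_dict = batch.as_tensor_dict()

# Each MetadataField comes back as one list entry per instance, so a model
# consuming these pre-padded tensors has to stack them itself.
input_ids = torch.stack(tensor_dict["input_ids"])
print(input_ids.shape)  # torch.Size([2, 3])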
github allenai / allennlp / allennlp / data / dataset_readers / semantic_dependency_parsing.py View on Github external
def text_to_instance(
        self,  # type: ignore
        tokens: List[str],
        pos_tags: List[str] = None,
        arc_indices: List[Tuple[int, int]] = None,
        arc_tags: List[str] = None,
    ) -> Instance:

        fields: Dict[str, Field] = {}
        token_field = TextField([Token(t) for t in tokens], self._token_indexers)
        fields["tokens"] = token_field
        fields["metadata"] = MetadataField({"tokens": tokens})
        if pos_tags is not None:
            fields["pos_tags"] = SequenceLabelField(pos_tags, token_field, label_namespace="pos")
        if arc_indices is not None and arc_tags is not None:
            fields["arc_tags"] = AdjacencyField(arc_indices, token_field, arc_tags)

        return Instance(fields)
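On the model side, a field registered under "metadata" arrives in forward() as a plain list of dicts, one per instance, exactly as the reader built them. A sketch of the usual consumption pattern (this helper is illustrative, not code from the semantic dependency parser itself):

from typing import Any, Dict, List

def original_tokens(metadata: List[Dict[str, Any]]) -> List[List[str]]:
    # Each entry is the dict passed to MetadataField in text_to_instance above,
    # so the raw token strings can be recovered for decoding or display.
    return [entry["tokens"] for entry in metadata]

In AllenNLP's own models this list is typically attached to the output dictionary so that predictors can produce human-readable output.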
github eitanhaimashiah / multibidaf / multibidaf / dataset_readers / util.py View on Github external
    fields['question'] = TextField(question_tokens, token_indexers)
    fields['span_start'] = ListField([IndexField(span_start, passage_field) for span_start in span_starts])

    # TODO: Consider convert `answer_texts` and `answer_labels` to `Field` type.
    metadata = {
            'original_passage': passage_text,
            'token_offsets': passage_offsets,
            'sentence_starts': sentence_starts,
            'question_tokens': [token.text for token in question_tokens],
            'passage_tokens': [token.text for token in passage_tokens],
            'answer_texts': answer_texts,
            'answer_labels': answer_labels
    }

    metadata.update(additional_metadata)
    fields['metadata'] = MetadataField(metadata)
    return Instance(fields)
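The passage-level details saved here, such as original_passage and token_offsets, are what let a reading-comprehension model turn a predicted token span back into a substring of the original passage at prediction time. A hypothetical post-processing helper, assuming the metadata layout built above:

from typing import Any, Dict

def span_to_text(metadata_entry: Dict[str, Any], span_start: int, span_end: int) -> str:
    # token_offsets holds (char_start, char_end) pairs for each passage token,
    # so a token-level span maps directly onto the original passage string.
    offsets = metadata_entry["token_offsets"]
    start_char = offsets[span_start][0]
    end_char = offsets[span_end][1]
    return metadata_entry["original_passage"][start_char:end_char]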
github raylin1000 / drop-bert / drop_bert / data_processing.py View on Github external
                    if not expression:
                        expression.append(3 * [-1])
                    expression_indices.append(ArrayField(np.array(expression), padding_value=-1))
                if not expression_indices:
                    expression_indices = \
                        [ArrayField(np.array([3 * [-1]]), padding_value=-1) for _ in range(len(self.templates))]
                fields["answer_as_expressions"] = ListField(expression_indices)

            count_fields: List[Field] = [LabelField(count_label, skip_indexing=True) for count_label in valid_counts]
            if not count_fields:
                count_fields.append(LabelField(-1, skip_indexing=True))
            fields["answer_as_counts"] = ListField(count_fields)
            
            fields["num_spans"] = LabelField(num_spans, skip_indexing=True)
        
        fields["metadata"] = MetadataField(metadata)
        
        return Instance(fields)
github plasticityai / magnitude / pymagnitude / third_party / allennlp / data / dataset_readers / conll2003.py View on Github external
    def text_to_instance(self,  # type: ignore
                         tokens,
                         pos_tags=None,
                         chunk_tags=None,
                         ner_tags=None):
        u"""
        We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
        """
        # pylint: disable=arguments-differ
        sequence = TextField(tokens, self._token_indexers)
        instance_fields = {u'tokens': sequence}
        instance_fields[u"metadata"] = MetadataField({u"words": [x.text for x in tokens]})

        # Recode the labels if necessary.
        if self.coding_scheme == u"BIOUL":
            coded_chunks = to_bioul(chunk_tags) if chunk_tags is not None else None
            coded_ner = to_bioul(ner_tags) if ner_tags is not None else None
        else:
            # the default IOB1
            coded_chunks = chunk_tags
            coded_ner = ner_tags

        # Add "feature labels" to instance
        if u'pos' in self.feature_labels:
            if pos_tags is None:
                raise ConfigurationError(u"Dataset reader was specified to use pos_tags as "
                                         u"features. Pass them to text_to_instance.")
            instance_fields[u'pos_tags'] = SequenceLabelField(pos_tags, sequence, u"pos_tags")
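The "words" metadata saved by this reader is typically what a tagging model uses to pair predicted labels back up with the original tokens, since the indexed TextField no longer carries the raw strings. A sketch of that pattern (the function and namespace are illustrative, not taken from a specific allennlp model):

from typing import Any, Dict, List, Tuple
from allennlp.data import Vocabulary

def readable_tags(metadata: List[Dict[str, Any]],
                  predicted_indices: List[List[int]],
                  vocab: Vocabulary) -> List[List[Tuple[str, str]]]:
    results = []
    for entry, indices in zip(metadata, predicted_indices):
        # Map label indices back to strings and zip them with the raw words.
        labels = [vocab.get_token_from_index(i, namespace="labels") for i in indices]
        results.append(list(zip(entry["words"], labels)))
    return results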
github allenai / allennlp / allennlp / data / dataset_readers / reading_comprehension / drop.py View on Github external
                )
            if not add_sub_signs_field:
                add_sub_signs_field.append(
                    SequenceLabelField([0] * len(number_tokens), numbers_in_passage_field)
                )
            fields["answer_as_add_sub_expressions"] = ListField(add_sub_signs_field)

            count_fields: List[Field] = [
                LabelField(count_label, skip_indexing=True) for count_label in answer_info["counts"]
            ]
            if not count_fields:
                count_fields.append(LabelField(-1, skip_indexing=True))
            fields["answer_as_counts"] = ListField(count_fields)

        metadata.update(additional_metadata)
        fields["metadata"] = MetadataField(metadata)
        return Instance(fields)
github ConvLab / ConvLab / convlab / modules / nlu / multiwoz / milu / dataset_reader.py View on Github external
    def text_to_instance(self, context_tokens: List[Token], tokens: List[Token], tags: List[str] = None,
                         intents: List[str] = None, dialog_act: Dict[str, Any] = None) -> Instance:  # type: ignore
        """
        We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
        """
        # pylint: disable=arguments-differ
        fields: Dict[str, Field] = {}
        # print([t.text for t in context_tokens])
        fields["context_tokens"] = TextField(context_tokens, self._token_indexers)
        fields["tokens"] = TextField(tokens, self._token_indexers)
        fields["metadata"] = MetadataField({"words": [x.text for x in tokens]})
        if tags is not None:
            fields["tags"] = SequenceLabelField(tags, fields["tokens"])
        if intents is not None:
            fields["intents"] = MultiLabelField(intents, label_namespace="intent_labels")
        if dialog_act is not None:
            fields["metadata"] = MetadataField({"words": [x.text for x in tokens],
            'dialog_act': dialog_act})
        else:
            fields["metadata"] = MetadataField({"words": [x.text for x in tokens], 'dialog_act': {}})
        return Instance(fields)
github allenai / allennlp / allennlp / data / dataset_readers / semantic_parsing / quarel.py View on Github external
            action_sequence_fields: List[Field] = []
            for logical_form in logical_forms:
                expression = world.parse_logical_form(logical_form)
                action_sequence = world.get_action_sequence(expression)
                try:
                    index_fields: List[Field] = []
                    for production_rule in action_sequence:
                        index_fields.append(IndexField(action_map[production_rule], action_field))
                    action_sequence_fields.append(ListField(index_fields))
                except KeyError as error:
                    logger.info(f"Missing production rule: {error.args}, skipping logical form")
                    logger.info(f"Question was: {question}")
                    logger.info(f"Logical form was: {logical_form}")
                    continue
            fields["target_action_sequences"] = ListField(action_sequence_fields)
        fields["metadata"] = MetadataField(additional_metadata or {})
        return Instance(fields)
github allenai / allennlp-reading-comprehension / allennlp_rc / dataset_readers / drop.py View on Github external
                )
            if not add_sub_signs_field:
                add_sub_signs_field.append(
                    SequenceLabelField([0] * len(number_tokens), numbers_in_passage_field)
                )
            fields["answer_as_add_sub_expressions"] = ListField(add_sub_signs_field)

            count_fields: List[Field] = [
                LabelField(count_label, skip_indexing=True) for count_label in answer_info["counts"]
            ]
            if not count_fields:
                count_fields.append(LabelField(-1, skip_indexing=True))
            fields["answer_as_counts"] = ListField(count_fields)

        metadata.update(additional_metadata)
        fields["metadata"] = MetadataField(metadata)
        return Instance(fields)