How to use the allennlp.data.token_indexers.SingleIdTokenIndexer class in allennlp

To help you get started, we’ve selected a few allennlp examples based on popular ways SingleIdTokenIndexer is used in public projects.

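Before the project examples, here is a minimal, self-contained sketch of what the indexer does: it maps every token in a TextField to a single integer id in a vocabulary namespace ("tokens" by default). This is an illustrative snippet, assuming a recent AllenNLP release; the sentence and field names are made up.

from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

# One integer id per token, stored in the default "tokens" namespace.
token_indexers = {"tokens": SingleIdTokenIndexer()}

tokens = [Token(t) for t in ["the", "quick", "brown", "fox"]]
text_field = TextField(tokens, token_indexers)
instance = Instance({"sentence": text_field})

# Build a vocabulary from the instances, then index the field against it.
vocab = Vocabulary.from_instances([instance])
instance.index_fields(vocab)

print(vocab.get_token_index("quick", namespace="tokens"))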

github allenai / allennlp / allennlp / data / dataset_readers / text_classification_json.py
def __init__(
        self,
        token_indexers: Dict[str, TokenIndexer] = None,
        tokenizer: Tokenizer = None,
        segment_sentences: bool = False,
        max_sequence_length: int = None,
        skip_label_indexing: bool = False,
        lazy: bool = False,
    ) -> None:
        super().__init__(lazy=lazy)
        self._tokenizer = tokenizer or SpacyTokenizer()
        self._segment_sentences = segment_sentences
        self._max_sequence_length = max_sequence_length
        self._skip_label_indexing = skip_label_indexing
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        if self._segment_sentences:
            self._sentence_segmenter = SpacySentenceSplitter()
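As a hedged usage sketch (not taken from the repository above), the reader can be constructed with a non-default SingleIdTokenIndexer, for example one that lowercases tokens before lookup. This assumes TextClassificationJsonReader is importable from allennlp.data.dataset_readers, as in the version shown.

from allennlp.data.dataset_readers import TextClassificationJsonReader
from allennlp.data.token_indexers import SingleIdTokenIndexer

# Lowercase tokens before vocabulary lookup, keeping the default "tokens"
# namespace; max_sequence_length is an illustrative value.
reader = TextClassificationJsonReader(
    token_indexers={"tokens": SingleIdTokenIndexer(lowercase_tokens=True)},
    max_sequence_length=256,
)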
github easonnie / combine-FEVER-NSMN / src / nli / mesim_wn_simi_v1_2.py
def utest_data_loader():
    num_epoch = 8
    seed = 12
    batch_size = 32
    experiment_name = "mesim_wn_elmo"
    lazy = True

    dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"
    train_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/train.jsonl"

    # Prepare Data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # raw token ids
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # ELMo character ids
    }

    p_dict = wn_persistent_api.persistence_load()

    train_fever_data_reader = WNReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict)
    dev_fever_data_reader = WNReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict)

    complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL, dev_upstream_file)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)
    # Load Vocabulary
    biterator = BasicIterator(batch_size=batch_size)

    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')
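The indexer dict above is the standard way to index one piece of text in several ways at once. A hedged sketch of how such a dict is consumed, with an illustrative sentence (it assumes ELMoTokenCharactersIndexer is available in your AllenNLP version):

from allennlp.data import Token
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import ELMoTokenCharactersIndexer, SingleIdTokenIndexer

token_indexers = {
    'tokens': SingleIdTokenIndexer(namespace='tokens'),  # word-level ids
    'elmo_chars': ELMoTokenCharactersIndexer()            # character ids for ELMo
}

# Both indexers run over the same tokens, so a model can look up word embeddings
# under "tokens" and ELMo representations under "elmo_chars" from a single field.
field = TextField([Token(t) for t in "Colorless green ideas sleep furiously".split()],
                  token_indexers)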
github ConvLab / ConvLab / convlab / modules / nlu / multiwoz / milu / dataset_reader.py
def __init__(self,
                 context_size: int = 0,
                 agent: str = None,
                 random_context_size: bool = True,
                 token_delimiter: str = None,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._context_size = context_size
        self._agent = agent 
        self._random_context_size = random_context_size
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._token_delimiter = token_delimiter
github allenai / vampire / dataset_readers / vocab_generator.py
def __init__(self,
                 lazy: bool = False) -> None:
        super().__init__(lazy=lazy)
        self._stopless_word_tokenizer = WordTokenizer(word_filter=StopwordFilter())
        self._full_word_tokenizer = WordTokenizer()
        self._stopless_token_indexers = {
            "tokens": SingleIdTokenIndexer(namespace="stopless", lowercase_tokens=True)
        }
        self._full_token_indexers = {
            "tokens": SingleIdTokenIndexer(namespace="full", lowercase_tokens=True)
        }
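A short sketch of why the two namespaces above matter: each namespace keeps its own token-to-id mapping inside the Vocabulary, and lowercase_tokens=True folds case before lookup, so "The" and "the" share one id. The words below are illustrative.

from allennlp.data import Vocabulary

vocab = Vocabulary()
for word in ["neural", "networks"]:
    vocab.add_token_to_namespace(word, namespace="stopless")
for word in ["the", "neural", "networks"]:
    vocab.add_token_to_namespace(word, namespace="full")

# Each namespace has an independent mapping (plus its padding/OOV entries),
# so the stopword-filtered vocabulary can stay smaller than the full one.
print(vocab.get_vocab_size("stopless"))  # 4: padding, OOV, neural, networks
print(vocab.get_vocab_size("full"))      # 5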
github DreamerDeo / HIT-SCIR-CoNLL2019 / utils / transition_ucca_reader.py
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lemma_indexers: Dict[str, TokenIndexer] = None,
                 action_indexers: Dict[str, TokenIndexer] = None,
                 arc_tag_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
        self._lemma_indexers = None
        if lemma_indexers is not None and len(lemma_indexers) > 0:
            self._lemma_indexers = lemma_indexers
        self._action_indexers = None
        if action_indexers is not None and len(action_indexers) > 0:
            self._action_indexers = action_indexers
        self._arc_tag_indexers = None
        if arc_tag_indexers is not None and len(arc_tag_indexers) > 0:
            self._arc_tag_indexers = arc_tag_indexers
github DreamerDeo / HIT-SCIR-CoNLL2019 / utils / transition_eds_reader.py
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lemma_indexers: Dict[str, TokenIndexer] = None,
                 action_indexers: Dict[str, TokenIndexer] = None,
                 arc_tag_indexers: Dict[str, TokenIndexer] = None,
                 concept_label_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}

        self._lemma_indexers = None
        if lemma_indexers is not None and len(lemma_indexers) > 0:
            self._lemma_indexers = lemma_indexers

        self._action_indexers = None
        if action_indexers is not None and len(action_indexers) > 0:
            self._action_indexers = action_indexers

        self._arc_tag_indexers = None
        if arc_tag_indexers is not None and len(arc_tag_indexers) > 0:
            self._arc_tag_indexers = arc_tag_indexers

        self._concept_label_indexers = concept_label_indexers or {
            'concept_label': SingleIdTokenIndexer(namespace='concept_label')}
github mhagiwara / realworldnlp / examples / generation / lm.py
def main():
    all_chars = {END_SYMBOL, START_SYMBOL}
    all_chars.update("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,!?'-")
    token_counts = {char: 1 for char in all_chars}
    vocab = Vocabulary({'tokens': token_counts})

    token_indexers = {'tokens': SingleIdTokenIndexer()}

    train_set = read_dataset(all_chars)
    instances = [tokens_to_lm_instance(tokens, token_indexers)
                 for tokens in train_set]

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_SIZE)
    embedder = BasicTextFieldEmbedder({"tokens": token_embedding})

    model = RNNLanguageModel(embedder=embedder,
                             hidden_size=HIDDEN_SIZE,
                             max_len=80,
                             vocab=vocab)

    iterator = BasicIterator(batch_size=BATCH_SIZE)
    iterator.index_with(vocab)
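Related to the START_SYMBOL/END_SYMBOL handling above, SingleIdTokenIndexer can also wrap every sequence with boundary tokens at indexing time. This is a hedged sketch: the start_tokens/end_tokens arguments exist in recent AllenNLP releases but may be missing from older ones.

from allennlp.common.util import END_SYMBOL, START_SYMBOL
from allennlp.data.token_indexers import SingleIdTokenIndexer

# The indexer prepends START_SYMBOL and appends END_SYMBOL to every token
# sequence it indexes, so the dataset code does not have to add them by hand.
token_indexers = {
    "tokens": SingleIdTokenIndexer(start_tokens=[START_SYMBOL],
                                   end_tokens=[END_SYMBOL])
}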
github plasticityai / magnitude / pymagnitude / third_party / allennlp / data / dataset_readers / nlvr.py
def __init__(self,
                 lazy=False,
                 tokenizer=None,
                 sentence_token_indexers=None,
                 nonterminal_indexers=None,
                 terminal_indexers=None,
                 output_agendas=True):
        super(NlvrDatasetReader, self).__init__(lazy)
        self._tokenizer = tokenizer or WordTokenizer()
        self._sentence_token_indexers = sentence_token_indexers or {u"tokens": SingleIdTokenIndexer()}
        self._nonterminal_indexers = nonterminal_indexers or {u"tokens":
                                                              SingleIdTokenIndexer(u"rule_labels")}
        self._terminal_indexers = terminal_indexers or {u"tokens": SingleIdTokenIndexer(u"rule_labels")}
        self._output_agendas = output_agendas
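Here the namespace is passed positionally, SingleIdTokenIndexer(u"rule_labels"), since namespace is the constructor's first argument. One consequence worth knowing, sketched below with made-up tokens: namespaces matching "*labels" or "*tags" are non-padded by Vocabulary's defaults, so they carry no padding or OOV entries.

from allennlp.data import Vocabulary

vocab = Vocabulary()
vocab.add_token_to_namespace("production_rule", namespace="rule_labels")
vocab.add_token_to_namespace("production_rule", namespace="tokens")

# "rule_labels" matches the default non-padded pattern "*labels", so it holds
# only the token itself; "tokens" additionally stores padding and OOV entries.
print(vocab.get_vocab_size("rule_labels"))  # 1
print(vocab.get_vocab_size("tokens"))       # 3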
github vered1986 / NC_embeddings / source / training / paraphrase_based / compute_vectors.py
def __init__(self,
                 token_indexers: Dict[str, TokenIndexer] = None,
                 lazy: bool = False,
                 tokenizer: Tokenizer = None) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        self._tokenizer = tokenizer or WordTokenizer()
github allenai / allennlp / allennlp / data / dataset_readers / conll2000.py
def __init__(
        self,
        token_indexers: Dict[str, TokenIndexer] = None,
        tag_label: str = "chunk",
        feature_labels: Sequence[str] = (),
        lazy: bool = False,
        coding_scheme: str = "BIO",
        label_namespace: str = "labels",
    ) -> None:
        super().__init__(lazy)
        self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
        if tag_label is not None and tag_label not in self._VALID_LABELS:
            raise ConfigurationError("unknown tag label type: {}".format(tag_label))
        for label in feature_labels:
            if label not in self._VALID_LABELS:
                raise ConfigurationError("unknown feature label type: {}".format(label))
        if coding_scheme not in ("BIO", "BIOUL"):
            raise ConfigurationError("unknown coding_scheme: {}".format(coding_scheme))

        self.tag_label = tag_label
        self.feature_labels = set(feature_labels)
        self.coding_scheme = coding_scheme
        self.label_namespace = label_namespace
        self._original_coding_scheme = "BIO"
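The label_namespace above controls where tag strings are counted, separately from the word ids produced by SingleIdTokenIndexer. A hedged sketch of the field pairing such a reader typically builds; the sentence and chunk tags are illustrative.

from allennlp.data import Instance, Token
from allennlp.data.fields import SequenceLabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

tokens = [Token(t) for t in ["He", "reckons", "the", "deficit"]]
text_field = TextField(tokens, {"tokens": SingleIdTokenIndexer()})

# Chunk tags are counted in the "labels" namespace; word ids live in "tokens".
tag_field = SequenceLabelField(["B-NP", "B-VP", "B-NP", "I-NP"],
                               text_field, label_namespace="labels")
instance = Instance({"tokens": text_field, "tags": tag_field})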