def __init__(
    self,
    token_indexers: Dict[str, TokenIndexer] = None,
    tokenizer: Tokenizer = None,
    segment_sentences: bool = False,
    max_sequence_length: int = None,
    skip_label_indexing: bool = False,
    lazy: bool = False,
) -> None:
    super().__init__(lazy=lazy)
    # Fall back to a spaCy tokenizer and single-id token indexers when none are given.
    self._tokenizer = tokenizer or SpacyTokenizer()
    self._segment_sentences = segment_sentences
    self._max_sequence_length = max_sequence_length
    self._skip_label_indexing = skip_label_indexing
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    # The sentence splitter is only needed when sentence segmentation is requested.
    if self._segment_sentences:
        self._sentence_segmenter = SpacySentenceSplitter()
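
# A minimal usage sketch (assuming AllenNLP 0.9.x, which matches the API style of these
# snippets) of what the default {"tokens": SingleIdTokenIndexer()} above does: each
# token is mapped to a single integer id drawn from the "tokens" vocabulary namespace.
from allennlp.data import Token, Vocabulary
from allennlp.data.token_indexers import SingleIdTokenIndexer

vocab = Vocabulary()
vocab.add_token_to_namespace("the", namespace="tokens")
vocab.add_token_to_namespace("cat", namespace="tokens")
indexer = SingleIdTokenIndexer(namespace="tokens")
print(indexer.tokens_to_indices([Token("the"), Token("cat")], vocab, "tokens"))
# e.g. {'tokens': [2, 3]}  (ids 0 and 1 are reserved for padding and OOV)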

def utest_data_loader():
    num_epoch = 8
    seed = 12
    batch_size = 32
    experiment_name = "mesim_wn_elmo"
    lazy = True
    dev_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/dev.jsonl"
    train_upstream_file = config.RESULT_PATH / "sent_retri/2018_07_05_17:17:50_r/train.jsonl"

    # Prepare data
    token_indexers = {
        'tokens': SingleIdTokenIndexer(namespace='tokens'),  # raw word-level tokens
        'elmo_chars': ELMoTokenCharactersIndexer(namespace='elmo_characters')  # ELMo character ids
    }
    p_dict = wn_persistent_api.persistence_load()
    train_fever_data_reader = WNReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict)
    dev_fever_data_reader = WNReader(token_indexers=token_indexers, lazy=lazy, wn_p_dict=p_dict)

    complete_upstream_dev_data = get_actual_data(config.T_FEVER_DEV_JSONL, dev_upstream_file)
    dev_instances = dev_fever_data_reader.read(complete_upstream_dev_data)

    # Load vocabulary
    biterator = BasicIterator(batch_size=batch_size)
    vocab, weight_dict = load_vocab_embeddings(config.DATA_ROOT / "vocab_cache" / "nli_basic")
    vocab.change_token_with_index_to_namespace('hidden', -2, namespace='labels')
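
# A hedged sketch of how the BasicIterator above is typically used (AllenNLP 0.x
# iterator API): it has to be indexed with the vocabulary before it can batch
# instances into tensors. The helper name below is hypothetical.
def utest_iterate_dev_batches(biterator, vocab, dev_instances):
    biterator.index_with(vocab)
    for batch in biterator(dev_instances, num_epochs=1, shuffle=False):
        print(batch.keys())  # a tensor dict keyed by the instance's field names
        break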

def __init__(self,
             context_size: int = 0,
             agent: str = None,
             random_context_size: bool = True,
             token_delimiter: str = None,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._context_size = context_size
    self._agent = agent
    self._random_context_size = random_context_size
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._token_delimiter = token_delimiter

def __init__(self,
             lazy: bool = False) -> None:
    super().__init__(lazy=lazy)
    # One tokenizer drops stopwords, the other keeps every word; each is indexed
    # into its own vocabulary namespace ("stopless" vs. "full").
    self._stopless_word_tokenizer = WordTokenizer(word_filter=StopwordFilter())
    self._full_word_tokenizer = WordTokenizer()
    self._stopless_token_indexers = {
        "tokens": SingleIdTokenIndexer(namespace="stopless", lowercase_tokens=True)
    }
    self._full_token_indexers = {
        "tokens": SingleIdTokenIndexer(namespace="full", lowercase_tokens=True)
    }
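
# A small sketch (assuming AllenNLP 0.9.x) of the two tokenizers above: the
# stopword-filtered one drops common function words, the full one keeps everything.
from allennlp.data.tokenizers import WordTokenizer
from allennlp.data.tokenizers.word_filter import StopwordFilter

full = WordTokenizer()
stopless = WordTokenizer(word_filter=StopwordFilter())
print([t.text for t in full.tokenize("the cat sat on the mat")])      # keeps "the", "on"
print([t.text for t in stopless.tokenize("the cat sat on the mat")])  # stopwords removed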

def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lemma_indexers: Dict[str, TokenIndexer] = None,
             action_indexers: Dict[str, TokenIndexer] = None,
             arc_tag_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    # Optional indexers are kept only when a non-empty mapping is supplied.
    self._lemma_indexers = None
    if lemma_indexers is not None and len(lemma_indexers) > 0:
        self._lemma_indexers = lemma_indexers
    self._action_indexers = None
    if action_indexers is not None and len(action_indexers) > 0:
        self._action_indexers = action_indexers
    self._arc_tag_indexers = None
    if arc_tag_indexers is not None and len(arc_tag_indexers) > 0:
        self._arc_tag_indexers = arc_tag_indexers
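
# A sketch (assuming AllenNLP 0.9.x) of why the reader keeps separate indexer dicts:
# each TextField carries its own indexers, so words, lemmas, actions, and arc tags
# can each be counted into their own vocabulary namespace.
from allennlp.data import Token
from allennlp.data.fields import TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

word_field = TextField([Token("dogs"), Token("ran")],
                       {'tokens': SingleIdTokenIndexer()})                     # "tokens" namespace
lemma_field = TextField([Token("dog"), Token("run")],
                        {'lemmas': SingleIdTokenIndexer(namespace='lemmas')})  # "lemmas" namespace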

def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lemma_indexers: Dict[str, TokenIndexer] = None,
             action_indexers: Dict[str, TokenIndexer] = None,
             arc_tag_indexers: Dict[str, TokenIndexer] = None,
             concept_label_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()}
    self._lemma_indexers = None
    if lemma_indexers is not None and len(lemma_indexers) > 0:
        self._lemma_indexers = lemma_indexers
    self._action_indexers = None
    if action_indexers is not None and len(action_indexers) > 0:
        self._action_indexers = action_indexers
    self._arc_tag_indexers = None
    if arc_tag_indexers is not None and len(arc_tag_indexers) > 0:
        self._arc_tag_indexers = arc_tag_indexers
    # Concept labels get their own namespace so they do not collide with word tokens.
    self._concept_label_indexers = concept_label_indexers or {
        'concept_label': SingleIdTokenIndexer(namespace='concept_label')}

def main():
    # Character-level vocabulary: letters, punctuation, and the start/end symbols.
    all_chars = {END_SYMBOL, START_SYMBOL}
    all_chars.update("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .,!?'-")
    token_counts = {char: 1 for char in all_chars}
    vocab = Vocabulary({'tokens': token_counts})
    token_indexers = {'tokens': SingleIdTokenIndexer()}

    train_set = read_dataset(all_chars)
    instances = [tokens_to_lm_instance(tokens, token_indexers)
                 for tokens in train_set]

    token_embedding = Embedding(num_embeddings=vocab.get_vocab_size('tokens'),
                                embedding_dim=EMBEDDING_SIZE)
    embedder = BasicTextFieldEmbedder({"tokens": token_embedding})
    model = RNNLanguageModel(embedder=embedder,
                             hidden_size=HIDDEN_SIZE,
                             max_len=80,
                             vocab=vocab)

    iterator = BasicIterator(batch_size=BATCH_SIZE)
    iterator.index_with(vocab)
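
    # A sketch of how training could proceed from here (assuming the AllenNLP 0.9.x
    # Trainer API); the optimizer, learning rate, and epoch count are placeholders.
    import torch.optim as optim
    from allennlp.training.trainer import Trainer

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=instances,
                      num_epochs=10)
    trainer.train()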

def __init__(self,
             lazy: bool = False,
             tokenizer: Tokenizer = None,
             sentence_token_indexers: Dict[str, TokenIndexer] = None,
             nonterminal_indexers: Dict[str, TokenIndexer] = None,
             terminal_indexers: Dict[str, TokenIndexer] = None,
             output_agendas: bool = True) -> None:
    super(NlvrDatasetReader, self).__init__(lazy)
    self._tokenizer = tokenizer or WordTokenizer()
    self._sentence_token_indexers = sentence_token_indexers or {"tokens": SingleIdTokenIndexer()}
    # Grammar production rules (nonterminals and terminals) are indexed in the
    # "rule_labels" namespace rather than the default "tokens" namespace.
    self._nonterminal_indexers = nonterminal_indexers or {"tokens": SingleIdTokenIndexer("rule_labels")}
    self._terminal_indexers = terminal_indexers or {"tokens": SingleIdTokenIndexer("rule_labels")}
    self._output_agendas = output_agendas

def __init__(self,
             token_indexers: Dict[str, TokenIndexer] = None,
             lazy: bool = False,
             tokenizer: Tokenizer = None) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    self._tokenizer = tokenizer or WordTokenizer()

def __init__(
    self,
    token_indexers: Dict[str, TokenIndexer] = None,
    tag_label: str = "chunk",
    feature_labels: Sequence[str] = (),
    lazy: bool = False,
    coding_scheme: str = "BIO",
    label_namespace: str = "labels",
) -> None:
    super().__init__(lazy)
    self._token_indexers = token_indexers or {"tokens": SingleIdTokenIndexer()}
    # Validate the requested tag/feature labels and coding scheme up front.
    if tag_label is not None and tag_label not in self._VALID_LABELS:
        raise ConfigurationError("unknown tag label type: {}".format(tag_label))
    for label in feature_labels:
        if label not in self._VALID_LABELS:
            raise ConfigurationError("unknown feature label type: {}".format(label))
    if coding_scheme not in ("BIO", "BIOUL"):
        raise ConfigurationError("unknown coding_scheme: {}".format(coding_scheme))
    self.tag_label = tag_label
    self.feature_labels = set(feature_labels)
    self.coding_scheme = coding_scheme
    self.label_namespace = label_namespace
    # The underlying data files are assumed to be BIO-encoded.
    self._original_coding_scheme = "BIO"
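
# A small sketch (assuming AllenNLP 0.9.x) of what coding_scheme="BIOUL" implies:
# BIO tags read from the data files are converted to BIOUL before being used.
from allennlp.data.dataset_readers.dataset_utils import to_bioul

print(to_bioul(["B-NP", "I-NP", "O", "B-VP"], encoding="BIO"))
# ['B-NP', 'L-NP', 'O', 'U-VP']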