# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def show_answers():
    """Load a QA training set, then build word counts and initialize a NameDetector.

    With ``use_squad`` False (the hard-coded default) this loads the cached
    TriviaQA-web single-paragraph preprocessing; the SQuAD branch is kept but
    currently dead.
    """
    print("Loading...")
    use_squad = False
    if use_squad:
        training_docs = SquadCorpus().get_train()
        data = split_docs(training_docs)
    else:
        stop_words = NltkPlusStopWords()
        prepro = PreprocessedData(
            TriviaQaWebDataset(),
            ExtractSingleParagraph(MergeParagraphs(400), TopTfIdf(stop_words, 1), intern=True),
            InMemoryWebQuestionBuilder(None, None),
            eval_on_verified=False,
        )
        # Load the cached preprocessing rather than recomputing it
        prepro.load_preprocess("triviaqa-web-merge400-tfidf1.pkl.gz")
        data = prepro.get_train().data
    print("Get voc...")
    detector = NameDetector()
    word_counts = QaCorpusLazyStats(data).get_word_counts()
    detector.init(word_counts)
def show_nn():
    """Report GloVe-vector coverage of words the NameDetector flags as names.

    Loads the SQuAD training docs, counts words, detects name-like tokens,
    then prints what fraction of detected names have a glove.840B.300d vector.
    """
    corpus = SquadCorpus()
    print("Load train")
    data = split_docs(corpus.get_train())
    print("Compute stats")  # fixed typo: was "Comput stats"
    wc = QaCorpusLazyStats(data).get_word_counts()
    # NOTE(review): show_answers builds NameDetector() then calls init(wc);
    # here wc is passed to the constructor — confirm both forms are supported.
    detector = NameDetector(wc)
    print("Load vecs")
    vecs = corpus.get_resource_loader().load_word_vec("glove.840B.300d")
    print('Scanning...')
    names = Counter()
    for word, c in wc.items():
        if detector.is_name(word):
            names[word] = c
    vec_names = [k for k in names if k in vecs]
    if names:
        print("Have vec for %d/%d (%.4f)" % (len(vec_names), len(names), len(vec_names) / len(names)))
    else:
        # Guard: original divided by len(names) and would raise ZeroDivisionError here
        print("No names detected")
# Span-prediction head: fixed-context prediction layer over a residual recurrent
# encoder, attention-pooled question encoding, and a bounded (max length 20)
# span predictor. `recurrent_layer` is defined elsewhere in the file.
# NOTE(review): the trailing extra ")" below closes an enclosing call whose
# opening is not visible in this chunk — this block is a fragment; confirm
# against the full file before editing.
predictor=WithFixedContextPredictionLayer(
ResidualLayer(recurrent_layer),
AttentionEncoder(post_process=MapperSeq(FullyConnected(25, activation="tanh"), DropoutLayer(0.8))),
WithProjectedProduct(include_tiled=True),
ChainBiMapper(
first_layer=recurrent_layer,
second_layer=recurrent_layer
),
span_predictor=BoundedSpanPredictor(20)
)
)
# Record this script's own source as the run notes, then launch SQuAD training.
# `trainer`, `model`, `train_params`, and `out` are defined elsewhere in the
# file (not visible in this chunk) — this appears to be the body of a training
# entry point whose def line was lost; confirm against the full file.
with open(__file__, "r") as f:
notes = f.read()
corpus = SquadCorpus()
# Batch size 45; training batches bucketed by context length (shuffled),
# eval batches sorted by context length (deterministic).
train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
data = DocumentQaTrainingData(corpus, None, train_batching, eval_batching)
# Evaluate loss plus span accuracy with answers bounded to 17 tokens
eval = [LossEvaluator(), BoundedSquadSpanEvaluator(bound=[17])]
trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), notes)
def main():
# NOTE(review): this function is truncated in the visible chunk — it continues
# past the last line shown here; only the visible portion is documented.
# Build a word-dropping embedder that shuffles name-like tokens
# (keep_probs=0 means detected names are never kept as-is — confirm semantics
# of DropNamesV2 against its definition).
embed = DropNamesV2(vec_name="glove.840B.300d",
selector=NameDetector(),
word_vec_init_scale=0, learn_unk=False,
keep_probs=0, kind="shuffle")
corpus = SquadCorpus()
# Same dataset-selection pattern as show_answers: squad branch is dead code
squad = False
print("Loading...")
if squad:
docs = corpus.get_train()
data = split_docs(docs)
else:
stop = NltkPlusStopWords()
data = PreprocessedData(TriviaQaWebDataset(),
ExtractSingleParagraph(MergeParagraphs(400), TopTfIdf(stop, 1), intern=True),
InMemoryWebQuestionBuilder(None, None),
eval_on_verified=False
)
# Load cached preprocessing rather than recomputing
data.load_preprocess("triviaqa-web-merge400-tfidf1.pkl.gz")
data = data.get_train().data
print("Get voc...")