# Project-specific names (SquadCorpus, config, trainer, model_dir, ...) are assumed to be
# imported from the document-qa (docqa) package; only stdlib/numpy imports are added here.
import json
from os.path import join
import numpy as np


def std():
    # Print basic statistics of the pretrained GloVe embedding matrix
    vecs = SquadCorpus().get_resource_loader().load_word_vec("glove.840B.300d")
    mat = np.vstack(list(vecs.values()))
    print(mat.shape)
    print(mat.mean(axis=0))
    print(mat.std(axis=0))
    print(mat.mean())
    print(mat.std())
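

# main1 compares two preprocessed versions of the SQuAD corpus (assumed to live under
# squad-v4 and squad-v2 in config.CORPUS_DIR) and raises if any document, paragraph,
# or token span differs between them.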
def main1():
    data = SquadCorpus()
    data.dir = join(config.CORPUS_DIR, "squad-v4")
    data2 = SquadCorpus()
    data2.dir = join(config.CORPUS_DIR, "squad-v2")

    train = data.get_dev()
    train2 = data2.get_dev()
    if len(train) != len(train2):
        raise ValueError()

    for d1, d2 in zip(train, train2):
        if d1.doc_id != d2.doc_id or d1.title != d2.title:
            raise ValueError()
        if len(d1.paragraphs) != len(d2.paragraphs):
            raise ValueError()
        for p1, p2 in zip(d1.paragraphs, d2.paragraphs):
            if p1.text != p2.text or p1.paragraph_num != p2.paragraph_num or p1.original_text != p2.original_text:
                raise ValueError()
            if not np.all(p1.spans == p2.spans):
                raise ValueError()
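

# Fragment of a model definition from one of the training scripts: the enclosing
# constructor call and the `dim`, `dropout`, `recurrent_layer`, `model`, `out`, and
# `train_params` variables are defined in the elided part of the script.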
        attention=BiAttention(TriLinear(bias=True), True),
        match_encoder=SequenceMapperSeq(
            FullyConnected(dim * 2, activation="relu"),
            dropout,
        ),
        predictor=BoundsPredictor(
            ChainBiMapper(
                first_layer=recurrent_layer,
                second_layer=recurrent_layer
            ),
        )
    )

    # Store this script's source text as the training notes
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
    train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
    eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
    data = DocumentQaTrainingData(corpus, None, train_batching, eval_batching)

    eval = [LossEvaluator(), BoundedSquadSpanEvaluator(bound=[17])]
    trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), notes)
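

# Separate snippet: configure and train a ContextOnly baseline model on SQuAD with
# the same clustered batching and span-evaluation setup as above.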
out = get_output_name_from_cli()

train_params = TrainParams(SerializableOptimizer("Adadelta", dict(learning_rate=1.0)),
                           num_epochs=16, eval_period=900, log_period=30,
                           async_encoding=5,
                           save_period=900, eval_samples=dict(train=6000, dev=6000))

model = ContextOnly(
    DocumentAndQuestionEncoder(SingleSpanAnswerEncoder()),
    FixedWordEmbedder(vec_name="glove.6B.100d", word_vec_init_scale=0, learn_unk=False),
    None,
    FullyConnected(50),
    BoundsPredictor(NullBiMapper())
)

corpus = SquadCorpus()
train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
data = DocumentQaTrainingData(corpus, None, train_batching, eval_batching)

eval = [LossEvaluator(), BoundedSquadSpanEvaluator(bound=[17])]
trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), "")
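

# Separate snippet: report top-k accuracy, assuming `ranks` holds 1-based paragraph ranks.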
print("Top3: %.4f" % (ranks < 4).mean())
print("Top5: %.4f" % (ranks < 6).mean())
print("Top10: %.4f" % (ranks < 11).mean())
def save_prediction(df, feature, output):
    answer_dict = {}
    for question_id, question_df in df.groupby(level="question_id"):
        values = question_df[feature].sort_index()
        answer_dict[question_id] = list(values.values)
    with open(output, "w") as f:
        json.dump(answer_dict, f)
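
# Hypothetical usage (the feature name and output path are made up for illustration):
#   save_prediction(df, "predicted_score", "/tmp/paragraph_scores.json")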


if __name__ == "__main__":
    corp = SquadCorpus()
    print("Loading...")
    # Use a small sample of documents to keep the feature-building step fast
    docs = corp.get_train()[:5]
    print("Building features...")
    df = build_features(docs, corp.get_resource_loader(), None, None, seed=0, dev=corp.get_dev()[:5])
    print("Classifier...")
    get_classifier_dev_scores(df)
    show_eval(df[df.source == "dev"])