# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def count_answers():
    """Tag the SQuAD dev set and print, for span counts 0-10, how many
    questions have that many answer spans (count and fraction of the set)."""
    corpus = SquadCorpus()
    tagged = preprocess(tqdm(corpus.get_dev(), desc="tagging"))
    span_counts = Counter(len(question.answer.answer_spans) for question in tagged)
    total = len(tagged)
    for n_spans in range(11):
        print("%d: %d (%.4f)" % (n_spans, span_counts[n_spans], span_counts[n_spans] / total))
# NOTE(review): fragment — the argument list below is the tail of a model
# definition whose opening lines are not visible in this chunk; the code is
# left byte-identical and only annotated.
ResidualLayer(recurrent_layer),
AttentionEncoder(post_process=MapperSeq(FullyConnected(25, activation="tanh"), DropoutLayer(0.8))),
WithProjectedProduct(include_tiled=True),
# Two chained bidirectional layers sharing the same recurrent_layer spec —
# presumably the start/end span predictors; confirm against ChainBiMapper.
ChainBiMapper(
first_layer=recurrent_layer,
second_layer=recurrent_layer
),
IndependentBoundsJointLoss()
)
)
# Store this script's own source text as the run notes, so the exact
# configuration used for training is recorded alongside the model.
with open(__file__, "r") as f:
notes = f.read()
# Batchers of size 45; positional booleans presumably control shuffling /
# truncation — verify against the ClusteredBatcher signature.
train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
data = DocumentQaTrainingData(SquadCorpus(), None, train_batching, eval_batching)
# Evaluators: loss, span probability, and span accuracy with answer length bound 17.
eval = [LossEvaluator(), SpanProbability(), BoundedSquadSpanEvaluator(bound=[17])]
trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), notes, False)
def check_answers():
    """Sanity-check the tagged train set: every stored answer span must map
    back to paragraph text that exact-matches one of the gold answer strings.

    Raises:
        ValueError: if a recovered span's text does not exact-match any of
            the annotated answer strings for that question.
    """
    data = SquadCorpus()
    computed = preprocess(tqdm(data.get_train(), desc="tagging"))
    for para in tqdm(computed, desc="checking"):
        for (start, end) in para.answer.answer_spans:
            text = para.paragraph.get_original_text(start, end)
            if not any(exact_match_score(x, text) for x in para.answer.answer_text):
                # Fix: the original raised a bare ValueError() with no message,
                # which made tagging failures impossible to diagnose. Include
                # the offending span and the expected answers in the error.
                raise ValueError(
                    "Answer span (%d, %d) recovered text %r does not match any gold answer %r"
                    % (start, end, text, para.answer.answer_text))
def test_build_training_data():
    """Smoke-test the preprocessing pipeline: build a tiny (20/20 sample)
    tagged SQuAD dataset and print the answer-span array shape for every
    question in every training batch."""
    batcher_train = ClusteredBatcher(60, ContextLenBucketedKey(3), True, False)
    batcher_eval = ClusteredBatcher(60, ContextLenKey(), False, False)
    dataset = PreprocessedData(
        SquadCorpus(),
        TagTextAnswers(),
        ParagraphAndQuestionDatasetBuilder(batcher_train, batcher_eval),
        eval_on_verified=False,
        sample=20, sample_dev=20,
    )
    dataset.preprocess()
    train = dataset.get_train()
    for batch in train.get_epoch():
        for question in batch:
            print(question.answer.answer_spans.shape)
# NOTE(review): fragment — the argument list below is the tail of a model
# definition whose opening lines are not visible in this chunk; the code is
# left byte-identical and only annotated.
ResidualLayer(BiRecurrentMapper(GruCellSpec(80))),
AttentionEncoder(post_process=MapperSeq(FullyConnected(25, activation="tanh"), DropoutLayer(0.8))),
WithProjectedProduct(include_tiled=True),
# Two independent 80-unit bidirectional GRU layers — presumably the start/end
# span predictors; confirm against ChainBiMapper.
ChainBiMapper(
first_layer=BiRecurrentMapper(GruCellSpec(80)),
second_layer=BiRecurrentMapper(GruCellSpec(80))
),
aggregate="sum"
)
)
# Store this script's own source text as the run notes, so the exact
# configuration used for training is recorded alongside the model.
with open(__file__, "r") as f:
notes = f.read()
# Batchers of size 45; positional booleans presumably control shuffling /
# truncation — verify against the ClusteredBatcher signature.
train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
# Preprocessed data with text-tagged answers; sampling line kept commented-out
# by the original author (full corpus is used here).
data = PreprocessedData(SquadCorpus(),
TagTextAnswers(),
ParagraphAndQuestionDatasetBuilder(train_batching, eval_batching),
# sample=20, sample_dev=20,
eval_on_verified=False)
data.preprocess()
# Evaluators: loss and span accuracy with answer length bound 17.
eval = [LossEvaluator(), BoundedSquadSpanEvaluator(bound=[17])]
trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), notes, False)