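# Load question-answer data (SQuAD paragraphs or a preprocessed TriviaQA-web split),
# then collect corpus word counts to initialize the NameDetector.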
def show_answers():
    print("Loading...")
    squad = False
    if squad:
        corpus = SquadCorpus()
        docs = corpus.get_train()
        data = split_docs(docs)
    else:
        stop = NltkPlusStopWords()
        data = PreprocessedData(TriviaQaWebDataset(),
                                ExtractSingleParagraph(MergeParagraphs(400), TopTfIdf(stop, 1), intern=True),
                                InMemoryWebQuestionBuilder(None, None),
                                eval_on_verified=False)
        data.load_preprocess("triviaqa-web-merge400-tfidf1.pkl.gz")
        data = data.get_train().data

    print("Get voc...")
    detector = NameDetector()
    wc = QaCorpusLazyStats(data).get_word_counts()
    detector.init(wc)
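
# Remainder of a model definition: a BiLSTM + self-attention match encoder and a
# ChainBiMapper bounds predictor, followed by the standard SQuAD training setup.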
        FullyConnectedMerge(160)),
    match_encoder=SequenceMapperSeq(
        BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        DropoutLayer(0.8),
        StaticAttentionSelf(DotProductProject(160, bias=True, scale=True, share_project=True),
                            FullyConnectedMerge(160)),
    ),
    predictor=BoundsPredictor(ChainBiMapper(
        first_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        second_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
    ))
)

with open(__file__, "r") as f:
    notes = f.read()

corpus = SquadCorpus()
train_batching = Batcher(45, "bucket_context_words_3", True, False)
eval_batching = Batcher(45, "context_words", False, False)
data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)
eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
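
# Variant with a gated bidirectional GRU self-attention layer and a
# ChainConcatPredictor for the answer span, trained with a similar SQuAD setup.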
        SelfAttention(RecurrentAttention(GruCellSpec(75), direction="bidirectional", gated=True)),
        DropoutLayer(0.8)),
    predictor=ChainConcatPredictor(
        start_layer=SequenceMapperSeq(
            BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8)),
            BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8))),
        end_layer=BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8))
    )
)

with open(__file__, "r") as f:
    notes = f.read()

eval = [LossEvaluator(), SpanEvaluator(), SentenceSpanEvaluator()]

corpus = SquadCorpus()
params = BatchingParameters(60, 60, "bucket_context_words_3",
                            "context_words", True, False)
data = FixedParagraphQaTrainingData(corpus, None, params, [])
trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
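
# Inspect model predictions on the SQuAD dev set: map each predicted span back to
# its original paragraph text.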
def main():
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("answers")
    args = parser.parse_args()

    data = SquadCorpus()
    origin_mapping = data.get_original_text_mapping()

    stop = set(stopwords.words('english'))

    with open(args.answers, "r") as f:
        answers = [QuestionAnswer(**x) for x in json.load(f)]

    dev_data = {x.question_id: x for x in data.get_dev()}

    paragraph_map = {}
    for p in dev_data.values():
        paragraph_map[(p.article_id, p.paragraph_num)] = p.context

    np.random.shuffle(answers)

    # tmp = open("/tmp/tmp.csv", "w")
    for prediction in answers:
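
# Score paragraph-ranking output: accumulate per-rank F1 and span statistics over
# every question in the SQuAD dev documents.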
def main():
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("answers")
    parser.add_argument("paragraph")
    args = parser.parse_args()

    with open(args.answers, "rb") as f:
        answers = pickle.load(f)
    answers = {x.question_id: x for x in answers}

    para_predictions = ParagraphRanks(args.paragraph).get_ranks()

    docs = SquadCorpus().get_dev_docs()
    max_para_len = max(len(doc.paragraphs) for doc in docs)

    top_n_f1_score = np.zeros(max_para_len)
    counts = np.zeros(max_para_len)
    top_n_span_score = np.zeros(max_para_len)
    n_questions = 0

    for doc in docs:
        for para in doc.paragraphs:
            n_questions += len(para.questions)
            for question in para.questions:
                answer = answers[question.question_id]
                best_val = -1
                text_f1 = -1
                span_f1 = 0
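
# Simpler model variant: single-layer BiLSTM question/context mappers, dot-product
# static attention, and a ChainPredictor over the matched representation.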
    ),
    embed_mapper=None,
    question_mapper=SequenceMapperSeq(BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))),
    context_mapper=SequenceMapperSeq(BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))),
    memory_builder=NullBiMapper(),
    attention=StaticAttention(DotProduct(True), FullyConnectedMerge(160)),
    match_encoder=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
    predictor=ChainPredictor(
        start_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        end_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))
    )
)

with open(__file__, "r") as f:
    notes = f.read()

corpus = SquadCorpus()
params = BatchingParameters(45, 45, "bucket_context_words_3",
                            "context_words", True, False)
data = FixedParagraphQaTrainingData(corpus, None, params, [])
eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, True)
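
# Variant using recurrent LSTM attention with a bilinear similarity, plus a
# self-attention match encoder, trained on fixed SQuAD paragraphs.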
    attention=RecurrentAttention(LstmCellSpec(80, keep_probs=0.8), BiLinear(80, bias=True)),
    match_encoder=SequenceMapperSeq(
        BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        DropoutLayer(0.8),
        StaticAttentionSelf(DotProductProject(160, bias=True, scale=True, share_project=True),
                            FullyConnectedMerge(160)),
    ),
    predictor=ChainPredictor(
        start_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        end_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))
    )
)

with open(__file__, "r") as f:
    notes = f.read()

corpus = SquadCorpus()
train_batching = Batcher(45, "bucket_context_words_3", True, False)
eval_batching = Batcher(45, "context_words", False, False)
data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)
eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
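
# Build a corpus that pairs SQuAD questions with full Wikipedia articles and load
# its training documents.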
def main():
    corp = WikiArticleQaCorpus(SquadCorpus(), SquadWikiArticles(), True, 0.15)
    corp.get_train_docs()
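
# Model with fully connected tanh projections and TriLinear self-attention in the
# match encoder, and a ChainBiMapper bounds predictor, trained on SQuAD.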
        FullyConnected(160, "tanh"),
        BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        DropoutLayer(0.8),
        StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
        FullyConnected(160, activation="tanh"),
        DropoutLayer(0.8),
    ),
    predictor=BoundsPredictor(ChainBiMapper(
        first_layer=BiRecurrentMapper(LstmCellSpec(80)),
        second_layer=BiRecurrentMapper(LstmCellSpec(80)),
    ))
)

with open(__file__, "r") as f:
    notes = f.read()

corpus = SquadCorpus()
train_batching = Batcher(45, "bucket_context_words_3", True, False)
eval_batching = Batcher(45, "context_words", False, False)
data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)
eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
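
# Map dev answers back to their paragraphs, then for each prediction recover the raw
# answer text, compute F1 against the gold answers, and locate the sentence containing
# the predicted span.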
    for p in dev_data.values():
        paragraph_map[(p.article_id, p.paragraph_num)] = p.context

    np.random.shuffle(answers)

    # tmp = open("/tmp/tmp.csv", "w")
    for prediction in answers:
        point = dev_data[prediction.question_id]
        start, end = prediction.doc_span
        context = paragraph_map[(point.article_id, prediction.paragraph_num)]
        text = origin_mapping.get_raw_text(point.article_id, prediction.paragraph_num, start, end)

        text_f1 = 0
        for ans in point.answer:
            text_f1 = max(text_f1, text_f1_score(text, ans.text))

        # Walk the paragraph's sentences to find the one containing the predicted span
        ans_sent = 0
        offset = 0
        while end >= offset + len(context[ans_sent]):
            offset += len(context[ans_sent])
            ans_sent += 1
        sent_start = start - offset
        sent_end = end - offset

        question_words = set(x.lower() for x in point.question if x.lower() not in stop)

        if prediction.paragraph_num != point.paragraph_num and text_f1 == 0:
            # tmp.write(" ".join(point.question))
            # tmp.write("\t" + point.article_title)
            # tmp.write("\t" + text)
            # tmp.write("\t" + str(list(set(x.text for x in point.answer))))