How to use the squad.squad.SquadCorpus function in squad

To help you get started, we've selected a few SquadCorpus examples, based on popular ways the function is used in public projects.

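In every example below the pattern is the same: SquadCorpus is constructed with no arguments, then either handed to a training-data builder or queried for its dev split. Here is a minimal sketch of that pattern; the import path is an assumption taken from this page's title, and only accessors that actually appear in the snippets below are used.

# Minimal usage sketch -- the import path is assumed from the page title
# (squad.squad.SquadCorpus); the snippets below refer to the class directly.
from squad.squad import SquadCorpus

corpus = SquadCorpus()         # load the preprocessed SQuAD dataset

dev = corpus.get_dev()         # dev examples (keyed by question_id below)
docs = corpus.get_dev_docs()   # the dev set grouped into full documents
print(len(dev), len(docs))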

github allenai / document-qa / train_squad / train_base5.py (View on GitHub)
FullyConnectedMerge(160)),
        match_encoder=SequenceMapperSeq(
            BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            DropoutLayer(0.8),
            StaticAttentionSelf(DotProductProject(160, bias=True, scale=True, share_project=True),
                                FullyConnectedMerge(160)),
        ),
        predictor=BoundsPredictor(ChainBiMapper(
            first_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            second_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        ))
    )
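    # save this script's own source text as notes for the training run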
    with open(__file__, "r") as f:
        notes = f.read()

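    # load the preprocessed SQuAD corpus and build per-paragraph training data
    # (the scheme names suggest batches are grouped by context word count)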
    corpus = SquadCorpus()
    train_batching = Batcher(45, "bucket_context_words_3", True, False)
    eval_batching = Batcher(45, "context_words", False, False)
    data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)

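    # evaluators for loss and answer-span quality; bound=[17] presumably caps
    # the length of candidate answer spans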
    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
github allenai / document-qa / train_squad / r_net.py (View on GitHub)
SelfAttention(RecurrentAttention(GruCellSpec(75), direction="bidirectional", gated=True)),
                                        DropoutLayer(0.8)),
        predictor=ChainConcatPredictor(
            start_layer=SequenceMapperSeq(
                BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8)),
                BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8))),
            end_layer=BiRecurrentMapper(LstmCellSpec(100, keep_probs=0.8))
        )
    )

    with open(__file__, "r") as f:
        notes = f.read()

    eval = [LossEvaluator(), SpanEvaluator(), SentenceSpanEvaluator()]

    corpus = SquadCorpus()
    params = BatchingParameters(60, 60, "bucket_context_words_3",
                                "context_words", True, False)
    data = FixedParagraphQaTrainingData(corpus, None, params, [])

    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
github allenai / document-qa / docqa / data_analysis / visualize_full_doc_errors.py (View on GitHub)
import argparse
import json

import numpy as np
from nltk.corpus import stopwords

# SquadCorpus and QuestionAnswer are project-local to allenai/document-qa;
# their exact import paths are not shown in this excerpt.


def main():
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("answers")
    args = parser.parse_args()

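    # load SQuAD and recover the mapping from the tokenized dev paragraphs
    # back to the original article text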
    data = SquadCorpus()
    origin_mapping = data.get_original_text_mapping()

    stop = set(stopwords.words('english'))

    with open(args.answers, "r") as f:
        answers = [QuestionAnswer(**x) for x in json.load(f)]

    dev_data = {x.question_id: x for x in data.get_dev()}
    paragraph_map = {}
    for p in dev_data.values():
        paragraph_map[(p.article_id, p.paragraph_num)] = p.context

    np.random.shuffle(answers)
    # tmp = open("/tmp/tmp.csv", "w")

    for prediction in answers:
github allenai / document-qa / experimental / batch_paragraph_selection / show_paragraph_selection_fixes.py (View on GitHub)
import argparse
import pickle

import numpy as np

# ParagraphRanks and SquadCorpus are project-local to allenai/document-qa;
# their exact import paths are not shown in this excerpt.


def main():
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("answers")
    parser.add_argument("paragraph")
    args = parser.parse_args()

    with open(args.answers, "rb") as f:
        answers = pickle.load(f)
    answers = {x.question_id: x for x in answers}

    para_predictions = ParagraphRanks(args.paragraph).get_ranks()

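    # the dev set grouped into full documents, so paragraph ranks can be
    # scored within each article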
    docs = SquadCorpus().get_dev_docs()

    max_para_len = max(len(doc.paragraphs) for doc in docs)
    top_n_f1_score = np.zeros(max_para_len)
    counts = np.zeros(max_para_len)
    top_n_span_score = np.zeros(max_para_len)

    n_questions = 0
    for doc in docs:
        for para in doc.paragraphs:
            n_questions += len(para.questions)
            for question in para.questions:
                answer = answers[question.question_id]

                best_val = -1
                text_f1 = -1
                span_f1 = 0
github allenai / document-qa / train_squad / train_base4.py (View on GitHub)
),
        embed_mapper=None,
        question_mapper=SequenceMapperSeq(BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))),
        context_mapper=SequenceMapperSeq(BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))),
        memory_builder=NullBiMapper(),
        attention=StaticAttention(DotProduct(True), FullyConnectedMerge(160)),
        match_encoder=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
        predictor=ChainPredictor(
            start_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            end_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))
        )
    )
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
    params = BatchingParameters(45, 45, "bucket_context_words_3",
                                "context_words", True, False)
    data = FixedParagraphQaTrainingData(corpus, None, params, [])

    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, True)
github allenai / document-qa / train_squad / train_recurrent_atten.py (View on GitHub)
attention=RecurrentAttention(LstmCellSpec(80, keep_probs=0.8), BiLinear(80, bias=True)),
        match_encoder=SequenceMapperSeq(
            BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            DropoutLayer(0.8),
            StaticAttentionSelf(DotProductProject(160, bias=True, scale=True, share_project=True),
                                FullyConnectedMerge(160)),
        ),
        predictor=ChainPredictor(
            start_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            end_layer=BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8))
        )
    )
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
    train_batching = Batcher(45, "bucket_context_words_3", True, False)
    eval_batching = Batcher(45, "context_words", False, False)
    data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)

    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)
github allenai / document-qa / experimental / aligned_wiki_qa.py (View on GitHub)
def main():
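    # pair the SQuAD corpus with its source Wikipedia articles and materialize
    # the resulting training documents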
    corp = WikiArticleQaCorpus(SquadCorpus(), SquadWikiArticles(), True, 0.15)
    corp.get_train_docs()
github allenai / document-qa / train_squad / train_base6.py (View on GitHub)
FullyConnected(160, "tanh"),
            BiRecurrentMapper(LstmCellSpec(80, keep_probs=0.8)),
            DropoutLayer(0.8),
            StaticAttentionSelf(TriLinear(bias=True), ConcatWithProduct()),
            FullyConnected(160, activation="tanh"),
            DropoutLayer(0.8),
        ),
        predictor=BoundsPredictor(ChainBiMapper(
            first_layer=BiRecurrentMapper(LstmCellSpec(80)),
            second_layer=BiRecurrentMapper(LstmCellSpec(80)),
        ))
    )
    with open(__file__, "r") as f:
        notes = f.read()

    corpus = SquadCorpus()
    train_batching = Batcher(45, "bucket_context_words_3", True, False)
    eval_batching = Batcher(45, "context_words", False, False)
    data = FixedParagraphQaTrainingData(corpus, None, train_batching, eval_batching)

    eval = [LossEvaluator(), BoundedSpanEvaluator(bound=[17]), SentenceSpanEvaluator()]
    trainer.start_training(data, model, train_params, eval, trainer.ModelDir(out), notes, False)