How to use the squad.build_dataset.SquadCorpus function in squad

To help you get started, we've selected a few SquadCorpus examples based on popular ways it is used in public projects.

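Every example below starts the same way: construct a SquadCorpus and fetch its train or dev split, then preprocess or batch the result. The following is a minimal sketch of that shared pattern; the import path is taken from this page's title, and treating the splits as sized collections (supporting len()) is an assumption rather than something the snippets demonstrate.

from squad.build_dataset import SquadCorpus  # import path assumed from the page title; adjust to your install

data = SquadCorpus()
train = data.get_train()  # training split, as used by check_answers() below
dev = data.get_dev()      # dev split, as used by count_answers() below
# Assumes the splits are sized collections; the snippets below only iterate them
print("loaded %d train and %d dev items" % (len(train), len(dev)))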

github allenai / document-qa / experimental / squad_text_labels.py
def count_answers():
    data = SquadCorpus()
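    # Tag answer spans in each dev question, then count how many spans each one ends up with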
    computed = preprocess(tqdm(data.get_dev(), desc="tagging"))
    counts = Counter([len(x.answer.answer_spans) for x in computed])

    for i in range(0, 11):
        print("%d: %d (%.4f)" % (i, counts[i], counts[i]/len(computed)))

github allenai / document-qa / train_squad / train2.py
            ResidualLayer(recurrent_layer),
            AttentionEncoder(post_process=MapperSeq(FullyConnected(25, activation="tanh"), DropoutLayer(0.8))),
            WithProjectedProduct(include_tiled=True),
            ChainBiMapper(
                first_layer=recurrent_layer,
                second_layer=recurrent_layer
            ),
            IndependentBoundsJointLoss()
        )
    )
    with open(__file__, "r") as f:
        notes = f.read()

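    # Batches of 45 examples, clustered by context length (bucketed key for training, exact key for eval)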
    train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
    eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
    data = DocumentQaTrainingData(SquadCorpus(), None, train_batching, eval_batching)

    eval = [LossEvaluator(), SpanProbability(), BoundedSquadSpanEvaluator(bound=[17])]
    trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), notes, False)

github allenai / document-qa / experimental / squad_text_labels.py
def check_answers():
    data = SquadCorpus()
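    # Tag answer spans in the training questions, then verify that every tagged span
    # recovers text exactly matching one of the reference answer strings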
    computed = preprocess(tqdm(data.get_train(), desc="tagging"))
    for para in tqdm(computed, desc="checking"):
        for (start, end) in para.answer.answer_spans:
            text = para.paragraph.get_original_text(start, end)
            if not any(exact_match_score(x, text) for x in para.answer.answer_text):
                raise ValueError()

github allenai / document-qa / experimental / squad_text_labels.py
def test_build_training_data():
    train_batching = ClusteredBatcher(60, ContextLenBucketedKey(3), True, False)
    eval_batching = ClusteredBatcher(60, ContextLenKey(), False, False)
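    # Build a small preprocessed dataset: tag answer-text occurrences and group
    # questions into length-clustered batches, sampling 20 train and 20 dev items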
    data = PreprocessedData(SquadCorpus(),
                            TagTextAnswers(),
                            ParagraphAndQuestionDatasetBuilder(train_batching, eval_batching),
                            eval_on_verified=False,
                            sample=20, sample_dev=20
                            # sample_dev=100, sample=100, eval_on_verified=False
                            )
    data.preprocess()
    data = data.get_train()
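    # Walk one epoch of training batches and print each question's tagged answer-span array shape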
    for batch in data.get_epoch():
        for x in batch:
            print(x.answer.answer_spans.shape)

github allenai / document-qa / train_squad / train_text_answers.py
            ResidualLayer(BiRecurrentMapper(GruCellSpec(80))),
            AttentionEncoder(post_process=MapperSeq(FullyConnected(25, activation="tanh"), DropoutLayer(0.8))),
            WithProjectedProduct(include_tiled=True),
            ChainBiMapper(
                first_layer=BiRecurrentMapper(GruCellSpec(80)),
                second_layer=BiRecurrentMapper(GruCellSpec(80))
            ),
            aggregate="sum"
        )
    )
    with open(__file__, "r") as f:
        notes = f.read()

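    # Same batching setup as above: 45-example batches keyed by context length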
    train_batching = ClusteredBatcher(45, ContextLenBucketedKey(3), True, False)
    eval_batching = ClusteredBatcher(45, ContextLenKey(), False, False)
    data = PreprocessedData(SquadCorpus(),
                            TagTextAnswers(),
                            ParagraphAndQuestionDatasetBuilder(train_batching, eval_batching),
                            # sample=20, sample_dev=20,
                            eval_on_verified=False)
    data.preprocess()

    eval = [LossEvaluator(), BoundedSquadSpanEvaluator(bound=[17])]
    trainer.start_training(data, model, train_params, eval, model_dir.ModelDir(out), notes, False)