Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Download and build the data if it does not exist.
import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile
# Files to download for TriviaQA. Each spec is (URL, local file name, hash
# string) and is passed positionally to DownloadableFile.
# NOTE(review): the 64-hex-digit hash is presumably a SHA-256 checksum --
# confirm against DownloadableFile.
RESOURCES = [
    DownloadableFile(url, file_name, checksum)
    for url, file_name, checksum in (
        (
            'http://nlp.cs.washington.edu/triviaqa/data/triviaqa-rc.tar.gz',
            'triviaqa-rc.tar.gz',
            'ef94fac6db0541e5bb5b27020d067a8b13b1c1ffc52717e836832e02aaed87b9',
        ),
    )
]
# Start building the TriviaQA data directory.
#
# `opt` is indexed with the key 'datapath'; the target directory is that value
# joined with 'TriviaQA'. When build_data.built(dpath, version_string=version)
# is falsy, a progress message is printed and, if build_data.built(dpath)
# (called without a version) is truthy, the existing directory is removed.
#
# NOTE(review): the visible snippet stops after the removal step and has lost
# its indentation; any remaining build steps are not shown here.
def build(opt):
dpath = os.path.join(opt['datapath'], 'TriviaQA')
# No version string is requested for this dataset.
version = None
if not build_data.built(dpath, version_string=version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
build_data.remove_dir(dpath)
import os
import json
# Dataset version number substituted into the HotpotQA file names below.
VERSION = '1'

# File names derived from VERSION (the train file carries an extra ".1").
TRAIN_FILENAME = 'hotpot_train_v{}.1.json'.format(VERSION)
DEV_DISTRACTOR_FILENAME = 'hotpot_dev_distractor_v{}.json'.format(VERSION)
DEV_FULLWIKI_FILENAME = 'hotpot_dev_fullwiki_v{}.json'.format(VERSION)

# Files to download. Each spec is (URL, local file name, hash string); every
# entry is created with zipped=False.
# NOTE(review): the 64-hex-digit hash is presumably a SHA-256 checksum --
# confirm against DownloadableFile.
RESOURCES = [
    DownloadableFile(url, file_name, checksum, zipped=False)
    for url, file_name, checksum in (
        (
            'http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_train_v1.1.json',
            'hotpot_train_v1.1.json',
            '26650cf50234ef5fb2e664ed70bbecdfd87815e6bffc257e068efea5cf7cd316',
        ),
        (
            'http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_distractor_v1.json',
            'hotpot_dev_distractor_v1.json',
            '4e9ecb5c8d3b719f624d66b60f8d56bf227f03914f5f0753d6fa1b359d7104ea',
        ),
        (
            'http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json',
            'hotpot_dev_fullwiki_v1.json',
            '2f1f3e594a3066a3084cc57950ca2713c24712adaad03af6ccce18d1846d5618',
        ),
    )
]

# Template for one output line: a "text:" field and a "labels:" field
# separated by a tab.
OUTPUT_FORMAT = 'text:{context_question}\tlabels:{answer}'
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.
import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile
# Files to download for SQuAD 2.0. Each spec is (URL, local file name, hash
# string); every entry is created with zipped=False.
# NOTE(review): the 64-hex-digit hash is presumably a SHA-256 checksum --
# confirm against DownloadableFile.
RESOURCES = [
    DownloadableFile(url, file_name, checksum, zipped=False)
    for url, file_name, checksum in (
        (
            'https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json',
            'train-v2.0.json',
            '68dcfbb971bd3e96d5b46c7177b16c1a4e7d4bdef19fb204502738552dede002',
        ),
        (
            'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json',
            'dev-v2.0.json',
            '80a5225e94905956a6446d296ca1093975c4d3b3260f1d6c8f68bc2ab77182d8',
        ),
    )
]
# Start building the SQuAD2 data directory.
#
# `opt` is indexed with the key 'datapath'; the target directory is that value
# joined with 'SQuAD2'. When build_data.built(dpath, version_string=version)
# is falsy, a progress message is printed and build_data.built(dpath) is
# checked again without a version.
#
# NOTE(review): the visible snippet is cut off inside the inner `if` (only its
# comment remains) and has lost its indentation; the rest is not shown here.
def build(opt):
dpath = os.path.join(opt['datapath'], 'SQuAD2')
# No version string is requested for this dataset.
version = None
if not build_data.built(dpath, version_string=version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
# Files to download for the dialogue safety data. Each spec is (URL, local
# file name, hash string); every entry is created with zipped=False.
# NOTE(review): the 64-hex-digit hash is presumably a SHA-256 checksum --
# confirm against DownloadableFile.
RESOURCES = [
    DownloadableFile(url, file_name, checksum, zipped=False)
    for url, file_name, checksum in (
        (
            'http://parl.ai/downloads/dialogue_safety/single_turn_safety.json',
            'single_turn_safety.json',
            'f3a46265aa639cfa4b55d2be4dca4be1c596acb5e8f94d7e0041e1a54cedd4cd',
        ),
        (
            'http://parl.ai/downloads/dialogue_safety/multi_turn_safety.json',
            'multi_turn_safety.json',
            'e3e577f456d63d51eb7b5f98ffd251ad695476f186d422fa8de1a177742fa7b6',
        ),
    )
]
# Start building the dialogue_safety data directory.
#
# Unlike the other build() snippets in this file, this one takes the data path
# directly rather than reading it from an options mapping. The target
# directory is `datapath` joined with 'dialogue_safety'. When
# build_data.built(dpath, version) is falsy, a progress message is printed and
# build_data.built(dpath) is checked again without a version.
#
# NOTE(review): the visible snippet is cut off inside the inner `if` (only its
# comment remains) and has lost its indentation; the rest is not shown here.
def build(datapath):
# Version passed positionally as the second argument of build_data.built().
version = 'v1.0'
dpath = os.path.join(datapath, 'dialogue_safety')
if not build_data.built(dpath, version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.
import parlai.core.build_data as build_data
import os
from parlai.core.build_data import DownloadableFile
RESOURCES = [
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Train_mscoco.zip',
'Questions_Train_mscoco.zip',
'c3b2bb6155528eeae95e0a914af394d6f0d98f8f2b51012c44b27778e1a96707',
),
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Val_mscoco.zip',
'Questions_Val_mscoco.zip',
'e8839be5de2d711989bf0adc82e6717d1ce307d27c9b1dfb0abf413b79a5d4d0',
),
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Test_mscoco.zip',
'Questions_Test_mscoco.zip',
'bd080c297fc863bf8258caa4864d3b5afab29373375a6637f8546338291e28c0',
),
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Val_mscoco.zip',
'Annotations_Val_mscoco.zip',
'29377c35186d90aeab3e61bdad890f51215d1f88b700bd22ef19004d73bf284f',
),
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Train_mscoco.zip',
# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Download and build the data if it does not exist.
import os
import json
from parlai.core.build_data import DownloadableFile
import parlai.core.build_data as build_data
# File to download for the ConvAI2 wild evaluation data. The spec is (URL,
# local file name, hash string) and is passed positionally to DownloadableFile.
# NOTE(review): the 64-hex-digit hash is presumably a SHA-256 checksum --
# confirm against DownloadableFile.
RESOURCES = [
    DownloadableFile(url, file_name, checksum)
    for url, file_name, checksum in (
        (
            'http://lnsigo.mipt.ru/export/datasets/convai/convai2_wild_evaluation_0.2.tgz',
            'convai2_wild_evaluation_0.2.tgz',
            'd40ff70275c8d1939a8081707edcf4e71072097d18b9998100a1099d23e29801',
        ),
    )
]
# Set up three proportions and check that each is strictly positive.
#
# NOTE(review): the names suggest train/valid/test split fractions for `data`,
# with output written under `dpath` -- confirm, since the visible snippet is
# cut off after the assertions (and has lost its indentation) before either
# parameter is used.
def make_parlai_format(data: list, dpath: str):
train_p = 0.6
valid_p = 0.2
# The third proportion is whatever remains after the first two.
test_p = 1 - (train_p + valid_p)
# Sanity checks: every proportion must be greater than zero.
assert train_p > 0
assert valid_p > 0
assert test_p > 0
DownloadableFile(
'http://parl.ai/downloads/COCO-IMG/train2017.zip',
'train2017.zip',
'69a8bb58ea5f8f99d24875f21416de2e9ded3178e903f1f7603e283b9e06d929',
),
DownloadableFile(
'http://parl.ai/downloads/COCO-IMG/val2017.zip',
'val2017.zip',
'4f7e2ccb2866ec5041993c9cf2a952bbed69647b115d0f74da7ce8f4bef82f05',
),
DownloadableFile(
'http://parl.ai/downloads/COCO-IMG/test2017.zip',
'test2017.zip',
'c7908c3c9f94ba2f3340ebbeec58c25db6be8774f18d68c2f15d0e369d95baba',
),
DownloadableFile(
'http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
'annotations_trainval2017.zip',
'113a836d90195ee1f884e704da6304dfaaecff1f023f49b6ca93c4aaae470268',
),
DownloadableFile(
'http://images.cocodataset.org/annotations/image_info_test2017.zip',
'image_info_test2017.zip',
'e52f412dd7195ac8f98d782b44c6dd30ea10241e9f42521f67610fbe055a74f8',
),
]
# Start building the COCO-IMG-2017 image directory.
#
# `opt` is indexed with the key 'datapath'; the target directory is that value
# joined with 'COCO-IMG-2017', and the version string is set to '1'.
#
# NOTE(review): the visible snippet is cut off after these two assignments and
# has lost its indentation; how `dpath` and `version` are used is not shown.
def buildImage(opt):
dpath = os.path.join(opt['datapath'], 'COCO-IMG-2017')
version = '1'
'ad69010002210b7c406718248ee66e65868b9f6820f163aa966369878d14147e',
from_google=True,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_training_urls.txt',
'cnn_wayback_training_urls.txt',
'e074c2245c475b00c455cefb911e0066b27fe17085dd0c773101e10d3088583b',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_validation_urls.txt',
'cnn_wayback_validation_urls.txt',
'b1ae81ff058ca640da3ae2b3c98fefca3adfea358736b6e29efc2ec1cbef5b5c',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_test_urls.txt',
'cnn_wayback_test_urls.txt',
'a0796c3c7812e3c9fcb1a65faa9aee7bb6f8a3869e953c7f61b401790c0a6f33',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_training_urls.txt',
'dailymail_wayback_training_urls.txt',
'3913d6a90c29a81196128346d81c28d6c7f7e91777d886e8417163ce83b2a04a',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_validation_urls.txt',
'dailymail_wayback_validation_urls.txt',
'2377b8f809bd07b143bbbd9e60594d10e7b8a211c8a5672181ea6000bbf548a2',
zipped=False,
'e074c2245c475b00c455cefb911e0066b27fe17085dd0c773101e10d3088583b',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_validation_urls.txt',
'cnn_wayback_validation_urls.txt',
'b1ae81ff058ca640da3ae2b3c98fefca3adfea358736b6e29efc2ec1cbef5b5c',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/cnn_wayback_test_urls.txt',
'cnn_wayback_test_urls.txt',
'a0796c3c7812e3c9fcb1a65faa9aee7bb6f8a3869e953c7f61b401790c0a6f33',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_training_urls.txt',
'dailymail_wayback_training_urls.txt',
'3913d6a90c29a81196128346d81c28d6c7f7e91777d886e8417163ce83b2a04a',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_validation_urls.txt',
'dailymail_wayback_validation_urls.txt',
'2377b8f809bd07b143bbbd9e60594d10e7b8a211c8a5672181ea6000bbf548a2',
zipped=False,
),
DownloadableFile(
'https://raw.githubusercontent.com/abisee/cnn-dailymail/master/url_lists/dailymail_wayback_test_urls.txt',
'dailymail_wayback_test_urls.txt',
'554d18fc79a06a16902662d926cb7cc981ea36a3f82d5ae1426e25bf62f65b87',
zipped=False,
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Val_mscoco.zip',
'Questions_Val_mscoco.zip',
'e8839be5de2d711989bf0adc82e6717d1ce307d27c9b1dfb0abf413b79a5d4d0',
),
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Questions_Test_mscoco.zip',
'Questions_Test_mscoco.zip',
'bd080c297fc863bf8258caa4864d3b5afab29373375a6637f8546338291e28c0',
),
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Val_mscoco.zip',
'Annotations_Val_mscoco.zip',
'29377c35186d90aeab3e61bdad890f51215d1f88b700bd22ef19004d73bf284f',
),
DownloadableFile(
'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/Annotations_Train_mscoco.zip',
'Annotations_Train_mscoco.zip',
'a5f5f97c162a4ad44896be08bac6deaa258aa3fec281afcc84fe85ae44cb1ebc',
),
]
def build(opt):
dpath = os.path.join(opt['datapath'], 'VQA-v1')
version = None
if not build_data.built(dpath, version_string=version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
build_data.remove_dir(dpath)