Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def build(opt):
    """Download the BookTest data unless it is already marked as built.

    Args:
        opt: Mapping of options. ``opt['datapath']`` is the root directory
            under which the ``BookTest`` folder is placed.

    Returns:
        None. All work happens through ``build_data`` calls and the
        ``download_file`` method of each entry in ``RESOURCES``.
    """
    dpath = os.path.join(opt['datapath'], 'BookTest')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        # The directory is (re)created whether or not an old copy was removed.
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Download and unpack the DBLL archive unless it is already built.

    Args:
        opt: Mapping of options. ``opt['datapath']`` is the root directory
            under which the ``DBLL`` folder is placed.

    Returns:
        None. All work happens through ``build_data`` calls.
    """
    dpath = os.path.join(opt['datapath'], 'DBLL')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        # The directory is (re)created whether or not an old copy was removed.
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'dbll.tgz'
        url = 'https://s3.amazonaws.com/fair-data/parlai/dbll/' + fname
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
def build(opt):
"""Create train and validation data for synthetic shapes described by attributes."""
# Target directory is <opt['datapath']>/taskntalk.
dpath = os.path.join(opt['datapath'], 'taskntalk')
# No version string is passed to built() here, unlike the versioned checks
# used by other build() functions.
if not build_data.built(dpath):
print('[building data: ' + dpath + ']')
# Two sub-directories are created under dpath: 'large' and 'small'.
build_data.make_dir(os.path.join(dpath, 'large'))
build_data.make_dir(os.path.join(dpath, 'small'))
# save training and validation data
# 'attributes' names three attributes; 'task_defn' enumerates all six
# ordered pairs of two distinct attributes.
to_save = {
'attributes': ['color', 'shape', 'style'],
'task_defn': [
['color', 'shape'],
['shape', 'color'],
['color', 'style'],
['style', 'color'],
['shape', 'style'],
['style', 'shape'],
],
}
# NOTE(review): the visible excerpt stops here -- to_save is never used and no
# build_data.mark_done() call appears. Leading indentation is also missing.
# Confirm against the full source before relying on this function.
def build(opt):
# Fetches the resources for the 'wmt' directory under opt['datapath'].
dpath = os.path.join(opt['datapath'], 'wmt')
# The version marker is the four-character string 'None', not the None object.
version = 'None'
if not build_data.built(dpath, version_string=version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
build_data.remove_dir(dpath)
build_data.make_dir(dpath)
# Download the data.
for downloadable_file in RESOURCES:
downloadable_file.download_file(dpath)
# File names for the English/German splits. Presumably the *_r_* names are
# inputs to read and the *_w_* names are outputs to write -- TODO confirm,
# the code that consumes them is not visible in this excerpt.
train_r_fnames = ('train.en', 'train.de')
train_w_fname = 'en_de_train.txt'
valid_w_fname = 'en_de_valid.txt'
test_r_fnames = ('newstest2014.en', 'newstest2014.de')
test_w_fname = 'en_de_test.txt'
# NOTE(review): the visible excerpt stops here -- none of the file-name
# variables above are used and no build_data.mark_done() call appears.
# Leading indentation is also missing. Confirm against the full source.
def build(opt):
    """Download and unpack the qangaroo archive unless v1.1 is already built.

    Args:
        opt: Mapping of options. ``opt['datapath']`` is the root directory
            under which the ``qangaroo`` folder is placed.

    Returns:
        None. All work happens through ``build_data`` calls.
    """
    dpath = os.path.join(opt['datapath'], 'qangaroo')
    version = 'v1.1'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        # The directory is (re)created whether or not an old copy was removed.
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'qangaroo.zip'
        g_ID = "1ytVZ4AhubFDOEL7o7XrIRIyhU8g9wvKA"
        print("downloading ...")
        # The archive is fetched by Google Drive file id straight to dpath/fname.
        build_data.download_from_google_drive(g_ID, os.path.join(dpath, fname))
        # NOTE(review): a .zip file is handed to untar(); presumably the helper
        # handles zip archives as well -- confirm against build_data.
        build_data.untar(dpath, fname)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
def build(opt):
# Fetches schema.txt for the 'MWSC' directory under opt['datapath'].
dpath = os.path.join(opt['datapath'], 'MWSC')
# The version marker is the four-character string 'None', not the None object.
version = 'None'
if not build_data.built(dpath, version_string=version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
build_data.remove_dir(dpath)
build_data.make_dir(dpath)
# Download the data.
fname = 'schema.txt'
# The URL contains a 40-character hex path segment, so it addresses one fixed
# revision of the salesforce/decanlp repository rather than a moving branch.
url = (
'https://raw.githubusercontent.com/salesforce/decanlp/'
'd594b2bf127e13d0e61151b6a2af3bf63612f380/local_data/' + fname
)
build_data.download(url, dpath, fname)
# Looks like a regular expression for a '[' ... ']' span; how it is applied is
# not visible in this excerpt -- TODO confirm.
pattern = '\\[.*\\]'
# NOTE(review): the visible excerpt stops here -- pattern is never used and no
# build_data.mark_done() call appears. Leading indentation is also missing.
# Confirm against the full source.
def build(opt):
# Fetches question archives for the 'VQA-v1' directory under opt['datapath'].
dpath = os.path.join(opt['datapath'], 'VQA-v1')
version = None
if not build_data.built(dpath, version_string=version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
build_data.remove_dir(dpath)
build_data.make_dir(dpath)
# Download the data.
# Three question archives and two annotation archives are named here.
fname1 = 'Questions_Train_mscoco.zip'
fname2 = 'Questions_Val_mscoco.zip'
fname3 = 'Questions_Test_mscoco.zip'
fname4 = 'Annotations_Val_mscoco.zip'
fname5 = 'Annotations_Train_mscoco.zip'
# Base URL uses plain http; each archive name is appended to it.
url = 'http://visualqa.org/data/mscoco/vqa/'
build_data.download(url + fname1, dpath, fname1)
build_data.download(url + fname2, dpath, fname2)
build_data.download(url + fname3, dpath, fname3)
# NOTE(review): the visible excerpt stops here -- fname4 and fname5 are never
# downloaded, nothing is unpacked, and no build_data.mark_done() call appears.
# Leading indentation is also missing. Confirm against the full source.
def build(opt):
    """Download the CCPE data unless version 1.0 is already marked as built.

    Args:
        opt: Mapping of options. ``opt['datapath']`` is the root directory
            under which the ``CCPE`` folder is placed.

    Returns:
        None. All work happens through ``build_data`` calls and the
        ``download_file`` method of each entry in ``RESOURCES``.
    """
    dpath = os.path.join(opt['datapath'], 'CCPE')
    version = '1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        # The directory is (re)created whether or not an old copy was removed.
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Build the personality_captions data and, if needed, fetch its images.

    Two independent checks are made:

    1. The caption data under ``<datapath>/personality_captions`` is
       downloaded from ``RESOURCES`` when version 2.0 is not marked as built.
    2. ``download_images(opt)`` is called when ``<datapath>/yfcc_images`` is
       not marked as built at that version and ``opt`` carries no truthy
       ``'yfcc_path'`` entry.

    Args:
        opt: Mapping of options. ``opt['datapath']`` is required;
            ``opt.get('yfcc_path')`` is optional and, when truthy, suppresses
            the image download.

    Returns:
        None.
    """
    dpath = os.path.join(opt['datapath'], 'personality_captions')
    image_path = os.path.join(opt['datapath'], 'yfcc_images')
    version = '2.0'

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        # The directory is (re)created whether or not an old copy was removed.
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        build_data.mark_done(dpath, version)

    # The image check targets a different directory from the caption data, so
    # it is evaluated on every call rather than only after a fresh caption
    # download.
    if not build_data.built(image_path, version) and not opt.get('yfcc_path'):
        download_images(opt)
def build(opt):
dpath = os.path.join(opt['datapath'], 'CoQA')
version = VERSION
if not build_data.built(dpath, version_string=version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
build_data.remove_dir(dpath)
build_data.make_dir(dpath)
# Download the data.
for downloadable_file in RESOURCES:
downloadable_file.download_file(dpath)
with open(os.path.join(dpath, 'coqa-train-v1.0.json')) as f:
data = json.load(f)['data']
make_parlai_format(dpath, 'train', data)
with open(os.path.join(dpath, 'coqa-dev-v1.0.json')) as f:
data = json.load(f)['data']