# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def build(opt):
    """Download and set up the MutualFriends dataset under opt['datapath']."""
    dpath = os.path.join(opt['datapath'], 'MutualFriends')
    version = None

    # Nothing to do if the current version is already in place.
    if build_data.built(dpath, version_string=version):
        return

    print('[building data: ' + dpath + ']')
    # A stale copy from an older version must be wiped before rebuilding.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Fetch every registered resource into the data directory.
    for resource in RESOURCES:
        resource.download_file(dpath)

    # Drop the completion marker so future calls skip the download.
    build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Download the WoZ restaurant dialogue JSON files into opt['datapath']."""
    dpath = os.path.join(opt['datapath'], 'WoZ')
    # NOTE(review): this is the literal string 'None', not the None object.
    # It is recorded in the build marker, so "fixing" it would force every
    # existing installation to rebuild — left as-is deliberately; confirm
    # before changing.
    version = 'None'

    if build_data.built(dpath, version_string=version):
        return

    print('[building data: ' + dpath + ']')
    # Remove any outdated copy left by an earlier version.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Download the three dataset splits from the neural-belief-tracker repo.
    base_url = (
        'https://github.com/nmrksic/'
        'neural-belief-tracker/raw/master/data/woz/'
    )
    for fname in ('woz_test_en.json', 'woz_train_en.json', 'woz_validate_en.json'):
        build_data.download(base_url + fname, dpath, fname)

    build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Download and set up the SQuAD2 dataset under opt['datapath']."""
    dpath = os.path.join(opt['datapath'], 'SQuAD2')
    version = None

    if build_data.built(dpath, version_string=version):
        # Already built at this version; nothing to do.
        return

    print('[building data: ' + dpath + ']')
    # Clear out an older build, if one exists, then start fresh.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Pull down each registered resource.
    for resource in RESOURCES:
        resource.download_file(dpath)

    # Mark the directory complete so we never redo this work.
    build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Download and extract the TalkTheWalk archive into opt['datapath']."""
    dpath = os.path.join(opt['datapath'], 'TalkTheWalk')
    version = None

    if build_data.built(dpath, version_string=version):
        return

    print('[building data: ' + dpath + ']')
    # An older version on disk gets removed before the fresh download.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Download and unpack the single tarball that holds the dataset.
    archive = 'talkthewalk.tgz'
    build_data.download(
        'https://dl.fbaipublicfiles.com/parlai/projects/talkthewalk/' + archive,
        dpath,
        archive,
    )
    build_data.untar(dpath, archive)

    build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Download and set up the QA-ZRE dataset under opt['datapath']."""
    dpath = os.path.join(opt['datapath'], 'QA-ZRE')
    version = None

    # Skip everything when the data is already present at this version.
    if build_data.built(dpath, version_string=version):
        return

    print('[building data: ' + dpath + ']')
    if build_data.built(dpath):
        # Outdated files from a previous version — remove them first.
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Retrieve each registered resource file.
    for resource in RESOURCES:
        resource.download_file(dpath)

    build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Download the NLVR corpus from the clic-lab GitHub repository."""
    dpath = os.path.join(opt['datapath'], 'nlvr')
    version = None

    if build_data.built(dpath, version_string=version):
        return

    print('[building data: ' + dpath + ']')
    # Start from a clean directory: drop any outdated build first.
    if build_data.built(dpath):
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # The repository's master branch zip contains the whole dataset.
    archive = 'nlvr.zip'
    url = 'https://github.com/clic-lab/nlvr/archive/master.zip'
    print('[downloading data from: ' + url + ']')
    build_data.download(url, dpath, archive)
    build_data.untar(dpath, archive)

    # Mark as done
    build_data.mark_done(dpath, version_string=version)
def build(datapath, use_history):
    """Prepare the OpenSubtitles2018 data and return its directory.

    When ``use_history`` is falsy the output goes to a separate
    ``_no_history`` directory so both variants can coexist on disk.
    """
    dpath = os.path.join(datapath, 'OpenSubtitles2018')
    if not use_history:
        dpath += '_no_history'
    version = '1'

    # Already built at this version: just hand back the path.
    if build_data.built(dpath, version_string=version):
        return dpath

    print('[building data: ' + dpath + ']')
    if build_data.built(dpath):
        # Remove outdated files from a previous version.
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    extracted = os.path.join(dpath, 'OpenSubtitles', 'xml', 'en')
    # Only download + extract when the expected number of XML files
    # is not already on disk (the archive is very large).
    if len(glob.glob(extracted + '/*/*/*.xml')) != NUM_SUBTITLES_FILES:
        url = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/xml/en.zip'
        build_data.download(url, dpath, 'OpenSubtitles2018.zip')
        build_data.untar(dpath, 'OpenSubtitles2018.zip')

    # Convert the raw XML into the Facebook dialogue format.
    create_fb_format(extracted, dpath, use_history)

    build_data.mark_done(dpath, version_string=version)
    return dpath
def build(opt):
    """Download and set up the taskmaster-1 dataset under opt['datapath']."""
    # get path to data directory
    dpath = os.path.join(opt['datapath'], 'taskmaster-1')
    # define version if any
    version = "1.0"

    # Nothing to do when this version has already been built.
    if build_data.built(dpath, version_string=version):
        return

    print('[building data: ' + dpath + ']')
    # make a clean directory if needed
    if build_data.built(dpath):
        # an older version exists, so remove these outdated files.
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Fetch every registered resource.
    for resource in RESOURCES:
        resource.download_file(dpath)

    # mark the data as built
    build_data.mark_done(dpath, version_string=version)
def build(opt):
    """Download the CNN/DailyMail data and write per-split story lists.

    Downloads every registered resource, then turns the three URL-list
    files (RESOURCES[2:5], one per entry of ``data_type``) into
    ``<split>.txt`` files listing the story file for each URL.
    """
    dpath = os.path.join(opt['datapath'], 'CNN_DM')
    version = None

    if build_data.built(dpath, version_string=version):
        return

    print('[building data: ' + dpath + ']')
    if build_data.built(dpath):
        # An older version exists, so remove these outdated files.
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    # Download the data.
    for downloadable_file in RESOURCES:
        downloadable_file.download_file(dpath)

    for i, f in enumerate(RESOURCES[2:5]):
        dt = data_type[i]
        urls_fname = os.path.join(dpath, f.file_name)
        split_fname = os.path.join(dpath, dt + '.txt')
        with open(urls_fname) as urls_file, open(split_fname, 'a') as split_file:
            for url in urls_file:
                # Story files are named by the SHA-1 hex digest of their URL.
                file_name = hashlib.sha1(url.strip().encode('utf-8')).hexdigest()
                split_file.write("cnn/stories/{}.story\n".format(file_name))

    # BUGFIX: mark the data as built. Without this marker, built() never
    # returns True, so every call re-downloaded everything and, because the
    # split files are opened in append mode, duplicated their contents.
    build_data.mark_done(dpath, version_string=version)
def build(opt):
version = 'v1.1'
dpath = os.path.join(opt['datapath'], 'Twitter')
if not build_data.built(dpath, version):
print('[building data: ' + dpath + ']')
if build_data.built(dpath):
# An older version exists, so remove these outdated files.
build_data.remove_dir(dpath)
build_data.make_dir(dpath)
# Download the data.
fname1 = "twitter_en_big.txt.gz.partaa"
fname2 = "twitter_en_big.txt.gz.partab"
url = 'https://github.com/Marsan-Ma/chat_corpus/raw/master/'
build_data.download(url + fname1, dpath, fname1)
build_data.download(url + fname2, dpath, fname2)
file1 = os.path.join(dpath, fname1)
file2 = os.path.join(dpath, fname2)
file3 = "twitter_en_big.txt.gz"
outzipfile = os.path.join(dpath, file3)
build_data.cat(file1, file2, outzipfile)
import gzip