task = opt['image_load_task']
prepath = os.path.join(opt['datapath'], task)
# drop the '.zip' suffix and keep the last two path components
# (the original used zipname.strip('.zip'), which strips any of the
# characters '.', 'z', 'i', 'p' from both ends, not the suffix)
imagefn = ''.join(zipname[: -len('.zip')].split('/')[-2:]) + path.name
if mode == 'raw':
    # raw just returns RGB values
    return Image.open(path).convert('RGB')
elif mode == 'ascii':
    # convert images to ascii ¯\_(ツ)_/¯
    return self._img_to_ascii(path)
else:
    # otherwise, look for a preprocessed version under the 'mode' directory
    if not is_zip:
        prepath, imagefn = os.path.split(path)
    dpath = os.path.join(prepath, mode)
    if not os.path.exists(dpath):
        build_data.make_dir(dpath)
    imagefn = imagefn.split('.')[0]
    new_path = os.path.join(prepath, mode, imagefn)
    if not os.path.isfile(new_path):
        return self.extract(Image.open(path).convert('RGB'), new_path)
    else:
        return self.torch.load(new_path)
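# The final branch above caches extracted features beside the raw images, so
# a repeat load of the same file hits torch.load instead of rerunning the
# model. A minimal sketch of the save/load round-trip it relies on; this
# `extract` is a hypothetical stand-in for the real extractor, and the zero
# tensor is a placeholder for an actual model forward pass:
import torch

def extract(image, new_path):
    # compute (here: fake) image features and cache them at new_path
    features = torch.zeros(2048)  # placeholder for a real forward pass
    torch.save(features, new_path)
    return features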
import gzip

def build(opt):
    version = 'v1.1'
    dpath = os.path.join(opt['datapath'], 'Twitter')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data: the corpus ships as a gzip file split in two parts.
        fname1 = "twitter_en_big.txt.gz.partaa"
        fname2 = "twitter_en_big.txt.gz.partab"
        url = 'https://github.com/Marsan-Ma/chat_corpus/raw/master/'
        build_data.download(url + fname1, dpath, fname1)
        build_data.download(url + fname2, dpath, fname2)

        file1 = os.path.join(dpath, fname1)
        file2 = os.path.join(dpath, fname2)
        file3 = "twitter_en_big.txt.gz"
        outzipfile = os.path.join(dpath, file3)
        build_data.cat(file1, file2, outzipfile)
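        # The snippet ends after build_data.cat stitches the two downloaded
        # parts back into one gzip archive (hence the gzip import). A sketch
        # of reading the reassembled file, assuming one utf-8 encoded message
        # per line:
        with gzip.open(outzipfile, 'r') as f:
            lines = [line.decode('utf-8') for line in f]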
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CLEVR')
    version = 'v1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        # An older version exists, so remove these outdated files.
        if build_data.built(dpath):
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'CLEVR_v1.0.zip'
        url = 'https://dl.fbaipublicfiles.com/clevr/'
        build_data.download(url + fname, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
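# A build function like this is idempotent: mark_done writes a '.built' file
# recording the version and built() checks for it, so repeat calls return
# without re-downloading. A minimal usage sketch; the datapath value and the
# extracted CLEVR_v1.0 folder name (taken from the archive name) are
# assumptions:
opt = {'datapath': '/tmp/parlai_data'}
build(opt)
clevr_root = os.path.join(opt['datapath'], 'CLEVR', 'CLEVR_v1.0')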
"""
Downloads required embeddings and chars dictionary for agent.
Builds a folders tree.
Args:
opt: parameters from command line
Returns:
nothing
"""
# get path to data directory and create folders tree
dpath = join(opt['model_file'])
# define languages
language = opt['language']
dpath = join(dpath, language, 'agent')
build_data.make_dir(dpath)
build_data.make_dir(join(dpath, 'embeddings'))
build_data.make_dir(join(dpath, 'vocab'))
build_data.make_dir(join(dpath, 'logs', opt['name']))
if not isfile(join(dpath, 'embeddings', 'embeddings_lenta_100.vec')):
print('[Download the word embeddings]...')
try:
embed_url = os.environ['EMBEDDINGS_URL'] + 'embeddings_lenta_100.vec'
build_data.download(embed_url, join(dpath, 'embeddings'), 'embeddings_lenta_100.vec')
print('[End of download the word embeddings]...')
except RuntimeWarning:
raise('To use your own embeddings, please, put the file embeddings_lenta_100.vec in the folder '
'{0}'.format(join(dpath, 'embeddings')))
if not isfile(join(dpath, 'embeddings', 'ft_0.8.3_nltk_yalen_sg_300.bin')):
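    # (snippet truncated here) A plausible body mirroring the download block
    # above; this continuation is an assumption, not the original code:
    print('[Download the fasttext model]...')
    build_data.download(
        os.environ['EMBEDDINGS_URL'] + 'ft_0.8.3_nltk_yalen_sg_300.bin',
        join(dpath, 'embeddings'),
        'ft_0.8.3_nltk_yalen_sg_300.bin',
    )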
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CoQA')
    version = VERSION

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        with open(os.path.join(dpath, 'coqa-train-v1.0.json')) as f:
            data = json.load(f)['data']
            make_parlai_format(dpath, 'train', data)

        with open(os.path.join(dpath, 'coqa-dev-v1.0.json')) as f:
            data = json.load(f)['data']
            make_parlai_format(dpath, 'valid', data)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
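# make_parlai_format is defined elsewhere in the task module. A hypothetical
# minimal version, assuming ParlAI's tab-separated text format and the
# parallel questions/answers arrays of the public CoQA JSON:
def make_parlai_format(dpath, dtype, data):
    with open(os.path.join(dpath, dtype + '.txt'), 'w') as out:
        for dialogue in data:
            questions = dialogue['questions']
            answers = dialogue['answers']
            for i, (q, a) in enumerate(zip(questions, answers)):
                line = 'text:{}\tlabels:{}'.format(
                    q['input_text'].replace('\t', ' '),
                    a['input_text'].replace('\t', ' '),
                )
                # close the episode on the last turn of the dialogue
                if i == len(questions) - 1:
                    line += '\tepisode_done:True'
                out.write(line + '\n')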
    # Download the data.
    for downloadable_file in RESOURCES[:2]:
        downloadable_file.download_file(dpath)

    # Mark the data as built.
    build_data.mark_done(dpath, version_string=version)

if 'fulldoc' in opt['task']:
    dpath += '-fulldoc'
    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        RESOURCES[2].download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
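# RESOURCES and DownloadableFile come from the task module and
# parlai.core.build_data respectively. A sketch of what such a list looks
# like; the URL and checksum below are placeholders, not the real values:
from parlai.core.build_data import DownloadableFile

RESOURCES = [
    DownloadableFile(
        'http://example.com/data/train.json',  # placeholder URL
        'train.json',
        '0' * 64,  # placeholder SHA-256 checksum
        zipped=False,
    ),
]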
def build(opt):
    version = '0.2'
    dpath = os.path.join(opt['datapath'], 'ConvAI2_wild_evaluation')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        output_fname = 'convai2_wild_evaluation.json'
        output_path = os.path.join(dpath, output_fname)

        with open(output_path, 'r') as data_f:
            data = json.load(data_f)

        make_parlai_format(data, dpath)
        os.remove(output_path)

        # Mark the data as built.
        build_data.mark_done(dpath, version)
def build(opt):
    dpath = os.path.join(opt['datapath'], 'mnist')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
def build(opt):
    dpath = os.path.join(opt['datapath'], 'negotiation')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')

        # make a clean directory if needed
        if build_data.built(dpath):
            # an older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data from github
        fname = 'negotiation.zip'
        url = (
            'https://github.com/facebookresearch/end-to-end-negotiator/'
            'archive/master.zip'
        )
        print('[downloading data from: ' + url + ']')
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark as done
        build_data.mark_done(dpath, version_string=version)
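# Downloading a branch archive from GitHub means the zip unpacks into a
# folder named '<repo>-<branch>'. A sketch of where the dialogue files would
# land, assuming the usual source layout of
# facebookresearch/end-to-end-negotiator:
def negotiate_data_dir(opt):
    return os.path.join(
        opt['datapath'],
        'negotiation',
        'end-to-end-negotiator-master',
        'src',
        'data',
        'negotiate',
    )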
Returns:
    nothing
"""
# get the path to the data directory
dpath = os.path.join(opt['datapath'], 'insults')
# define the version, if any
version = '1.0'
# check whether the data has been previously built
if not build_data.built(dpath, version_string=version):
    print('[building data: ' + dpath + ']')
    # make a clean directory if needed
    if build_data.built(dpath):
        # an older version exists, so remove these outdated files.
        build_data.remove_dir(dpath)
    build_data.make_dir(dpath)

    raw_path = os.path.abspath(opt['raw_dataset_path'] or ".")
    train_file = os.path.join(raw_path, 'train.csv')
    valid_file = os.path.join(raw_path, 'test_with_solutions.csv')
    test_file = os.path.join(raw_path, 'impermium_verification_labels.csv')
    if not all(os.path.isfile(f) for f in (train_file, valid_file, test_file)):
        ds_path = os.environ.get('DATASETS_URL')
        file_name = 'insults.tar.gz'
        if not ds_path:
            raise RuntimeError(
                'Please download dataset files from'
                ' https://www.kaggle.com/c/detecting-insults-in-social-commentary/data'
                ' and set the path to their directory in the raw-dataset-path parameter'
            )
        print('Trying to download an insults dataset from the repository')
        url = urllib.parse.urljoin(ds_path, file_name)
        print(repr(url))
        build_data.download(url, dpath, file_name)
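        # The snippet cuts off after the download. A plausible continuation,
        # mirroring the other build functions above; the untar call and the
        # final mark_done are assumptions, not the original code:
        build_data.untar(dpath, file_name)

    # Mark the data as built.
    build_data.mark_done(dpath, version_string=version)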