Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if ext is not None and ext not in avail_exts:
raise ValueError(
'Please provide one of the available extensions.\n'
f'{avail_exts}'
)
else:
ext = is_valid_filename(filename, return_ext=True)
is_present = _check_datafile_present_and_download(
filename,
backup_url=backup_url,
)
if not is_present: logg.debug(f'... did not find original file {filename}')
# read hdf5 files
if ext in {'h5', 'h5ad'}:
if sheet is None:
return read_h5ad(filename, backed=backed)
else:
logg.debug(f'reading sheet {sheet} from file {filename}')
return read_hdf(filename, sheet)
# read other file types
path_cache = settings.cachedir / _slugify(filename).replace('.' + ext, '.h5ad') # type: Path
if path_cache.suffix in {'.gz', '.bz2'}:
path_cache = path_cache.with_suffix('')
if cache and path_cache.is_file():
logg.info(f'... reading from cache file {path_cache}')
return read_h5ad(path_cache)
if not is_present:
raise FileNotFoundError(f'Did not find file {filename}.')
logg.debug(f'reading {filename}')
if not cache and not suppress_cache_warning:
logg.hint(
cache=cache,
cache_compression=cache_compression,
**kwargs,
)
# generate filename and read to dict
filekey = str(filename)
filename = settings.writedir / (filekey + '.' + settings.file_format_data)
if not filename.exists():
raise ValueError(
f'Reading with filekey {filekey!r} failed, '
f'the inferred filename {filename!r} does not exist. '
'If you intended to provide a filename, either use a filename '
f'ending on one of the available extensions {avail_exts} '
'or pass the parameter `ext`.'
)
return read_h5ad(filename, backed=backed)
)
if not is_present: logg.debug(f'... did not find original file {filename}')
# read hdf5 files
if ext in {'h5', 'h5ad'}:
if sheet is None:
return read_h5ad(filename, backed=backed)
else:
logg.debug(f'reading sheet {sheet} from file {filename}')
return read_hdf(filename, sheet)
# read other file types
path_cache = settings.cachedir / _slugify(filename).replace('.' + ext, '.h5ad') # type: Path
if path_cache.suffix in {'.gz', '.bz2'}:
path_cache = path_cache.with_suffix('')
if cache and path_cache.is_file():
logg.info(f'... reading from cache file {path_cache}')
return read_h5ad(path_cache)
if not is_present:
raise FileNotFoundError(f'Did not find file {filename}.')
logg.debug(f'reading {filename}')
if not cache and not suppress_cache_warning:
logg.hint(
'This might be very slow. Consider passing `cache=True`, '
'which enables much faster reading from a cache file.'
)
# do the actual reading
if ext == 'xlsx' or ext == 'xls':
if sheet is None:
raise ValueError(
"Provide `sheet` parameter when reading '.xlsx' files."
)
else:
if not os.path.exists(filename) and backup_url is None:
raise FileNotFoundError('Did not find file {}.'.format(filename))
elif not os.path.exists(filename):
d = os.path.dirname(filename)
if not os.path.exists(d): os.makedirs(d)
urlretrieve(backup_url, filename)
ext = Path(filename).suffixes[-1][1:]
if ext in numpy_ext:
return np.load(filename, **kwargs)
elif ext in pandas_ext:
return pd.read_csv(filename, **kwargs)
elif ext in adata_ext:
return anndata.read(filename, **kwargs)
else:
raise ValueError('"{}" does not end on a valid extension.\n'
'Please, provide one of the available extensions.\n{}\n'
.format(filename, numpy_ext | pandas_ext))