Location in which to place the dataset. Default is None, which uses the
package's default path for downloaded data.
overwrite : :obj:`bool`, optional
Whether to overwrite existing files or not. Default is False.
verbose : :obj:`int`, optional
Default is 1.
Returns
-------
data_dir : :obj:`str`
Updated data directory pointing to dataset files.
"""
# --- Fragment: tail of a dataset-download function; its ``def`` line and
# docstring head are outside this chunk, and indentation appears stripped
# by the extraction, so code below is left exactly as found. ---
# NeuroVault collection 1425: NIDM-Results packs for 21 pain studies.
url = 'https://neurovault.org/collections/1425/download'
dataset_name = 'nidm_21pain'
# Resolve (and create, if needed) the dataset directory under data_dir.
data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)
desc_file = op.join(data_dir, 'description.txt')
# description.txt is used as the marker for a completed prior download:
# skip re-downloading unless overwrite was requested.
if op.isfile(desc_file) and overwrite is False:
return data_dir
# Download
fname = op.join(data_dir, url.split('/')[-1])
_download_zipped_file(url, filename=fname)
# Unzip
with zipfile.ZipFile(fname, 'r') as zip_ref:
zip_ref.extractall(data_dir)
# Collect extracted top-level directories, excluding the .nidm packs.
collection_folders = [f for f in glob(op.join(data_dir, '*')) if '.nidm' not in f]
collection_folders = [f for f in collection_folders if op.isdir(f)]
if len(collection_folders) > 1:
# NOTE(review): this statement is truncated mid-string in this chunk;
# the message presumably continues on a following (missing) line.
raise Exception('More than one folder found: '
def download_peaks2maps_model(data_dir=None, overwrite=False, verbose=1):
"""
Download the trained Peaks2Maps model from OHBM 2018.

Parameters
----------
data_dir : :obj:`str`, optional
    Location in which to place the model. Default is None, which uses the
    package's default path for downloaded data.
overwrite : :obj:`bool`, optional
    Whether to overwrite existing files or not. Default is False.
verbose : :obj:`int`, optional
    Default is 1.

Returns
-------
data_dir : :obj:`str`
    Updated data directory pointing to model files.
"""
# NOTE(review): body indentation appears stripped by extraction, and the
# function is truncated at the end of this chunk (the streamed download
# and extraction continue past this view). Code is left exactly as found.
url = "https://zenodo.org/record/1257721/files/ohbm2018_model.tar.xz?download=1"
# Work in a '<name>__temp' directory whose path is later mapped onto the
# final dataset name, apparently so a partial download is distinguishable
# from a completed one (see the description.txt check below).
temp_dataset_name = 'peaks2maps_model_ohbm2018__temp'
temp_data_dir = _get_dataset_dir(temp_dataset_name, data_dir=data_dir, verbose=verbose)
dataset_name = 'peaks2maps_model_ohbm2018'
data_dir = temp_data_dir.replace(temp_dataset_name, dataset_name)
desc_file = op.join(data_dir, 'description.txt')
# description.txt marks a completed prior download: discard the temp dir
# and return early unless overwrite was requested.
if op.isfile(desc_file) and overwrite is False:
shutil.rmtree(temp_data_dir)
return data_dir
LGR.info('Downloading the model (this is a one-off operation)...')
# Streaming, so we can iterate over the response.
r = requests.get(url, stream=True)
f = BytesIO()
# Total size in bytes.
total_size = int(r.headers.get('content-length', 0))
Location in which to place MALLET. Default is None, which uses the
package's default path for downloaded data.
overwrite : :obj:`bool`, optional
Whether to overwrite existing files or not. Default is False.
verbose : :obj:`int`, optional
Default is 1.
Returns
-------
data_dir : :obj:`str`
Updated data directory pointing to MALLET files.
"""
# --- Fragment: the ``def`` line and docstring head of this MALLET
# downloader lie outside this chunk; indentation appears stripped. ---
# MALLET 2.0.7 (Java topic-modeling toolkit) tarball.
url = 'http://mallet.cs.umass.edu/dist/mallet-2.0.7.tar.gz'
# Download and extract into a temp directory, then rename it onto the
# final 'mallet' path, so an interrupted run never leaves a
# half-populated final directory.
temp_dataset_name = 'mallet__temp'
temp_data_dir = _get_dataset_dir(temp_dataset_name, data_dir=data_dir, verbose=verbose)
dataset_name = 'mallet'
data_dir = temp_data_dir.replace(temp_dataset_name, dataset_name)
desc_file = op.join(data_dir, 'description.txt')
# description.txt marks a completed prior download: remove the temp dir
# and reuse the existing directory unless overwrite was requested.
if op.isfile(desc_file) and overwrite is False:
shutil.rmtree(temp_data_dir)
return data_dir
mallet_file = op.join(temp_data_dir, op.basename(url))
_download_zipped_file(url, mallet_file)
# SECURITY NOTE(review): extractall() on a downloaded tarball can write
# outside the target directory via crafted member paths (path traversal);
# consider validating members or using tarfile's filter= argument
# (Python 3.12+) before extraction.
with tarfile.open(mallet_file) as tf:
tf.extractall(path=temp_data_dir)
# Promote the extracted 'mallet-2.0.7' folder to the final dataset path.
os.rename(op.join(temp_data_dir, 'mallet-2.0.7'), data_dir)
def __init__(self, text_df, text_column='abstract', n_topics=50,
n_iters=1000, alpha='auto', beta=0.001):
"""Initialize a MALLET-backed topic model (presumably LDA, given the
alpha/beta/n_topics hyperparameters — TODO confirm against the class
docstring, which is outside this chunk).

NOTE(review): this method is truncated in this chunk — ``mallet_bin``,
``text_dir``, ``text_df``, and ``text_column`` are set up or accepted
here but used past the end of this view. Indentation appears stripped
by extraction; code is left exactly as found.
"""
# Ensure MALLET itself is available locally.
mallet_dir = download_mallet()
mallet_bin = op.join(mallet_dir, 'bin/mallet')
model_dir = utils._get_dataset_dir('mallet_model')
text_dir = op.join(model_dir, 'texts')
if not op.isdir(model_dir):
os.mkdir(model_dir)
# 'auto' follows the common LDA heuristic alpha = 50 / n_topics;
# otherwise alpha must be an explicit float.
if alpha == 'auto':
alpha = 50. / n_topics
elif not isinstance(alpha, float):
raise ValueError('Argument alpha must be float or "auto"')
# Record hyperparameters for later reference.
self.params = {
'n_topics': n_topics,
'n_iters': n_iters,
'alpha': alpha,
'beta': beta,
}
verbose : :obj:`int`, optional
Default is 1.
Returns
-------
out_dict : :obj:`dict`
Dictionary with two keys: 'ids' and 'relationships'. Each points to a
csv file. The 'ids' file contains CogAt identifiers, canonical names,
and aliases, sorted by alias length (number of characters).
The 'relationships' file contains associations between CogAt items,
with three columns: input, output, and rel_type (relationship type).
"""
# --- Fragment: body of a Cognitive Atlas downloader; its ``def`` line is
# outside this chunk and the body is truncated at the end of this view.
# Indentation appears stripped by extraction; code is left as found. ---
from cognitiveatlas.api import get_concept, get_task, get_disorder
dataset_name = 'cognitive_atlas'
data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose)
# Output files: term aliases and inter-term relationships (per the
# docstring above, id/name/alias rows and input/output/rel_type rows).
ids_file = op.join(data_dir, 'cogat_aliases.csv')
rels_file = op.join(data_dir, 'cogat_relationships.csv')
# Hit the Cognitive Atlas API only when forced (overwrite) or when either
# output file is missing.
if overwrite or not all([op.isfile(f) for f in [ids_file, rels_file]]):
concepts = get_concept(silent=True).pandas
tasks = get_task(silent=True).pandas
disorders = get_disorder(silent=True).pandas
# Identifiers and aliases
long_concepts = _longify(concepts)
long_tasks = _longify(tasks)
# Disorders currently lack aliases
# Lower-case disorder names and mirror them into an 'alias' column so
# disorders share the id/name/alias schema used for concepts and tasks.
disorders['name'] = disorders['name'].str.lower()
disorders = disorders.assign(alias=disorders['name'])
disorders = disorders[['id', 'name', 'alias']]
# NOTE(review): processing continues past the end of this chunk.