if publish_depends:
if depvar in ds.config:
# config vars are incremental, so make sure we start from
# scratch
ds.config.unset(depvar, where='local', reload=False)
for d in assure_list(publish_depends):
lgr.info(
'Configure additional publication dependency on "%s"',
d)
ds.config.add(depvar, d, where='local', reload=False)
ds.config.reload()
if publish_by_default:
if dfltvar in ds.config:
ds.config.unset(dfltvar, where='local', reload=False)
for refspec in assure_list(publish_by_default):
lgr.info(
'Configure additional default publication refspec "%s"',
refspec)
ds.config.add(dfltvar, refspec, where='local', reload=False)
ds.config.reload()
assert isinstance(ds.repo, GitRepo) # just against silly code
if isinstance(ds.repo, AnnexRepo):
# we need to check if the added sibling is an annex, and try to enable it
# another part of the fix for #463 and #432
try:
exc = None
if not ds.config.obtain(
'remote.{}.annex-ignore'.format(name),
default=False,
valtype=EnsureBool(),
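# A minimal sketch of the reset-then-append pattern used above for incremental
# config variables. It assumes a DataLad `Dataset` and that the dependency
# variable is named 'remote.<name>.datalad-publish-depends' (an assumption
# inferred from the code above); helper name and paths are illustrative.
from datalad.api import Dataset

def set_publish_depends(dataset_path, sibling_name, depends):
    ds = Dataset(dataset_path)
    depvar = 'remote.{}.datalad-publish-depends'.format(sibling_name)
    if depvar in ds.config:
        # incremental variable: wipe previous values before re-adding
        ds.config.unset(depvar, where='local', reload=False)
    for dep in depends:
        ds.config.add(depvar, dep, where='local', reload=False)
    # a single reload at the end instead of one per add()
    ds.config.reload()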
'description': 'web',
'here': False,
'urls': ['http://127.0.0.1:43442/about.txt', 'http://example.com/someurl']
}}
"""
if batch:
lgr.warning("TODO: --batch mode for whereis. Operating serially")
OUTPUTS = {'descriptions', 'uuids', 'full'}
if output not in OUTPUTS:
raise ValueError(
"Unknown value output=%r. Known are %s"
% (output, ', '.join(map(repr, OUTPUTS)))
)
options = assure_list(options, copy=True)
options += ["--key"] if key else []
json_objects = self._run_annex_command_json('whereis', args=options + files)
if output in {'descriptions', 'uuids'}:
return [
[remote.get(output[:-1]) for remote in j.get('whereis')]
if j.get('success') else []
for j in json_objects]
elif output == 'full':
# TODO: we might want to optimize storage since many remote entries will be
# the same, so we could just reuse them instead of brewing copies
return {j['key' if (key or '--all' in options) else 'file']:
self._whereis_json_to_dict(j)
for j in json_objects
if not j.get('key').endswith('.this-is-a-test-key')}
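# A hedged usage sketch for the whereis method above, assuming an annex
# repository at the given (hypothetical) path: 'descriptions'/'uuids' return
# one list per queried file, 'full' returns a dict keyed by file (or key).
from datalad.support.annexrepo import AnnexRepo

repo = AnnexRepo('/path/to/annex/repo')  # hypothetical path
# one list of remote descriptions per file; [] where the query failed
descriptions = repo.whereis(['about.txt'], output='descriptions')
# full per-file records as produced by `git annex whereis --json`
full = repo.whereis(['about.txt'], output='full')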
# flatten possibly existing multiple metadata sources
for src in dsinfo['metadata']:
if src.startswith('@'):
# not a source
continue
meta.update(dsinfo['metadata'][src])
metainfo = ''
for label, content in (
('', meta.get('description', meta.get('shortdescription', ''))),
('Author{}'.format('s' if isinstance(meta.get('author', None), list) else ''),
u'\n'.join([u'- {}'.format(a) for a in assure_list(meta.get('author', []))])),
('Homepage', meta.get('homepage', '')),
('Reference', meta.get('citation', '')),
('License', meta.get('license', '')),
('Keywords', u', '.join([u'`{}`'.format(k) for k in assure_list(meta.get('tag', []))])),
('Funding', meta.get('fundedby', '')),
):
if label and content:
metainfo += u'\n\n### {}\n\n{}'.format(label, content)
elif content:
metainfo += u'\n\n{}'.format(content)
for key in 'title', 'name', 'shortdescription':
if 'title' in meta:
break
if key in meta:
meta['title'] = meta[key]
default_content=u"""\
# {title}{metainfo}
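# A standalone sketch of the section-assembly pattern above: labeled metadata
# fields become markdown sections, while the unlabeled description is emitted
# as plain text (the field set is shortened for illustration).
def render_metainfo(meta):
    out = u''
    for label, content in (
            ('', meta.get('description', '')),
            ('License', meta.get('license', '')),
            ('Keywords', u', '.join(u'`{}`'.format(k) for k in meta.get('tag', []))),
    ):
        if label and content:
            out += u'\n\n### {}\n\n{}'.format(label, content)
        elif content:
            out += u'\n\n{}'.format(content)
    return out

# e.g. render_metainfo({'description': 'demo', 'license': 'CC0', 'tag': ['a', 'b']})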
# Ugly? Jep: #2055
# materialize, since content_info is iterated more than once below
content_info = list(zip(paths, ds.repo.file_has_content(paths), ds.repo.is_under_annex(paths)))
paths = [p for p, c, a in content_info if not a or c]
nocontent = len(fullpathlist) - len(paths)
if nocontent:
# TODO better fail, or support incremental mode and label these files as not present
lgr.warning(
'{} files have no content present, '
'some extractors will not operate on {}'.format(
nocontent,
'them' if nocontent > 10
else [p for p, c, a in content_info if not c and a])
)
# pull out potential metadata field blacklist config settings
blacklist = [re.compile(bl) for bl in assure_list(ds.config.obtain(
'datalad.metadata.aggregate-ignore-fields',
default=[]))]
# enforce size limits
max_fieldsize = ds.config.obtain('datalad.metadata.maxfieldsize')
# keep local, who knows what some extractors might pull in
from pkg_resources import iter_entry_points # delayed heavy import
extractors = {ep.name: ep for ep in iter_entry_points('datalad.metadata.extractors')}
# we said that we want to fail, rather than just moan about less metadata
# Do an early check that all extractors are available, so as not to wait
# hours and then crash for some obvious reason
absent_extractors = [t for t in types if t not in extractors]
if absent_extractors:
raise ValueError(
'%d enabled metadata extractor%s not available in this installation'
': %s' %
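# A self-contained sketch of the early availability check above, assuming
# extractors are registered under the 'datalad.metadata.extractors' entry
# point group as in the code; the error message wording is illustrative.
from pkg_resources import iter_entry_points

def check_extractors(types):
    extractors = {ep.name: ep for ep in iter_entry_points('datalad.metadata.extractors')}
    absent = [t for t in types if t not in extractors]
    if absent:
        raise ValueError(
            '%d enabled metadata extractor%s not available in this installation: %s'
            % (len(absent), 's' if len(absent) > 1 else '', ', '.join(absent)))
    return extractors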
def add_(self, files, git=True, git_options=None, update=False):
"""Like `add`, but returns a generator"""
# TODO: git_options is used as options for the git-add here,
# instead of options to the git executable => rename for consistency
if not git:
lgr.warning(
'GitRepo.add() called with git=%s, this should not happen',
git)
git = True
# there is no other way than to collect all files into a list
# at this point, because we need to pass them at once to a single
# `git add` call
files = [_normalize_path(self.path, f) for f in assure_list(files) if f]
if not (files or git_options or update):
# wondering why just a warning? in cmdline this is also not an error
lgr.warning("add was called with empty file list and no options.")
return
try:
# without --verbose git 2.9.3 add does not return anything
add_out = self._git_custom_command(
files,
# Set annex.largefiles to prevent storing files in annex when
# GitRepo() is instantiated with a v6+ annex repo.
['git', '-c', 'annex.largefiles=nothing', 'add'] +
assure_list(git_options) +
to_options(update=update) + ['--verbose']
)
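# A sketch of the equivalent command line that the call above assembles
# (`_git_custom_command` is DataLad-internal; the helper name here is made up).
import subprocess

def git_add_verbose(repo_path, files, update=False):
    cmd = ['git', '-C', repo_path, '-c', 'annex.largefiles=nothing', 'add']
    if update:
        cmd.append('--update')
    # --verbose makes older git (e.g. 2.9.3) report what was actually added
    cmd += ['--verbose'] + list(files)
    return subprocess.run(cmd, capture_output=True, text=True, check=True)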
tuple
path, name, format string, help message
"""
ds = ds if isinstance(ds, Dataset) else Dataset(ds) if ds else None
# 1. check system and user account for procedure
for loc in (cfg.obtain('datalad.locations.user-procedures'),
cfg.obtain('datalad.locations.system-procedures')):
for dir in assure_list(loc):
for m, n in _get_file_match(dir, name):
yield (m, n,) + _get_proc_config(n)
# 2. check dataset for procedure
if ds is not None and ds.is_installed():
# could be more than one
dirs = assure_list(
ds.config.obtain('datalad.locations.dataset-procedures'))
for dir in dirs:
# TODO `get` dirs if necessary
for m, n in _get_file_match(op.join(ds.path, dir), name):
yield (m, n,) + _get_proc_config(n, ds=ds)
# 2.1. check subdatasets recursively
for subds in ds.subdatasets(return_type='generator',
result_xfm='datasets'):
for m, n, f, h in _get_procedure_implementation(name=name, ds=subds):
yield m, n, f, h
# 3. check extensions for procedure
# delay heavy import until here
from pkg_resources import iter_entry_points
from pkg_resources import resource_isdir
from pkg_resources import resource_filename
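# A rough sketch of what the per-directory lookup (`_get_file_match`, a
# DataLad-internal helper) amounts to: glob for procedure files by name;
# the extension list here is an assumption.
import glob
import os.path as op

def find_procedure_files(directory, name='*'):
    for ext in ('.py', '.sh', ''):
        for match in glob.glob(op.join(directory, name + ext)):
            procname = op.splitext(op.basename(match))[0]
            yield match, procname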
if not exists(output_directory):
lgr.info(
"creating output directory at '{}'".format(output_directory))
os.makedirs(output_directory)
# prep for assay table info
protocols = OrderedDict()
for prop in assay_props:
info[prop] = []
# pull out essential metadata bits about the dataset itself
# (for the study description)
dsbidsmeta = getprop(dsmeta, ['metadata', 'bids'], {})
info['name'] = dsbidsmeta.get('shortdescription', dsbidsmeta.get('name', 'TODO'))
info['author'] = '\t'.join(assure_list(dsbidsmeta.get('author', [])))
info['keywords'] = '\t'.join(assure_list(dsbidsmeta.get('tag', [])))
# generate: s_study.txt
study_df = _get_study_df(dsmeta)
if study_df.empty:
# no samples, no assays, no metadataset
return None
_gather_protocol_parameters_from_df(study_df, protocols)
_store_beautiful_table(
study_df,
output_directory,
"s_study.txt")
info['studytab_filename'] = 's_study.txt'
deface_df = None
# all imaging modalities recognized in BIDS
#TODO maybe fold 'defacemask' into each suffix as a derivative
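# A hedged sketch of what `_store_beautiful_table` presumably does, assuming
# the `*_df` objects are pandas DataFrames (ISA-Tab tables are tab-separated):
import os.path as op
import pandas as pd

def store_table(df, output_directory, fname):
    df.to_csv(op.join(output_directory, fname), sep='\t', index=False)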
('{}-={}', remove)):
if d:
spec.extend(_genspec(expr, d))
# prefix all with '-s' and extend arg list
args.extend(j for i in zip(['-s'] * len(spec), spec) for j in i)
if purge:
# and all '-r' args
args.extend(j for i in zip(['-r'] * len(purge), purge)
for j in i)
if not args:
return
if recursive:
args.append('--force')
# append actual file path arguments
args.extend(assure_list(files))
# XXX do we need the return values for anything?
self._run_annex_command_json(
'metadata',
args)
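# A standalone sketch of the argument assembly above, mapping onto the
# git-annex CLI where `-s field=value` sets, `-s field+=value`/`-s field-=value`
# add/remove single values, and `-r field` drops a field entirely
# (parameter names here are illustrative).
def build_metadata_args(init=None, add=None, remove=None, purge=None, files=None):
    spec = []
    for expr, d in (('{}={}', init or {}),
                    ('{}+={}', add or {}),
                    ('{}-={}', remove or {})):
        spec.extend(expr.format(k, v) for k, v in d.items())
    args = [a for s in spec for a in ('-s', s)]
    args += [a for field in (purge or []) for a in ('-r', field)]
    args += list(files or [])
    return args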
def get_query(self, query):
query = assure_list(query)
simple_fieldspec = re.compile(r"(?P\S*?):(?P.*)")
quoted_fieldspec = re.compile(r"'(?P[^']+?)':(?P.*)")
query_rec_matches = [
simple_fieldspec.match(q) or
quoted_fieldspec.match(q) or
q
for q in query]
query_group_dicts_only = [
q.groupdict() for q in query_rec_matches if hasattr(q, 'groupdict')
]
self._queried_keys = [
qgd['field']
for qgd in query_group_dicts_only
if ('field' in qgd and qgd['field'])
]
if len(query_group_dicts_only) != len(query_rec_matches):
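# A quick, self-contained demonstration of the field-spec parsing above:
import re

simple_fieldspec = re.compile(r"(?P<field>\S*?):(?P<query>.*)")
quoted_fieldspec = re.compile(r"'(?P<field>[^']+?)':(?P<query>.*)")

print(simple_fieldspec.match("author:einstein").groupdict())
# {'field': 'author', 'query': 'einstein'}
print(quoted_fieldspec.match("'free text':relativity").groupdict())
# {'field': 'free text', 'query': 'relativity'}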