# Imports assumed for this snippet: `opj` is os.path.join and `cfg` is
# DataLad's global configuration manager.
from hashlib import md5
from os.path import join as opj
from urllib.parse import urlsplit
import pickle

from datalad import cfg


def get_url_cache_filename(url, name=None):
    """Return a filename under which to cache an online doc from a url"""
    if not name:
        name = "misc"
    cache_dir = opj(cfg.obtain('datalad.locations.cache'), name)
    doc_fname = opj(
        cache_dir,
        '{}-{}.p{}'.format(
            urlsplit(url).netloc,
            md5(url.encode('utf-8')).hexdigest(),
            pickle.HIGHEST_PROTOCOL)
    )
    return doc_fname
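A quick usage sketch (the URL and the cache name below are illustrative, not taken from the snippet): the helper maps a URL to a pickle filename inside DataLad's configured cache location.

# hypothetical call; 'docs' and the URL are made-up values
fname = get_url_cache_filename('http://docs.datalad.org/config.html', name='docs')
# -> <datalad.locations.cache>/docs/docs.datalad.org-<md5 of url>.p<pickle protocol>
print(fname)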
if not exists(index_dir):
    os.makedirs(index_dir)
# this is a pretty cheap call that just pulls this info from a file
dsinfo = self.ds.metadata(
    get_aggregates=True,
    return_type='list',
    result_renderer='disabled')
self._mk_schema(dsinfo)
idx_obj = widx.create_in(index_dir, self.schema)
idx = idx_obj.writer(
    # cache size per process
    limitmb=cfg.obtain('datalad.search.indexercachesize'),
    # disable parallel indexing for now till #1927 is resolved
    ## number of processes for indexing
    #procs=multiprocessing.cpu_count(),
    ## write separate index segments in each process for speed
    ## asks for writer.commit(optimize=True)
    #multisegment=True,
)
# load metadata of the base dataset and what it knows about all its
# subdatasets (recursively)
old_idx_size = 0
old_ds_rpath = ''
idx_size = 0
log_progress(
    lgr.info,
    'autofieldidxbuild',
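For context, here is a minimal, self-contained sketch of the Whoosh calls the snippet relies on; the schema, index directory, and cache size are stand-ins (the real schema comes from self._mk_schema() and the cache size from the datalad.search.indexercachesize option).

import os
import whoosh.index as widx
from whoosh.fields import Schema, ID, TEXT

# stand-in schema and location, not the ones DataLad builds
schema = Schema(path=ID(stored=True, unique=True), description=TEXT)
index_dir = '/tmp/datalad_search_index'
if not os.path.exists(index_dir):
    os.makedirs(index_dir)

idx_obj = widx.create_in(index_dir, schema)
# limitmb bounds the per-process indexer cache (256 MB is a placeholder)
writer = idx_obj.writer(limitmb=256)
writer.add_document(path=u'some/file', description=u'an example document')
writer.commit()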
before), the addition (or just an update) of a (sub-)dataset would otherwise
surprisingly cause you to execute code different from what you defined
within ~/.gitconfig or your local repository's .git/config.
So, local definitions take precedence over remote ones, and more specific
ones over more general ones.

Returns
-------
tuple
  path, name, format string, help message
"""
ds = ds if isinstance(ds, Dataset) else Dataset(ds) if ds else None
# 1. check system and user account for procedure
for loc in (cfg.obtain('datalad.locations.user-procedures'),
            cfg.obtain('datalad.locations.system-procedures')):
    for dir in assure_list(loc):
        for m, n in _get_file_match(dir, name):
            yield (m, n,) + _get_proc_config(n)
# 2. check dataset for procedure
if ds is not None and ds.is_installed():
    # could be more than one
    dirs = assure_list(
        ds.config.obtain('datalad.locations.dataset-procedures'))
    for dir in dirs:
        # TODO `get` dirs if necessary
        for m, n in _get_file_match(op.join(ds.path, dir), name):
            yield (m, n,) + _get_proc_config(n, ds=ds)
    # 2.1. check subdatasets recursively
    for subds in ds.subdatasets(return_type='generator',
                                result_xfm='datasets'):
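As an aside, a small sketch of why the lookups above are wrapped in assure_list: a procedure-location option may hold a single path or several, and the helper normalizes both cases to a list (the paths below are made up).

from datalad.utils import assure_list

# a single configured directory becomes a one-element list ...
print(assure_list('code/procedures'))                       # ['code/procedures']
# ... and an already multi-valued setting is returned as a list unchanged
print(assure_list(['code/procedures', 'tools/procedures']))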
# EnsureKeyChoice comes from datalad.support.constraints
from datalad.support.constraints import EnsureKeyChoice


def _get_result_filter(cls, args):
    from datalad import cfg
    result_filter = None
    if args.common_report_status or 'datalad.runtime.report-status' in cfg:
        report_status = args.common_report_status or \
            cfg.obtain('datalad.runtime.report-status')
        if report_status == "all":
            pass  # no filter
        elif report_status == 'success':
            result_filter = EnsureKeyChoice('status', ('ok', 'notneeded'))
        elif report_status == 'failure':
            result_filter = EnsureKeyChoice('status',
                                            ('impossible', 'error'))
        else:
            result_filter = EnsureKeyChoice('status', (report_status,))
    if args.common_report_type:
        tfilt = EnsureKeyChoice('type', tuple(args.common_report_type))
        result_filter = result_filter & tfilt if result_filter else tfilt
    return result_filter
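A brief sketch of exercising this helper; argparse.Namespace stands in for the parsed command line, the attribute names follow the code above, and passing None for cls is only for illustration.

from argparse import Namespace

# report only failed results, with no restriction on result type
args = Namespace(common_report_status='failure', common_report_type=None)
result_filter = _get_result_filter(None, args)
# result_filter is an EnsureKeyChoice('status', ('impossible', 'error'))
# constraint that can be applied to each result record downstream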
tbrepo = AnnexRepo(
    tbds.path,
    url=None,
    create=True,
    create_sanity_checks=False,
    # do not set backend here, to avoid a dedicated commit
    backend=None,
    # None causes version to be taken from config
    version=None,
    description=description,
    git_opts=initopts,
    fake_dates=fake_dates
)
# set the annex backend in .gitattributes as a staged change
tbrepo.set_default_backend(
    cfg.obtain('datalad.repo.backend'),
    persistent=True, commit=False)
add_to_git[tbds.repo.pathobj / '.gitattributes'] = {
    'type': 'file',
    'state': 'added'}
# make sure that v6 annex repos never commit content under .datalad
attrs_cfg = (
    ('config', 'annex.largefiles', 'nothing'),
    ('metadata/aggregate*', 'annex.largefiles', 'nothing'),
    ('metadata/objects/**', 'annex.largefiles',
     '({})'.format(cfg.obtain(
         'datalad.metadata.create-aggregate-annex-limit'))))
attrs = tbds.repo.get_gitattributes(
    [op.join('.datalad', i[0]) for i in attrs_cfg])
set_attrs = []
for p, k, v in attrs_cfg:
    if not attrs.get(
            op.join('.datalad', p), {}).get(k, None) == v:
        set_attrs.append((p, {k: v}))
if set_attrs:
    tbds.repo.set_gitattributes(
        set_attrs,
        attrfile=op.join('.datalad', '.gitattributes'))
# prevent git annex from ever annexing .git* stuff (gh-1597)
attrs = tbds.repo.get_gitattributes('.git')
if not attrs.get('.git', {}).get(
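To make the attribute-diffing loop above concrete, here is a stripped-down illustration with plain dictionaries; the desired values mirror attrs_cfg, while the "current" state normally returned by get_gitattributes() is made up.

# desired (pattern, key, value) triples, as in attrs_cfg above
attrs_cfg = (
    ('config', 'annex.largefiles', 'nothing'),
    ('metadata/aggregate*', 'annex.largefiles', 'nothing'),
)
# pretend current state: only the 'config' pattern is already set
attrs = {'.datalad/config': {'annex.largefiles': 'nothing'}}

set_attrs = []
for p, k, v in attrs_cfg:
    if not attrs.get('.datalad/' + p, {}).get(k, None) == v:
        set_attrs.append((p, {k: v}))

# only the missing pattern would be written to .datalad/.gitattributes
print(set_attrs)  # [('metadata/aggregate*', {'annex.largefiles': 'nothing'})]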