In the standard case, the information in these results will be used to
actually re-execute the commands.
"""
revs = dset.repo.repo.git.rev_list("--reverse", revrange, "--").split()
try:
results = _revs_as_results(dset, revs)
except ValueError as exc:
yield get_status_dict("run", status="error", message=exc_str(exc))
return
if since is not None and since.strip() == "":
# For --since='', drop any leading commits that don't have
# a run command.
results = list(dropwhile(lambda r: "run_info" not in r, results))
if not results:
yield get_status_dict(
"run", status="impossible", ds=dset,
message=("No run commits found in history of %s", revrange))
return
else:
results = list(results)
if not results:
yield get_status_dict(
"run", status="impossible", ds=dset,
message=("No commits found in %s", revrange))
return
if onto is not None and onto.strip() == "":
# Special case: --onto='' takes the value of --since. Because we're
# currently aborting if the revision list contains merges, we know
# that, regardless of whether and how --since is specified, the
# effective value for --since is the parent of the first revision.
onto = results[0]["commit"] + "^"
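
# --- hedged illustration, not part of the original source ---
# A stdlib-only sketch of the dropwhile filtering used for --since='' above:
# leading entries that lack a "run_info" key are discarded, and everything
# from the first run commit onward is kept. The records below are made up.
from itertools import dropwhile

_demo = [{"commit": "c0"}, {"commit": "c1", "run_info": {}}, {"commit": "c2"}]
print(list(dropwhile(lambda r: "run_info" not in r, _demo)))
# -> [{'commit': 'c1', 'run_info': {}}, {'commit': 'c2'}]
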
if not isinstance(spec, (tuple, list)):
# maybe coming from config
import shlex
spec = shlex.split(spec)
name = spec[0]
args = spec[1:]
try:
# get the first match and run with it
procedure_file, cmd_name, cmd_tmpl, cmd_help = \
next(_get_procedure_implementation(name, ds=ds))
except StopIteration:
res = get_status_dict(
action='run_procedure',
# TODO: Default renderer requires a key "path" to exist.
# Doesn't make a lot of sense in this case
path=name,
logger=lgr,
refds=ds.path if ds else None,
status='impossible',
message="Cannot find procedure with name '%s'" % name)
yield res
return
ex = _guess_exec(procedure_file)
# configured template (call-format string) takes precedence:
if cmd_tmpl:
ex['template'] = cmd_tmpl
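
# --- hedged illustration, not part of the original source ---
# How the shlex-based spec normalization above behaves: a procedure spec
# read from config arrives as a single string and gets tokenized
# shell-style. `_demo_spec` is an invented value, not a real config entry.
import shlex

_demo_spec = "cfg_myproc --level 2 'some arg'"
_tokens = shlex.split(_demo_spec)
print(_tokens[0], _tokens[1:])  # -> cfg_myproc ['--level', '2', 'some arg']
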
def _revs_as_results(dset, revs):
for rev in revs:
res = get_status_dict("run", ds=dset, commit=rev)
full_msg = dset.repo.format_commit("%B", rev)
try:
msg, info = get_run_info(dset, full_msg)
except ValueError as exc:
# Recast the error so the message includes the revision.
raise ValueError(
"Error on {}'s message: {}".format(rev, exc_str(exc)))
if info is not None:
res["run_info"] = info
res["run_message"] = msg
yield dict(res, status="ok")
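
# --- hedged illustration, not part of the original source ---
# A hypothetical consumer of the generator above: split the result records
# into run commits and plain commits (plain dicts stand in for real records).
def _split_run_commits(results):
    run, plain = [], []
    for res in results:
        (run if "run_info" in res else plain).append(res)
    return run, plain
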
ds = require_dataset(
dataset, check_installed=True,
purpose='tracking outcomes of a command')
# not needed ATM
#refds_path = ds.path
lgr.debug('tracking command output underneath %s', ds)
if not (rerun_info or inject): # Rerun already takes care of this.
# For explicit=True, we probably want to check whether any inputs have
# modifications. However, we can't just do is_dirty(..., path=inputs)
# because we need to consider subdatasets and untracked files.
if not explicit and ds.repo.dirty:
yield get_status_dict(
'run',
ds=ds,
status='impossible',
message=('unsaved modifications present, '
'cannot detect changes by command'))
return
cmd = normalize_command(cmd)
inputs = GlobbedPaths(inputs, pwd=pwd,
expand=expand in ["inputs", "both"])
extra_inputs = GlobbedPaths(extra_inputs, pwd=pwd,
# Follow same expansion rules as `inputs`.
expand=expand in ["inputs", "both"])
outputs = GlobbedPaths(outputs, pwd=pwd,
expand=expand in ["outputs", "both"])
except CommandError:
raw_info = {}
available_space = raw_info.get('available local disk space', None)
for trust in ('trusted', 'semitrusted', 'untrusted'):
ri = raw_info.get('{} repositories'.format(trust), [])
for r in ri:
uuid = r.get('uuid', '00000000-0000-0000-0000-00000000000')
# git-annex special remotes (e.g. the web remote) carry UUIDs with this
# all-zeros prefix; they are not regular siblings, so skip them
if uuid.startswith('00000000-0000-0000-0000-00000000000'):
continue
ainfo = annex_info.get(uuid, {})
ainfo['description'] = r.get('description', None)
annex_info[uuid] = ainfo
# treat the local repo as any other remote using 'here' as a label
remotes = [name] if name else ['here'] + known_remotes
for remote in remotes:
info = get_status_dict(
action='query-sibling',
path=ds.path,
type='sibling',
name=remote,
**res_kwargs)
if remote != 'here' and remote not in known_remotes:
info['status'] = 'error'
info['message'] = 'unknown sibling name'
yield info
continue
# now pull everything we know out of the config
# simply because it is cheap and we don't have to go through
# tons of API layers to be able to work with it
if remote == 'here':
# special case: this repo
# aim to provide info using the same keys as for remotes
# somewhat "ugly"
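
# --- hedged illustration, not part of the original source ---
# Stdlib sketch of the annex-info merge above: skip git-annex special
# remotes (all-zeros UUID prefix, e.g. the web remote) and index the
# remaining descriptions by uuid. The sample records are made up.
_raw_repos = [
    {'uuid': '11111111-2222-3333-4444-555555555555', 'description': 'laptop'},
    {'uuid': '00000000-0000-0000-0000-000000000001', 'description': 'web'},
]
_annex_info = {}
for _r in _raw_repos:
    _uuid = _r.get('uuid', '00000000-0000-0000-0000-00000000000')
    if _uuid.startswith('00000000-0000-0000-0000-00000000000'):
        continue  # special remote, not a queryable sibling
    _annex_info.setdefault(_uuid, {})['description'] = _r.get('description')
print(_annex_info)  # -> only the 'laptop' entry remains
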
# providers.get_provider(url).get_downloader(url).download(url, path=path)
# for now -- via sugaring
try:
downloaded_path = providers.download(url, path=path, overwrite=overwrite)
except Exception as e:
yield get_status_dict(
status="error",
message=exc_str(e),
type="file",
path=path,
**common_report)
else:
downloaded_paths.append(downloaded_path)
path_urls[downloaded_path] = url
yield get_status_dict(
status="ok",
type="file",
path=downloaded_path,
**common_report)
if downloaded_paths and save and ds is not None:
msg = message or """\
[DATALAD] Download URLs
URLs:
{}""".format("\n ".join(urls))
for r in ds.add(downloaded_paths, message=msg):
yield r
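
# --- hedged illustration, not part of the original source ---
# The same per-URL try/except/else pattern in a self-contained form, using
# stdlib urllib instead of datalad's providers machinery; `pairs` entries
# are (url, local_path) tuples supplied by the caller.
from urllib.request import urlretrieve

def download_all(pairs):
    downloaded = []
    for url, path in pairs:
        try:
            fname, _headers = urlretrieve(url, path)
        except Exception as e:
            print("error downloading", url, ":", e)
        else:
            downloaded.append(fname)
    return downloaded
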
if isinstance(ds.repo, AnnexRepo):
status_kwargs = dict(
action='install',
logger=lgr,
refds=refds_path,
source_url=source_url)
try:
# this will implicitly cause pathlib to run a bunch of checks on
# whether the present path makes any sense on the platform
# we are running on -- we don't care if the path actually
# exists at this point, but we want to abort early if the path
# spec is determined to be useless
path.exists()
except OSError as e:
yield get_status_dict(
status='error',
path=path,
message=('cannot handle target path: %s', exc_str(e)),
**status_kwargs)
return
destination_dataset = Dataset(path)
status_kwargs['ds'] = destination_dataset
dest_path = path
# important test: based on this, `rmtree` will happen below after a failed clone
if dest_path.exists() and any(dest_path.iterdir()):
if destination_dataset.is_installed():
# check if dest was cloned from the given source before
# this is where we would have installed this from
guessed_sources = _get_flexible_source_candidates(
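
# --- hedged illustration, not part of the original source ---
# The dest_path occupancy test above in a self-contained form: a clone
# target counts as occupied only if it exists and has at least one entry
# (assumes the path is a directory, as in the clone case above).
from pathlib import Path

def is_nonempty_dir(path):
    p = Path(path)
    return p.exists() and any(p.iterdir())
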
# in case there was no metadata provider, we do not want to start
# downloading everything: see https://github.com/datalad/datalad/issues/2458
objfiles.difference_update([None])
lgr.debug(
'Verifying/achieving local availability of %i metadata objects',
len(objfiles))
if objfiles:
get(path=[dict(path=op.join(agg_base_path, of),
parentds=ds.path, type='file')
for of in objfiles if of],
dataset=ds,
result_renderer='disabled')
for qap in to_query_available:
# info about the dataset that contains the query path
dsinfo = agginfos.get(qap['metaprovider'], dict(id=ds.id))
res_tmpl = get_status_dict()
for s, d in (('id', 'dsid'), ('refcommit', 'refcommit')):
if s in dsinfo:
res_tmpl[d] = dsinfo[s]
# pull up dataset metadata, always needed if only for the context
dsmeta = {}
dsobjloc = dsinfo.get('dataset_info', None)
if dsobjloc is not None:
dsmeta = _load_json_object(
op.join(agg_base_path, dsobjloc),
cache=cache['objcache'])
for r in _query_aggregated_metadata_singlepath(
ds, agginfos, agg_base_path, qap, reporton,
cache, dsmeta,
dsinfo.get('content_info', None)):
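
# --- hedged illustration, not part of the original source ---
# A hypothetical stand-in for the _load_json_object helper used above:
# parse each metadata object file once, then reuse the parsed result
# through a caller-provided cache dict.
import json

def load_json_cached(path, cache):
    if path not in cache:
        with open(path) as f:
            cache[path] = json.load(f)
    return cache[path]
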
nondataset_path_status='impossible',
return_type='generator',
# if there is an error now, we made a mistake somewhere in here
on_failure='stop')
# now sort into datasets so we can process them one by one
content_by_ds, ds_props, completed, nondataset_paths = \
annotated2content_by_ds(
annotated_paths,
refds_path=refds_path)
assert not completed
# iterate over all datasets, starting at the bottom
for dspath in sorted(content_by_ds.keys(), reverse=True):
ds = Dataset(dspath)
res = get_status_dict('save', ds=ds, logger=lgr)
if not ds.is_installed():
# TODO This is likely impossible now
res['status'] = 'impossible'
res['message'] = ('dataset %s is not installed', ds)
yield res
continue
saved_state = save_dataset(
ds,
content_by_ds[dspath],
message=message)
res['status'] = 'ok' if saved_state else 'notneeded'
# MIH: let's tag even if there was nothing to commit. I'd forget this
# option too often...
if version_tag:
try:
# TODO: check whether comment below is still true after
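
# --- hedged illustration, not part of the original source ---
# Why the reverse sort above walks the dataset tree bottom-up: a subdataset
# path extends its parent's path as a string prefix, so it sorts after the
# parent, and reversing therefore yields the deepest datasets first.
_demo_paths = ["/ds", "/ds/sub", "/ds/sub/deeper", "/other"]
print(sorted(_demo_paths, reverse=True))
# -> ['/other', '/ds/sub/deeper', '/ds/sub', '/ds']
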
continue
if ap.get('type', None) != 'dataset':
ap.update(
status='impossible',
message="can only update datasets")
yield ap
continue
# this is definitely a dataset from here on
ds = Dataset(ap['path'])
if not ds.is_installed():
lgr.debug("Skipping update since not installed %s", ds)
continue
repo = ds.repo
# prepare return value
# TODO reuse AP for return props
res = get_status_dict('update', ds=ds, logger=lgr, refds=refds_path)
# get all remotes which have references (would exclude
# special remotes)
remotes = repo.get_remotes(
**({'exclude_special_remotes': True} if isinstance(repo, AnnexRepo) else {}))
if not remotes and not sibling:
res['message'] = ("No siblings known to dataset at %s\nSkipping",
repo.path)
res['status'] = 'notneeded'
yield res
continue
if not sibling and len(remotes) == 1:
# there is only one remote, must be this one
sibling_ = remotes[0]
elif not sibling:
# nothing given, look for tracking branch
sibling_ = repo.get_tracking_branch()[0]
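
# --- hedged illustration, not part of the original source ---
# The sibling-selection logic above as a small, hypothetical helper;
# `tracking_remote` stands for repo.get_tracking_branch()[0] and may
# be None when no tracking branch is configured.
def pick_update_sibling(sibling, remotes, tracking_remote):
    if sibling:
        return sibling
    if len(remotes) == 1:
        # only one remote known, must be the one to update from
        return remotes[0]
    return tracking_remote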