name=None,
existing='error',
access=None,
publish_depends=None,
description=None,
dryrun=False):
path = resolve_path(assure_list(path), ds=dataset) \
if path else None
if project and (recursive or (path and len(path) > 1)):
raise ValueError(
'Providing a GitLab project name/location cannot be combined '
'with recursive operation or multiple paths, as each dataset '
'needs to be mapped onto its own individual project.')
# what to operate on
ds = require_dataset(
dataset, check_installed=True, purpose='create GitLab sibling(s)')
# cache for objects of gitlab sites (we could face different ones
# in a single hierarchy; cache them to avoid duplicate initialization
# while still being able to process each dataset individually)
siteobjs = dict()
# which datasets to process?
if path is None:
for r in _proc_dataset(
ds, ds,
site, project, name, layout, existing, access,
dryrun, siteobjs, publish_depends, description):
yield r
if path or recursive:
    # also include any matching subdatasets
    pass

if not cmd:
    # nothing to do without a command
    lgr.warning("No command given")
    return
if saver:
warnings.warn("`saver` argument is ignored "
"and will be removed in a future release",
DeprecationWarning)
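# note: DeprecationWarning is ignored by default outside of __main__, so this
# message may only surface when warning filters are enabled (e.g. -W default)
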
rel_pwd = rerun_info.get('pwd') if rerun_info else None
if rel_pwd and dataset:
# recording is relative to the dataset
pwd = normpath(opj(dataset.path, rel_pwd))
rel_pwd = relpath(pwd, dataset.path)
else:
pwd, rel_pwd = get_command_pwds(dataset)
ds = require_dataset(
dataset, check_installed=True,
purpose='tracking outcomes of a command')
ds_path = ds.path
lgr.debug('tracking command output underneath %s', ds)
if not (rerun_info or inject): # Rerun already takes care of this.
# For explicit=True, we probably want to check whether any inputs have
# modifications. However, we can't just do is_dirty(..., path=inputs)
# because we need to consider subdatasets and untracked files.
# MIH: is_dirty() is gone, but status() can do all of the above!
if not explicit and ds.repo.dirty:
yield get_status_dict(
'run',
ds=ds,
status='impossible',
def __call__(
path=None,
dataset=None,
recursive=False,
recursion_limit=None,
update_mode='target',
incremental=False,
force_extraction=False,
save=True):
refds_path = Interface.get_refds_path(dataset)
# it really doesn't work without a dataset
ds = require_dataset(
dataset, check_installed=True, purpose='metadata aggregation')
path = assure_list(path)
if not path:
# then the current/reference dataset is "aggregated"
# do not add ds.path unconditionally, since then --recursive would
# also recurse into the current dataset even when paths are given
path.append(ds.path)
agginfo_db_location, agg_base_path = get_ds_aggregate_db_locations(ds)
agginfo_db = load_ds_aggregate_db(ds, abspath=True)
to_save = []
to_aggregate = set()
for ap in AnnotatePaths.__call__(
dataset=refds_path,
path=path,
def __call__(dataset, pattern, ref_dir='.', makedirs=False):
# could be extended to accept actual largefile expressions
from os.path import join as opj
from os.path import isabs
from os.path import exists
from os import makedirs as makedirsfx
from datalad.distribution.dataset import require_dataset
from datalad.support.annexrepo import AnnexRepo
from datalad.utils import assure_list
pattern = assure_list(pattern)
ds = require_dataset(dataset, check_installed=True,
purpose='no_annex configuration')
res_kwargs = dict(
path=ds.path,
type='dataset',
action='no_annex',
)
# all the ways we refused to cooperate
if not isinstance(ds.repo, AnnexRepo):
yield dict(
res_kwargs,
status='notneeded',
message='dataset has no annex')
return
if any(isabs(p) for p in pattern):
def require_rev_dataset(dataset, check_installed=True, purpose=None):
return RevolutionDataset(_require_dataset(
dataset,
check_installed,
purpose).path)
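
# Illustrative usage sketch, not part of the original module: how a wrapper
# like require_rev_dataset() is typically called. The path below is a made-up
# example location.
def _example_open_dataset(path='/tmp/some-dataset'):
    # raises if no installed dataset is found at `path`, otherwise returns
    # a RevolutionDataset instance wrapping it
    return require_rev_dataset(path, check_installed=True, purpose='demo')
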
recursion_limit=None,
get_data=True,
description=None,
reckless=False,
jobs='auto',
):
refds_path = Interface.get_refds_path(dataset)
if not (dataset or path):
raise InsufficientArgumentsError(
"Neither dataset nor target path(s) provided")
if dataset and not path:
# act on the whole dataset if nothing else was specified
path = refds_path
# we have to have a single dataset to operate on
refds = require_dataset(
dataset, check_installed=True, purpose='get content')
content_by_ds = {}
# use subdatasets() to discover any relevant content that is not
# already present in the root dataset (refds)
for sdsres in Subdatasets.__call__(
contains=path,
# maintain path argument semantics and pass in dataset arg
# as is
dataset=dataset,
# always come from the top to get sensible generator behavior
bottomup=False,
# when paths are given, they will constrain the recursion
# automatically, and we need to enable recursion so we can
# locate paths in subdatasets several levels down
recursive=True if path else recursive,
import os
import tarfile
import zipfile
from mock import patch
from os.path import join as opj, dirname, normpath, isabs
import os.path as op
from datalad.distribution.dataset import require_dataset
from datalad.utils import file_basename
from datalad.support.annexrepo import AnnexRepo
from datalad.dochelpers import exc_str
import logging
lgr = logging.getLogger('datalad.plugin.export_archive')
dataset = require_dataset(dataset, check_installed=True,
purpose='export archive')
repo = dataset.repo
committed_date = repo.get_commit_date()
# used below as a tarfile filter; could be extended later to filter
# files by some criterion
def _filter_tarinfo(ti):
# Reset the date to that of the last commit, not the filesystem one,
# since git does not track filesystem timestamps at all
# TODO: use the date of the last commit in which each particular
# file was changed -- that would be the most accurate we can get
ti.mtime = committed_date
return ti
tar_args = dict(recursive=False, filter=_filter_tarinfo)
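
# Illustrative sketch, not in the original snippet: `tar_args` is meant to be
# forwarded to TarFile.add(), whose `filter` callable receives every TarInfo
# and can normalize metadata (here the mtime) before the member is written.
# `archive_path` and `files` are hypothetical inputs for this example.
def _example_build_tarball(archive_path, files):
    with tarfile.open(archive_path, 'w:gz') as archive:
        for fname in files:
            archive.add(fname, arcname=op.basename(fname), **tar_args)
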
else refds_path
# we know that we need to create a dataset at `path`
assert(path is not None)
# assure cfg_proc is a list (relevant if used via Python API)
cfg_proc = assure_list(cfg_proc)
# prep for yield
res = dict(action='create', path=str(path),
logger=lgr, type='dataset',
refds=refds_path)
refds = None
if refds_path and refds_path != str(path):
refds = require_dataset(
refds_path, check_installed=True,
purpose='creating a subdataset')
path_inrefds = path_under_rev_dataset(refds, path)
if path_inrefds is None:
yield dict(
res,
status='error',
message=(
"dataset containing given paths is not underneath "
"the reference dataset %s: %s",
refds, str(path)),
)
return
# try to locate an immediate parent dataset
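
# Illustrative sketch, not the original implementation: a simplified heuristic
# for finding the closest parent directory that looks like a dataset, walking
# up from `start` until a `.datalad` marker directory is found; DataLad's own
# helpers would normally be used for this.
def _example_find_parent_dataset(start):
    from pathlib import Path
    for candidate in Path(start).resolve().parents:
        if (candidate / '.datalad').is_dir():
            return str(candidate)
    return None
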
path=None,
dataset=None,
get_aggregates=False,
reporton='all',
recursive=False):
# prep results
refds_path = Interface.get_refds_path(dataset)
res_kwargs = dict(action='metadata', logger=lgr)
if refds_path:
res_kwargs['refds'] = refds_path
if get_aggregates:
# yield all datasets for which we have aggregated metadata as results;
# these are actual dataset results, so we can turn them into dataset
# instances using generic top-level code if desired
ds = require_dataset(
refds_path,
check_installed=True,
purpose='aggregate metadata query')
agginfos = load_ds_aggregate_db(
ds,
version=str(aggregate_layout_version),
abspath=True
)
if not agginfos:
# if an aggregation had ever been run, this file would exist;
# since it does not, there has been none, and we need to report
# that to the user
yield get_status_dict(
ds=ds,
status='impossible',
action='metadata',
common_kwargs = dict(
get_data=get_data,
recursive=recursive,
recursion_limit=recursion_limit,
# git_opts=git_opts,
# annex_opts=annex_opts,
reckless=reckless,
jobs=jobs,
)
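# this shared keyword dict is presumably splatted into the nested install/get
# calls further down (i.e. passed as **common_kwargs) so that recursion,
# reckless mode, and job settings stay consistent across them
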
# did we explicitly get a dataset to install into?
# if we got a dataset, path will be resolved against it.
# Otherwise path will be resolved first.
ds = None
if dataset is not None:
ds = require_dataset(dataset, check_installed=True,
purpose='installation')
common_kwargs['dataset'] = dataset
# pre-compute for results below
refds_path = Interface.get_refds_path(ds)
# switch into the two scenarios without --source:
# 1. list of URLs
# 2. list of (sub)dataset content
if source is None:
# we need to collect URLs and paths
to_install = []
to_get = []
# TODO: this approach is problematic: it disrupts the order of the input
# arguments, so results will be returned in an unexpected order when a
# mixture of source URLs and paths is given. Reordering would only be
# possible if everything here were fully processed before any results
# are yielded.
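
# Illustrative sketch, not from the original code: one way to separate source
# URLs from local paths while remembering the original argument order, which
# is what the TODO above is concerned with; the URL check is a deliberate
# simplification for this example.
def _example_split_preserving_order(specs):
    def _looks_like_url(spec):
        return '://' in spec or spec.startswith('git@')
    ordered = []
    for pos, spec in enumerate(specs):
        kind = 'url' if _looks_like_url(spec) else 'path'
        # keep the position so results can later be yielded in input order
        ordered.append((pos, kind, spec))
    return ordered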