def initiate(self):
    if self._initiated:
        return
    self._initiated = True
    d = opj(self.repopath, '.git', 'bin')
    if not exists(d):
        os.makedirs(d)
    suf = '-' + self.custom_remote_name.rstrip(':') if self.custom_remote_name else ''
    self._file = _file = opj(d, 'git-annex-remote-datalad' + suf)

    if exists(_file):
        lgr.debug("Commenting out previous entries")
        # comment out all the past entries
        with open(_file, 'rb') as f:
            entries = list(map(assure_unicode, f.readlines()))
        for i in range(len(self.HEADER.split(os.linesep)), len(entries)):
            e = entries[i]
            if e.startswith('recv ') or e.startswith('send '):
                entries[i] = '#' + e
        with open(_file, 'wb') as f:
            f.write(u''.join(entries).encode('utf-8'))
        return  # nothing else to be done

    lgr.debug("Initiating protocoling. "
              "cd %s; vim %s"
              % (realpath(self.repopath),
                 _file[len(self.repopath) + 1:]))
    with open(_file, 'a') as f:
        f.write(self.HEADER)
    os.chmod(_file, 0o755)
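# The comment-out pass above skips the header lines and prefixes previously
# recorded 'recv '/'send ' entries with '#'. A minimal standalone sketch of the
# same pattern (HEADER here is hypothetical, and plain str I/O stands in for
# the DataLad-specific assure_unicode handling):
import os

HEADER = '#!/bin/bash\n# protocol log\n'  # hypothetical header

def comment_out_entries(path):
    with open(path, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    # leave the header intact, neutralize previously recorded entries
    for i in range(len(HEADER.split(os.linesep)), len(entries)):
        if entries[i].startswith(('recv ', 'send ')):
            entries[i] = '#' + entries[i]
    with open(path, 'w', encoding='utf-8') as f:
        f.write(''.join(entries))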
        annex_opts=None,
        annex_add_opts=None,
        jobs=None):
    # parameter constraints:
    if not path:
        raise InsufficientArgumentsError(
            "insufficient information for adding: requires at least a path")
    refds_path = Interface.get_refds_path(dataset)
    common_report = dict(action='add', logger=lgr, refds=refds_path)

    if message and message_file:
        raise ValueError("Both a message and message file were specified")

    if message_file:
        with open(message_file, "rb") as mfh:
            message = assure_unicode(mfh.read())

    to_add = []
    subds_to_add = {}
    ds_to_annotate_from_recursion = {}
    got_nothing = True
    for ap in AnnotatePaths.__call__(
            path=path,
            dataset=dataset,
            # never recursion, need to handle manually below to be able to
            # discover untracked content
            recursive=False,
            action='add',
            # speed things up by using Git's modification detection, if there
            # is a repo with at least one commit
            modified='HEAD' \
            if dataset and \
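# The message handling above rejects mutually exclusive options and reads the
# message file as bytes before decoding. A minimal sketch of that pattern,
# with explicit UTF-8 decoding standing in for assure_unicode:
def resolve_message(message=None, message_file=None):
    if message and message_file:
        raise ValueError("Both a message and message file were specified")
    if message_file:
        with open(message_file, "rb") as mfh:
            # decode explicitly rather than relying on the locale default
            message = mfh.read().decode("utf-8")
    return message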
def _process_one_line(self, out_type, proc, log_, log_is_callable,
                      expected=False, line=None, suf=None):
    if line is None:
        lgr.log(3, "Reading line from %s", out_type)
        line = {'stdout': proc.stdout, 'stderr': proc.stderr}[out_type].readline()
    else:
        lgr.log(3, "Processing provided line")
    if line and log_is_callable:
        # Let it be processed
        line = log_(assure_unicode(line))
        if line is not None:
            # we are working with binary type here
            line = assure_bytes(line)
    if line:
        if out_type == 'stdout':
            self._log_out(assure_unicode(line))
        elif out_type == 'stderr':
            self._log_err(line.decode('utf-8') if PY3 else line,
                          expected)
        else:  # pragma: no cover
            raise RuntimeError("must not get here")
        return (line + suf) if suf else line
    # it was already output directly, but for the code to work, return ""
    return binary_type()
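# assure_unicode and assure_bytes (renamed ensure_unicode/ensure_bytes in
# later DataLad releases) convert between text and bytes only when needed.
# A rough sketch of their behavior, assuming UTF-8; the real helpers are more
# lenient and try several encodings:
def assure_unicode_sketch(s, encoding='utf-8'):
    # decode bytes to text, pass text through unchanged
    return s.decode(encoding) if isinstance(s, bytes) else s

def assure_bytes_sketch(s, encoding='utf-8'):
    # encode text to bytes, pass bytes through unchanged
    return s.encode(encoding) if isinstance(s, str) else s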
def yield_participant_info(bids):
    for bidsvars in bids.get_collections(
            level='dataset')[0].to_df().to_dict(orient='records'):
        props = dict(id=assure_unicode(bidsvars.pop('subject')))
        for p in bidsvars:
            # take away some ambiguity
            normk = assure_unicode(p).lower()
            hk = content_metakey_map.get(normk, normk)
            val = assure_unicode(bidsvars[p])
            if hk in ('sex', 'gender'):
                if hasattr(val, 'lower'):
                    val = val.lower()
                elif isinstance(val, float) and isnan(val):
                    # pybids reports 'n/a' as NaN
                    val = 'n/a'
                val = sex_label_map.get(val, val)
            if hk == 'suffix' and val == 'participants':
                # regression in PyBIDS 0.7.1, should be fixed in 0.8
                # https://github.com/bids-standard/pybids/issues/380
                # TODO: remove workaround whenever we depend on pybids >= 0.8
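# The sex/gender branch above lowercases string values, maps pybids' NaN
# placeholder back to 'n/a', and routes the result through a label map.
# A minimal sketch with a hypothetical sex_label_map:
from math import isnan

sex_label_map = {'f': 'female', 'm': 'male'}  # hypothetical mapping

def normalize_sex(val):
    if hasattr(val, 'lower'):
        val = val.lower()
    elif isinstance(val, float) and isnan(val):
        # pybids reports 'n/a' as NaN
        val = 'n/a'
    return sex_label_map.get(val, val)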
            #
            # FIXME: This covers the predominant command-line case, but, for
            # Python API callers, it means values like ["./script with spaces"]
            # require additional string-like escaping, which is inconsistent
            # with the handling of multi-item lists (and subprocess's
            # handling). Once we have a way to detect "running from Python API"
            # (discussed in gh-2986), update this.
            command = command[0]
        else:
            if command and command[0] == "--":
                # Strip disambiguation marker. Note: the "running from Python
                # API" FIXME above applies to this too.
                command = command[1:]
            command = " ".join(shlex_quote(c) for c in command)
    else:
        command = assure_unicode(command)
    return command
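# Based on the branch logic above (and with shlex_quote being shlex.quote),
# a multi-item command is shell-quoted and joined, a single-item command is
# passed through as-is, and a leading "--" marker is stripped:
from shlex import quote as shlex_quote

assert " ".join(shlex_quote(c) for c in ["ls", "my file"]) == "ls 'my file'"
# ["./script with spaces"] -> "./script with spaces"  (single item, unchanged)
# ["--", "ls", "-l"]       -> "ls -l"                 (marker stripped, then joined)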
            if old_ds_rpath:
                lgr.debug(
                    'Added %s on dataset %s',
                    single_or_plural(
                        'document',
                        'documents',
                        idx_size - old_idx_size,
                        include_count=True),
                    old_ds_rpath)
            log_progress(lgr.info, 'autofieldidxbuild',
                         'Indexed dataset at %s', old_ds_rpath,
                         update=1, increment=True)
            old_idx_size = idx_size
            old_ds_rpath = admin['path']

        admin['id'] = res.get('dsid', None)
        doc.update({k: assure_unicode(v) for k, v in admin.items()})
        lgr.debug("Adding document to search index: {}".format(doc))
        # inject into index
        idx.add_document(**doc)
        idx_size += 1

    if old_ds_rpath:
        lgr.debug(
            'Added %s on dataset %s',
            single_or_plural(
                'document',
                'documents',
                idx_size - old_idx_size,
                include_count=True),
            old_ds_rpath)
    lgr.debug("Committing index")
def run(self, cmd, env=None, *args, **kwargs):
    out, err = super(GitRunner, self).run(
        cmd, env=self.get_git_environ_adjusted(env), *args, **kwargs)
    # All communication here will be returned as unicode
    # TODO: do that instead within the super's run!
    return assure_unicode(out), assure_unicode(err)
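# get_git_environ_adjusted returns a copy of the process environment tweaked
# for running Git. A hypothetical minimal version (the variable set here is
# illustrative only, not DataLad's actual adjustment):
import os

def get_git_environ_adjusted_sketch(env=None):
    # never mutate the caller's mapping or os.environ
    env = dict(env if env is not None else os.environ)
    env['LC_ALL'] = 'C'  # e.g. pin the locale so Git output parses predictably
    return env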
    )
    meta = {
        self._key2stdkey.get(k, k): v
        for k, v in dsdesc_dict.items()
    }

    # TODO maybe normalize labels of standard licenses to definition URIs

    # perform mapping
    README_fname = opj(self.ds.path, 'README')
    if not meta.get('description') and exists(README_fname):
        # BIDS uses README to provide the description, so if one was not
        # explicitly provided (possibly to override a longer README), just
        # load the README
        with open(README_fname, 'rb') as f:
            desc = assure_unicode(f.read())
        meta['description'] = desc.strip()

    # special case
    # could be None (which we cannot strip) or ''
    bids_version = (meta.get('BIDSVersion', '') or '').strip()
    bids_defurl = 'http://bids.neuroimaging.io'
    if bids_version:
        bids_defurl += '/bids_spec{}.pdf'.format(bids_version)
    meta['conformsto'] = bids_defurl
    context['bids'] = {
        # not really a working URL, but BIDS doesn't provide term defs in
        # any accessible way
        '@id': '{}#'.format(bids_defurl),
        'description': 'ad-hoc vocabulary for the Brain Imaging Data Structure (BIDS) standard',
        'type': vocabulary_id,
    }
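# For example, with a BIDSVersion of '1.0.2' the 'conformsto' URL built above
# resolves to the versioned spec PDF:
bids_defurl = 'http://bids.neuroimaging.io' + '/bids_spec{}.pdf'.format('1.0.2')
assert bids_defurl == 'http://bids.neuroimaging.io/bids_spec1.0.2.pdf'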
    and that leading directory will be removed.
    """
    if not exists(dir_):
        lgr.debug("Creating directory %s to extract archive into", dir_)
        os.makedirs(dir_)
    with swallow_outputs() as cmo:
        archive = assure_bytes(archive)
        dir_ = assure_bytes(dir_)
        patoolib.util.check_existing_filename(archive)
        patoolib.util.check_existing_filename(dir_, onlyfiles=False)
        # Call the protected one to avoid the checks on existence of the
        # unixified path
        outdir = unixify_path(dir_)
        if not PY2:
            # should be supplied in PY3 to avoid b''
            outdir = assure_unicode(outdir)
            archive = assure_unicode(archive)

        format_compression = patoolib.get_archive_format(archive)
        if format_compression == ('gzip', None):
            # Yarik fell into the trap of being lazy and not providing proper
            # support for .gz, .xz, etc. "stream archiver" formats in the
            # handling of archives. ATM our support for .gz relies on the
            # behavior of 7z while extracting them and respecting a possibly
            # present .gz filename header field.
            # See https://github.com/datalad/datalad/pull/3176#issuecomment-466819861
            # TODO: provide proper handling of all those archives without
            # relying on any filename being stored in the header
            program = patoolib.find_archive_program(
                format_compression[0], 'extract')
            if basename(program) != '7z':
                raise MissingExternalDependency(