def submit_job(self, job_descriptor, skip_if_output_present):
  # Test provider hook: fail any submission unless every task can be
  # skipped because its outputs already exist.
  if not skip_if_output_present:
    raise FailsException('fails provider made submit_job fail')

  for task_view in job_model.task_view_generator(job_descriptor):
    job_params = task_view.job_params
    task_params = task_view.task_descriptors[0].task_params
    outputs = job_params['outputs'] | task_params['outputs']
    if dsub_util.outputs_are_present(outputs):
      print('Skipping task because its outputs are present')
      continue

    # If any task is allowed to run, then we fail.
    raise FailsException('fails provider made submit_job fail')

  return {'job-id': dsub_util.NO_JOB}

def _get_job_metadata(provider, user_id, job_name, script, task_ids,
                      user_project, unique_job_id):
  """Allow provider to extract job-specific metadata from command-line args.

  Args:
    provider: job service provider
    user_id: user submitting the job
    job_name: name for the job
    script: the script to run
    task_ids: a set of the task-ids for all tasks in the job
    user_project: name of the project to be billed for the request
    unique_job_id: whether to generate a unique job id

  Returns:
    A dictionary of job-specific metadata (such as job id, name, etc.)
  """
  create_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())
  user_id = user_id or dsub_util.get_os_user()
  job_metadata = provider.prepare_job_metadata(script.name, job_name, user_id,
                                               create_time)
  if unique_job_id:
    job_metadata['job-id'] = uuid.uuid4().hex

  job_metadata['create-time'] = create_time
  job_metadata['script'] = script
  job_metadata['user-project'] = user_project
  if task_ids:
    job_metadata['task-ids'] = dsub_util.compact_interval_string(list(task_ids))

  return job_metadata
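
# For illustration, the returned metadata looks roughly like this (values are
# made up; the exact job-id format is provider-specific):
#
#   {'job-id': 'myscript--alice--190101-120000-00',
#    'job-name': 'myscript',
#    'user-id': 'alice',
#    'create-time': datetime.datetime(2019, 1, 1, 12, 0, tzinfo=...),
#    'script': <Script object>,
#    'user-project': 'my-project',
#    'task-ids': '1-3'}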

def _desc_date_sort_key(t):
  # Sort key for descending create-time order; 'now' is captured from the
  # enclosing scope, so newer tasks yield smaller deltas and sort first.
  return now - dsub_util.replace_timezone(t.get_field('create-time'), None)


print('Waiting for predecessor jobs to complete...')
error_messages = _wait_after(provider, after, poll_interval, True)
if error_messages:
  for msg in error_messages:
    print_error(msg)
  raise dsub_errors.PredecessorJobFailureError(
      'One or more predecessor jobs completed but did not succeed.',
      error_messages, None)
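
# The block above implements dsub's '--after' flag: for example,
#
#   dsub ... --after "$JOB_A" "$JOB_B"
#
# holds the new submission until the listed predecessor jobs complete, and
# raises if any of them did not succeed.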

# Launch all the job tasks!
job_descriptor = job_model.JobDescriptor(job_metadata, job_params,
                                         job_resources, task_descriptors)
launched_job = provider.submit_job(job_descriptor, skip)

if not dry_run:
  if launched_job['job-id'] == dsub_util.NO_JOB:
    print('Job output already present, skipping new job submission.')
    return {'job-id': dsub_util.NO_JOB}

  print('Launched job-id: %s' % launched_job['job-id'])
  if launched_job.get('task-id'):
    print('%s task(s)' % len(launched_job['task-id']))
  print('To check the status, run:')
  print("  dstat %s --jobs '%s' --users '%s' --status '*'" %
        (provider_base.get_dstat_provider_args(provider, project),
         launched_job['job-id'], launched_job['user-id']))
  print('To cancel the job, run:')
  print("  ddel %s --jobs '%s' --users '%s'" %
        (provider_base.get_ddel_provider_args(provider, project),
         launched_job['job-id'], launched_job['user-id']))
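
  # For illustration, with the local provider the printed status hint renders
  # as something like (job-id made up):
  #
  #   dstat --provider local --jobs 'myjob--alice--190101-120000-00' \
  #       --users 'alice' --status '*'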

# Poll for job completion
if wait:
  print('Waiting for job to complete...')

# 'OR' filtering arguments.
statuses = None if statuses == {'*'} else statuses
user_ids = None if user_ids == {'*'} else user_ids
job_ids = None if job_ids == {'*'} else job_ids
job_names = None if job_names == {'*'} else job_names
task_ids = None if task_ids == {'*'} else task_ids
task_attempts = None if task_attempts == {'*'} else task_attempts

# 'AND' filtering arguments.
labels = labels if labels else {}
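
# E.g. statuses == {'*'} means "match any status" and becomes None (no filter
# applied); a concrete set such as {'RUNNING', 'FAILURE'} is ORed together.
# The label filters, by contrast, must all match (AND semantics).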

# The local provider is intended for local, single-user development. There
# is no shared queue (jobs run immediately) and hence it makes no sense
# to look up a job run by someone else (whether for dstat or for ddel).
# If a user is passed in, we will allow it, so long as it is the current
# user. Otherwise we explicitly error out.
approved_users = {dsub_util.get_os_user()}
if user_ids:
  if user_ids != approved_users:
    raise NotImplementedError(
        'Filtering by user is not implemented for the local provider'
        ' (%s)' % str(user_ids))
else:
  user_ids = approved_users

ret = []
if not job_ids:
  # Default to every job we know about.
  job_ids = os.listdir(self._provider_root())
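
# The local provider keeps one directory per job under its root (typically
# something like /tmp/dsub-local/<job-id>/), so listing the root directory
# recovers every known job-id.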
for j in job_ids:
  for u in user_ids:
    path = self._provider_root() + '/' + j
    if not os.path.isdir(path):
      continue

@classmethod
def _from_yaml_v0(cls, job):
  """Populate a JobDescriptor from the local provider's original meta.yaml.

  Args:
    job: a dict decoded from an old-style (v0) meta.yaml file.

  Returns:
    A JobDescriptor populated as best we can from the old meta.yaml.
  """
  # The v0 meta.yaml only contained:
  #   create-time, job-id, job-name, logging, task-id
  #   labels, envs, inputs, outputs
  # It did NOT contain user-id.
  # dsub-version might be there as a label.
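  #
  # For illustration only (layout assumed from the keys above, not taken from
  # a real file), a v0 meta.yaml might have looked like:
  #
  #   create-time: '2017-01-01 00:00:00.000000'
  #   job-id: 'my-job--alice--170101-000000-00'
  #   job-name: 'my-job'
  #   logging: '/tmp/dsub-local/my-job/log'
  #   task-id: ''
  #   labels: {}
  #   envs: {SAMPLE: 'S1'}
  #   inputs: {}
  #   outputs: {}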

  job_metadata = {}
  for key in ['job-id', 'job-name', 'create-time']:
    job_metadata[key] = job.get(key)

  # Make sure that the create-time string is turned into a datetime.
  job_metadata['create-time'] = dsub_util.replace_timezone(
      datetime.datetime.strptime(job['create-time'], '%Y-%m-%d %H:%M:%S.%f'),
      tzlocal())

  # The v0 meta.yaml contained a "logging" field which was the task-specific
  # logging path. It did not include the actual "--logging" value the user
  # specified.
  job_resources = Resources()

  # The v0 meta.yaml represented a single task.
  # It did not distinguish whether params were job params or task params.
  # We will treat them as either all job params or all task params, based on
  # whether the task-id is empty or an integer value.
  #
  # We also cannot distinguish whether inputs/outputs were recursive or not.
  # Just treat them all as non-recursive.
  params = {}

def dsub_main(prog, argv):
  # Parse args and validate.
  args = _parse_arguments(prog, argv)

  # Intent:
  # * dsub tightly controls the output to stdout.
  # * Wrap the main body such that output goes to stderr.
  # * Only emit the job-id to stdout (which can then be used programmatically).
  with dsub_util.replace_print():
    launched_job = run_main(args)

  print(launched_job.get('job-id', ''))

  return launched_job
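
# A sketch of the programmatic use this enables (assumes the dsub CLI entry
# point is on PATH): only the job-id reaches stdout, so it can be captured
# directly while progress messages still go to stderr.
#
#   JOB_ID=$(dsub --provider local --image ubuntu --command 'echo hello')
#   dstat --provider local --jobs "$JOB_ID"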

def age_to_create_time(age, from_time=None):
  """Compute the create time cutoff for age-based filtering.

  If the age is a bare integer it is treated as seconds from the epoch.
  Otherwise the value must be of the form "<integer><unit>" where supported
  units are s, m, h, d, w (seconds, minutes, hours, days, weeks).

  Args:
    age: A "<integer><unit>" string or integer value.
    from_time: A timezone-aware datetime to subtract the age from; defaults
      to the current local time.

  Returns:
    A timezone-aware datetime or None if age parameter is empty.
  """
  if not age:
    return None

  if not from_time:
    from_time = dsub_util.replace_timezone(datetime.datetime.now(), tzlocal())
  try:
    last_char = age[-1]

    if last_char == 's':
      return from_time - datetime.timedelta(seconds=int(age[:-1]))
    elif last_char == 'm':
      return from_time - datetime.timedelta(minutes=int(age[:-1]))
    elif last_char == 'h':
      return from_time - datetime.timedelta(hours=int(age[:-1]))
    elif last_char == 'd':
      return from_time - datetime.timedelta(days=int(age[:-1]))
    elif last_char == 'w':
      return from_time - datetime.timedelta(weeks=int(age[:-1]))
    else:
      # If no unit is given treat the age as seconds from epoch, otherwise
      # apply the unit as handled above.
      return dsub_util.replace_timezone(
          datetime.datetime.utcfromtimestamp(int(age)), pytz.utc)
  except (ValueError, OverflowError) as e:
    raise ValueError('Unable to parse age string %s: %s' % (age, e))
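
# Usage sketch (illustrative values):
#
#   age_to_create_time('3d')   # timezone-aware datetime 3 days before now
#   age_to_create_time('12h')  # 12 hours before now
#   age_to_create_time('1483228800')  # no unit: seconds from epoch, in UTC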

def tasks_file_to_task_descriptors(tasks, retries, input_file_param_util,
                                   output_file_param_util):
  """Parse task parameters from a TSV file.

  Args:
    tasks: a dict with the path to the tasks file ('path') and optional
      'min' and 'max' task ids to run.
    retries: number of retries allowed for each task.
    input_file_param_util: utility for handling input file parameters.
    output_file_param_util: utility for handling output file parameters.

  Returns:
    task_descriptors: an array of records, each containing the task-id,
    task-attempt, 'envs', 'inputs', 'outputs', 'labels' that defines the set of
    parameters for each task of the job.

  Raises:
    ValueError: If no job records were provided
  """
  task_descriptors = []

  path = tasks['path']
  task_min = tasks.get('min')
  task_max = tasks.get('max')

  # First check for any empty lines. Note that lines read from the file keep
  # their trailing newline, so strip before testing for emptiness.
  param_file = dsub_util.load_file(path)
  if any(not line.strip() for line in param_file):
    raise ValueError('Blank line(s) found in {}'.format(path))
  param_file.close()

  # Load the file and set up a Reader that tokenizes the fields.
  param_file = dsub_util.load_file(path)
  reader = csv.reader(param_file, delimiter='\t')

  # Read the first line and extract the parameters.
  header = six.advance_iterator(reader)
  job_params = parse_tasks_file_header(header, input_file_param_util,
                                       output_file_param_util)

  # Build a list of records from the parsed input file.
  for row in reader:
    # Tasks are numbered starting at 1 and since the first line of the TSV
    # file is a header, the first task comes from line 2 of the file.
    task_id = reader.line_num - 1
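
# For illustration, a tasks TSV might look like this (tab-separated; the
# header columns follow dsub's --env/--input/--output naming conventions):
#
#   --env SAMPLE_ID    --input VCF           --output OUT
#   S1                 gs://bucket/s1.vcf    gs://bucket/out/s1/*
#   S2                 gs://bucket/s2.vcf    gs://bucket/out/s2/*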