def machines(self):
  if not self._machines:
    if not self._really_deploy:
      # Dry runs get placeholder hosts instead of querying the server inventory.
      self._machines = ['[dummy-host1]', '[dummy-host2]', '[dummy-host3]']
    else:
      try:
        machines = Server.match_group('role', value=self._cluster.scheduler_role,
                                      datacenter=self._cluster.dc)
      except Server.NotFound:
        log.error("Failed to determine scheduler hosts in dc: %s under role: %s" %
                  (self._cluster.dc, self._cluster.scheduler_role))
        sys.exit(1)
      # Keep only monitored hosts.
      self._machines = []
      for machine in machines:
        if not machine.attribute('unmonitored'):
          self._machines.append(machine.hostname)
        else:
          log.info("Ignoring unmonitored host: %s" % machine.hostname)
  return self._machines

def _bind_processes(self):
  for process_name, fp in self._processes.items():
    if fp is None:
      process_ckpt = self._pathspec.given(process=process_name).getpath('process_checkpoint')
      log.debug('ProcessMuxer binding %s => %s', process_name, process_ckpt)
      try:
        self._processes[process_name] = open(process_ckpt, 'r')  # noqa
      except IOError as e:
        if e.errno == errno.ENOENT:
          log.debug(' => bind failed, checkpoint not available yet.')
          continue
        else:
          log.error("Unexpected inability to open %s! %s", process_ckpt, e)
      except Exception as e:
        log.error("Unexpected inability to open %s! %s", process_ckpt, e)
      self._fast_forward_stream(process_name)

def compute_status(self):
  if self.is_alive:
    return None
  if self._popen_signal != 0:
    return StatusResult('Task killed by signal %s.' % self._popen_signal, mesos_pb2.TASK_KILLED)
  if self._popen_rc == 0 or self._popen_rc == TERMINAL_TASK:
    exit_state = self.EXIT_STATE_MAP.get(self.task_state())
    if exit_state is None:
      log.error('Received unexpected exit state from TaskMonitor.')
      return StatusResult('Task checkpoint could not be read.', mesos_pb2.TASK_LOST)
    else:
      return exit_state
  elif self._popen_rc == UNKNOWN_USER:
    return StatusResult('Task started with unknown user.', mesos_pb2.TASK_FAILED)
  elif self._popen_rc == INTERNAL_ERROR:
    return StatusResult('Thermos failed with internal error.', mesos_pb2.TASK_LOST)
  elif self._popen_rc == INVALID_TASK:
    return StatusResult('Thermos received an invalid task.', mesos_pb2.TASK_FAILED)
  elif self._popen_rc == UNKNOWN_ERROR:
    return StatusResult('Thermos failed with an unknown error.', mesos_pb2.TASK_LOST)
  else:
    return StatusResult('Thermos exited for unknown reason (exit status: %s)' % self._popen_rc,
                        mesos_pb2.TASK_LOST)

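# Side note: the return-code branching above could also be table-driven. A minimal
# sketch, reusing StatusResult, mesos_pb2 and the UNKNOWN_USER / INTERNAL_ERROR /
# INVALID_TASK / UNKNOWN_ERROR constants from the snippet; the helper name and the
# fallback message are illustrative assumptions, not part of the original code.
RC_STATUS_MAP = {
    UNKNOWN_USER: ('Task started with unknown user.', mesos_pb2.TASK_FAILED),
    INTERNAL_ERROR: ('Thermos failed with internal error.', mesos_pb2.TASK_LOST),
    INVALID_TASK: ('Thermos received an invalid task.', mesos_pb2.TASK_FAILED),
    UNKNOWN_ERROR: ('Thermos failed with an unknown error.', mesos_pb2.TASK_LOST),
}

def status_for_rc(popen_rc):
  # Fall back to TASK_LOST with a generic message for unrecognized exit codes.
  message, state = RC_STATUS_MAP.get(
      popen_rc,
      ('Thermos exited for unknown reason (exit status: %s)' % popen_rc, mesos_pb2.TASK_LOST))
  return StatusResult(message, state)
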
def _args_error(self, d, e):
  log.error("Failed to parse arguments %s: %s" % (repr(d), e))

# Body of a file-chunk reader; 'filename', 'offset', and 'length' are parameters of
# the enclosing helper (its signature is not part of this snippet).
offset = offset or -1
length = length or -1
try:
  length = long(length)
  offset = long(offset)
except ValueError:
  return {}

if not os.path.isfile(filename):
  return {}

try:
  fstat = os.stat(filename)
except Exception as e:
  log.error('Could not read from %s: %s' % (filename, e))
  return {}

if offset == -1:
  offset = fstat.st_size
if length == -1:
  length = fstat.st_size - offset

with open(filename, "r") as fp:
  fp.seek(offset)
  try:
    data = fp.read(length)
  except IOError as e:
    log.error('Failed to read %s: %s' % (filename, e), exc_info=True)
    return {}

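# For reference, a self-contained sketch of the same offset/length logic as a complete
# function. This is an assumption-laden reconstruction, not the original code: the name
# read_chunk, the Python 3 int() conversions (the snippet uses Python 2 long()), and
# the returned dict shape are all hypothetical.
import logging
import os

log = logging.getLogger(__name__)

def read_chunk(filename, offset=None, length=None):
  """Hypothetical helper: return a chunk of a file, or {} on any error."""
  try:
    offset = int(offset) if offset is not None else -1
    length = int(length) if length is not None else -1
  except ValueError:
    return {}
  if not os.path.isfile(filename):
    return {}
  try:
    fstat = os.stat(filename)
  except OSError as e:
    log.error('Could not read from %s: %s' % (filename, e))
    return {}
  if offset == -1:
    offset = fstat.st_size            # default: start at end of file
  if length == -1:
    length = fstat.st_size - offset   # default: read through end of file
  try:
    with open(filename, 'r') as fp:
      fp.seek(offset)
      data = fp.read(length)
  except IOError as e:
    log.error('Failed to read %s: %s' % (filename, e), exc_info=True)
    return {}
  return {'offset': offset, 'length': length, 'data': data}
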
      RUNTASK_INSTANCE_LIMIT, options.instances))
if options.instances * options.cpus > RUNTASK_CPU_LIMIT:
  errors.append('aggregate CPU is over %.1f cores (actual: %.1f)' % (
      RUNTASK_CPU_LIMIT, options.instances * options.cpus))
if options.instances * options.ram > RUNTASK_RAM_LIMIT:
  errors.append('aggregate RAM is over %s (actual: %s)' % (
      RUNTASK_RAM_LIMIT, options.instances * options.ram))
if options.instances * options.disk > RUNTASK_DISK_LIMIT:
  errors.append('aggregate disk is over %s (actual: %s)' % (
      RUNTASK_DISK_LIMIT, options.instances * options.disk))
if errors:
  log.error('You must specify --yes_i_really_want_to_run_an_expensive_job because:')
  for op in errors:
    log.error(' - %s' % op)
  return True

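# The snippet above is cut off at the start of its first check (only the tail of the
# instance-count message survives). Under the assumption that it mirrors the aggregate
# checks that follow, the whole validation could be written table-driven; the helper
# name, labels, and message wording below are illustrative, while the RUNTASK_* limits
# and the options object come from the snippet itself.
def check_expensive_job(options, errors):
  checks = [
      ('instance count', options.instances, RUNTASK_INSTANCE_LIMIT),
      ('aggregate CPU', options.instances * options.cpus, RUNTASK_CPU_LIMIT),
      ('aggregate RAM', options.instances * options.ram, RUNTASK_RAM_LIMIT),
      ('aggregate disk', options.instances * options.disk, RUNTASK_DISK_LIMIT),
  ]
  for label, actual, limit in checks:
    if actual > limit:
      errors.append('%s is over %s (actual: %s)' % (label, limit, actual))
  return bool(errors)
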
def _initialize_ckpt_header(self):
  """
  Initializes the RunnerHeader for this checkpoint stream if it has not already
  been constructed.
  """
  if self._state.header is None:
    try:
      uid = pwd.getpwnam(self._user).pw_uid
    except KeyError:
      # This will cause failures downstream, but they will at least be correctly
      # reflected in the process state.
      log.error('Unknown user %s.', self._user)
      uid = None

    header = RunnerHeader(
        task_id=self._task_id,
        launch_time_ms=int(self._launch_time * 1000),
        sandbox=self._sandbox,
        log_dir=self._log_dir,
        hostname=self._hostname,
        user=self._user,
        uid=uid,
        ports=self._portmap)
    runner_ckpt = RunnerCkpt(runner_header=header)
    self._dispatcher.dispatch(self._state, runner_ckpt)

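# The uid lookup above follows a small reusable pattern: resolve the name if possible,
# otherwise log the failure and fall back to None. A minimal standalone sketch (the
# helper name lookup_uid is an assumption, not part of the original code):
import logging
import pwd

log = logging.getLogger(__name__)

def lookup_uid(user):
  """Resolve a user name to a numeric uid, or None if the user does not exist."""
  try:
    return pwd.getpwnam(user).pw_uid
  except KeyError:
    log.error('Unknown user %s.', user)
    return None
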
def _shutdown(self, status_result):
  runner_status = self._runner.status

  try:
    propagate_deadline(self._chained_checker.stop, timeout=self.STOP_TIMEOUT)
  except Timeout:
    log.error('Failed to stop all checkers within deadline.')
  except Exception:
    log.error('Failed to stop health checkers:')
    log.error(traceback.format_exc())

  try:
    propagate_deadline(self._runner.stop, timeout=self.STOP_TIMEOUT)
  except Timeout:
    log.error('Failed to stop runner within deadline.')
  except Exception:
    log.error('Failed to stop runner:')
    log.error(traceback.format_exc())

  # If the runner was alive when _shutdown was called, defer to the status_result,
  # otherwise the runner's terminal state is the preferred state.
  exit_status = runner_status or status_result

def _signal_kill_manager(self, driver, task_id, reason):
  if self._task_id is None:
    log.error('Was asked to kill task but no task running!')
    return
  if task_id != self._task_id:
    log.error('Asked to kill a task other than what we are running!')
    return
  if not self.sandbox_created.is_set():
    log.error('Asked to kill task with incomplete sandbox - aborting runner start')
    self.runner_aborted.set()
    return
  self.log('Activating kill manager.')
  self._kill_manager.kill(reason)

def _run_task(self, task):
  assert self._runner, "_runner should be created before this method is called"

  try:
    self._runner.start()
    log.info("Task runner for task %s started" % task.task_id)
    self._send_update(task.task_id.value, mesos_pb2.TASK_RUNNING)
  except TaskError as e:
    log.error("Task runner for task %s failed to start: %s" % (task.task_id, str(e)))
    # Send TASK_FAILED if the task failed to start.
    self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED)
  except Exception as e:
    log.error("Error occurred while executing the task: %s" % e)
    log.error(traceback.format_exc())
    # Send TASK_LOST for unknown errors.
    self._send_update(task.task_id.value, mesos_pb2.TASK_LOST)
  else:
    # Wait for the task's return code (when it terminates).
    try:
      returncode = self._runner.join()
      # If '_runner' terminates, it has either failed or been killed.
      log.warn("Task process terminated with return code %s" % returncode)
    except TaskError as e:
      log.error("Task terminated: %s" % e)
  finally:
    if self._killed:
      self._send_update(task.task_id.value, mesos_pb2.TASK_KILLED)
    else:
      self._send_update(task.task_id.value, mesos_pb2.TASK_FAILED)
    self._terminated.set()