Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
result.append(batch)
return result
# --- Batch-kill flow for a job's instances ---
# NOTE(review): leading indentation appears to have been stripped from this
# snippet; the block structure annotated below is inferred from context —
# confirm against the original file before relying on it.
resp = api.check_status(job_key)
# NOTE(review): `is not` compares enum identity rather than equality; `!=` is
# the conventional comparison for response codes — confirm both agree here.
if resp.responseCode is not ResponseCode.OK:
log.error("Job %s could not be found" % job_key)
exit(1)
# Normalize an empty task list to None so the batch path below is skipped.
tasks = resp.result.scheduleStatusResult.tasks or None
# Batch path: only taken when a positive batch_size was given and tasks exist.
if batch_size is not None and batch_size > 0 and tasks is not None:
instance_ids = set(instance.assignedTask.instanceId for instance in tasks)
# Restrict to the caller-supplied instances; when none were given, kill all.
instances_to_kill = instance_ids & set(instances_arg or instance_ids)
errors = 0
for batch in make_batches(instances_to_kill, batch_size):
resp = api.kill_job(job_key, batch)
if resp.responseCode is not ResponseCode.OK:
# NOTE(review): a failed batch kill is logged at info level (not error),
# with a separate stdout marker — confirm this is intentional.
log.info("Kill of shards %s failed with error %s" % (batch, combine_messages(resp)))
print('ERROR IN KILL_JOB')
errors += 1
# Abort the whole operation once too many batches have failed.
if errors > max_failures:
log.error("Exceeded maximum number of errors while killing instances")
exit(1)
if errors > 0:
print("Warning: errors occurred during batch kill")
exit(1)
else:
# Non-batch path (or nothing found): fail when there are no tasks to kill.
if tasks is None or len(tasks) == 0:
log.error('No tasks to kill found for job %s' % job_key)
return 1
def _get_tasks(self, task_query):
  """Query the scheduler for tasks matching task_query, without their configs.

  Logs the raw scheduler response and returns the matching tasks, or an
  empty list when the call did not succeed.
  """
  response = self._scheduler.getTasksWithoutConfigs(task_query)
  log.info(format_response(response))
  if response.responseCode == ResponseCode.OK:
    return response.result.scheduleStatusResult.tasks
  return []
def set_instance_healthy(instance_id, now):
  """Track instance_id (registering it on first sight) and mark it healthy
  once it has stayed up for at least the configured watch window."""
  if instance_id not in instance_states:
    instance_states[instance_id] = Instance(now)
  tracked = instance_states[instance_id]
  watch_deadline = tracked.birthday + self._watch_secs
  if now > watch_deadline:
    log.info('Instance %s has been up and healthy for at least %d seconds' % (
        instance_id, self._watch_secs))
    tracked.set_healthy(True)
def remove_cluster_state(self, cluster_name):
  """Delete the persisted state file for cluster_name, if one exists.

  When no state file is present this is a no-op apart from an info log line.
  """
  state_file = self._get_cluster_state_path(cluster_name)
  if os.path.isfile(state_file):
    os.remove(state_file)
  else:
    log.info("No cluster state found on path %s" % state_file)
def _detect_assets(self):
  """Load every file in this package's assets/ directory into memory.

  Caches the raw bytes of each asset keyed by its filename in self._assets so
  later lookups never touch the package resources again.
  """
  log.info('detecting assets...')
  assets = pkg_resources.resource_listdir(__name__, 'assets')
  cached_assets = {}
  for asset in assets:
    log.info(' detected asset: %s', asset)
    # Bug fix: pkg_resources resource names are always '/'-separated,
    # regardless of platform; os.path.join would emit backslashes on Windows
    # and make resource_string fail to find the asset.
    cached_assets[asset] = pkg_resources.resource_string(
        __name__, 'assets/' + asset)
  self._assets = cached_assets
def ping(self, message, ttl=60):
  """Handle an incoming ping: bump the ping counter and, while the ttl has
  not expired, schedule the ping to be re-sent with ttl decremented by one.

  :param message: opaque payload forwarded along with the ping.
  :param ttl: remaining hop count; forwarding stops once it reaches zero.
  """
  self._pings.increment()
  # Bug fix: the format arguments were reversed ((message, ttl)), which
  # printed the message in the ttl slot and the ttl as the message.
  log.info('Got ping (ttl=%s): %s' % (ttl, message))
  ttl = int(ttl) - 1
  if ttl > 0:
    # Re-send asynchronously after PING_DELAY instead of recursing inline.
    defer(partial(self.send_request, 'ping', message, ttl),
          delay=self.PING_DELAY, clock=self._clock)
def stop(self, timeout=None):
  """Stop the runner. If it's already completed, no-op. If it's still running, issue a kill."""
  if not self.__started:
    raise TaskError('Failed to call TaskRunner.start.')
  log.info('Invoking runner HTTP teardown.')
  self._terminate_http()
  # Fall back to the runner's maximum wait when no explicit timeout is given.
  wait = self._runner.MAX_WAIT if timeout is None else timeout
  return self._runner.stop(timeout=wait)
def stop_scheduler(self, host):
  """Shut down the scheduler on host: unmonitor it in monit, ask it to quit
  over HTTP, optionally wait for a clean exit, then monit-stop the process."""
  monit_prefix = ['sudo', 'monit']
  timestamp = strftime('%Y-%m-%d %H:%M:%S', gmtime())
  log.info('Stopping the scheduler on %s at %s' % (host, timestamp))
  log.info('Temporarily disabling monit for the scheduler on %s' % host)
  self._deployer.remote_check_call(host, monit_prefix + ['unmonitor', 'mesos-scheduler'])
  self.fetch_scheduler_http(host, 'quitquitquit')
  if self._really_deploy:
    log.info('Waiting for scheduler to stop cleanly')
    time.sleep(5)
  log.info('Stopping scheduler via monit')
  self._deployer.remote_check_call(host, monit_prefix + ['stop', 'mesos-scheduler'])