# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_region_nobucket_nomatchexists(self):
    """aws_region given, no bucket given, and no existing bucket is in
    the requested region -- the runner must not pick bucket1."""
    self.bucket1.set_location('PUPPYLAND')

    runner = EMRJobRunner(
        aws_region='KITTYLAND',
        s3_endpoint='KITTYLAND',
        conf_path=False)

    self.assertNotEqual(runner._opts['s3_scratch_uri'], self.bucket1_uri)
def make_runner(self):
    """Set up ``self.runner`` against a fresh mock S3 bucket and create a
    temp dir for job output.

    Fix: the original built a throwaway ``EMRJobRunner(conf_paths=[])``
    and immediately overwrote it with the real runner; the dead
    construction is removed.
    """
    self.add_mock_s3_data({'walrus': {}})
    self.runner = EMRJobRunner(cloud_fs_sync_secs=0,
                               cloud_tmp_dir='s3://walrus/tmp',
                               conf_paths=[])
    self.runner._s3_log_dir_uri = BUCKET_URI + LOG_DIR
    self.prepare_runner_for_ssh(self.runner)
    self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')
def test_explicit_endpoints(self):
    """Explicit *emr_endpoint* should win over the region's default."""
    endpoint_opts = dict(conf_paths=[], aws_region='EU',
                         s3_endpoint='s3-proxy', emr_endpoint='emr-proxy')
    runner = EMRJobRunner(**endpoint_opts)
    self.assertEqual(runner.make_emr_conn().host, 'emr-proxy')
def make_pooled_cluster(self, **kwargs):
    """Create a persistent (pooled) cluster and mark it fully provisioned.

    Returns the new cluster's ID.
    """
    runner = EMRJobRunner(**kwargs)
    cluster_id = runner.make_persistent_cluster()

    # pretend EMR finished spinning up every requested instance
    cluster = self.mock_emr_clusters[cluster_id]
    cluster['Status']['State'] = 'WAITING'
    cluster['MasterPublicDnsName'] = 'mockmaster'
    for group in cluster['_InstanceGroups']:
        group['RunningInstanceCount'] = group['RequestedInstanceCount']

    return cluster_id
def test_no_waiting_for_job_pool_success(self):
    """With ``pool_wait_minutes=0`` the runner should give up immediately
    when the only pooled cluster can't be locked.

    Idiom fix: use ``assertIsNone`` rather than ``assertEqual(x, None)``
    for a clearer identity check and failure message.
    """
    self.mock_cluster_ids.append('j-fail-lock')
    runner = EMRJobRunner(conf_paths=[], pool_wait_minutes=0)
    cluster_id = runner._find_cluster()
    self.assertIsNone(cluster_id)
def test_dont_destroy_own_pooled_cluster_on_failure(self):
    # Issue 242: job failure shouldn't kill the pooled clusters
    mr_job = MRTwoStepJob(['-r', 'emr', '-v',
                           '--pool-clusters'])
    mr_job.sandbox()

    # make step 0 of the mock cluster fail, so runner.run() raises
    self.mock_emr_failures = set([('j-MOCKCLUSTER0', 0)])

    with mr_job.make_runner() as runner:
        self.assertIsInstance(runner, EMRJobRunner)
        self.prepare_runner_for_ssh(runner)
        self.assertRaises(StepFailedException, runner.run)

        # let the mock cluster settle after the failed step
        for _ in range(10):
            self.simulate_emr_progress(runner.get_cluster_id())

        # pooled cluster should survive the step failure
        cluster = runner._describe_cluster()
        self.assertEqual(cluster['Status']['State'], 'WAITING')

    # job shouldn't get terminated by cleanup
    # NOTE(review): indentation was lost in this copy; this second check is
    # placed *after* the with-block (i.e. after runner cleanup ran), matching
    # the comment's intent and upstream mrjob -- confirm against the original.
    for _ in range(10):
        self.simulate_emr_progress(runner.get_cluster_id())

    cluster = runner._describe_cluster()
    self.assertEqual(cluster['Status']['State'], 'WAITING')
def make_runner(self):
    """Set up ``self.runner`` against a fresh mock S3 bucket, create a
    temp output dir, and stub out the master-node address.

    Fix: the original built a throwaway ``EMRJobRunner(conf_paths=[])``
    and immediately overwrote it with the real runner; the dead
    construction is removed.
    """
    self.add_mock_s3_data({'walrus': {}})
    self.runner = EMRJobRunner(s3_sync_wait_time=0,
                               s3_tmp_dir='s3://walrus/tmp',
                               conf_paths=[])
    self.runner._s3_log_dir_uri = BUCKET_URI + LOG_DIR
    self.prepare_runner_for_ssh(self.runner)
    self.output_dir = tempfile.mkdtemp(prefix='mrboss_wd')
    self.runner._address_of_master = MagicMock(return_value='testmaster')
def _s3_cleanup(glob_path, time_old, dry_run=False, **runner_kwargs):
    """Delete all files older than *time_old* matching *glob_path*.

    If *dry_run* is true, just log the files that need to be
    deleted without actually deleting them.

    Fixes: the docstring referred to a nonexistent *path* parameter
    (it's *glob_path*), and logging now uses lazy ``%`` arguments
    instead of eager string interpolation.

    :param glob_path: S3 glob URI of candidate files
    :param time_old: :py:class:`datetime.timedelta`; files older than
                     this are deleted
    :param dry_run: log instead of deleting when true
    :param runner_kwargs: extra keyword args for ``EMRJobRunner``
    """
    runner = EMRJobRunner(**runner_kwargs)

    log.info('Deleting all files in %s that are older than %s',
             glob_path, time_old)

    for path, key in runner.fs.s3._ls(glob_path):
        age = _boto3_now() - key.last_modified
        if age > time_old:
            log.info('Deleting %s; is %s old', path, age)
            if not dry_run:
                key.delete()
def main(args=None):
    """Report jobs that have been running longer than ``--min-hours``.

    :param args: argv-style list for the argument parser (``None`` means
                 use ``sys.argv``)
    """
    now = _boto3_now()

    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)
    log.info('getting information about running jobs')

    min_time = timedelta(hours=options.min_hours)

    emr_client = EMRJobRunner(**_runner_kwargs(options)).make_emr_client()
    cluster_summaries = _boto3_paginate(
        'Clusters', emr_client, 'list_clusters',
        ClusterStates=['STARTING', 'BOOTSTRAPPING', 'RUNNING'])

    # optionally drop clusters matching the --exclude tags
    if options.exclude:
        filtered_cluster_summaries = _filter_clusters(
            cluster_summaries, emr_client, options.exclude)
    else:
        filtered_cluster_summaries = cluster_summaries

    job_info = _find_long_running_jobs(
        emr_client, filtered_cluster_summaries, min_time, now=now)

    _print_report(job_info)
def main(args=None):
    """Parse command-line options, set up logging, and run the tool's
    actions against a fresh :py:class:`EMRJobRunner`.

    :param args: argv-style list (``None`` means use ``sys.argv``)
    """
    parser = make_option_parser()
    try:
        options = parse_args(parser, args)
    except OptionError:
        parser.error('This tool takes exactly one argument.')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    with EMRJobRunner(**runner_kwargs(options)) as runner:
        perform_actions(options, runner)