def test_step_num(self):
    ex = StepFailedException(step_num=0)
    self.assertEqual(str(ex), 'Step 1 failed')
    self.assertEqual(repr(ex), 'StepFailedException(step_num=0)')

def test_no_such_file(self):
    missing_hadoop_bin = os.path.join(self.tmp_dir, 'no-hadoop-here')

    job = MRTwoStepJob(['-r', 'hadoop'])
    job.sandbox()

    with job.make_runner() as runner:
        self.patch_args_for_step(runner, missing_hadoop_bin)
        self.assertRaises(StepFailedException, runner.run)

def test_spark_job_failure(self):
    job = MRSparKaboom(['-r', 'spark'])
    job.sandbox(stdin=BytesIO(b'line\n'))

    with job.make_runner() as runner:
        self.assertRaises(StepFailedException, runner.run)
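
# For context, a minimal sketch of the kind of job MRSparKaboom presumably
# is (hypothetical implementation, not the real test job): any exception
# raised inside the Spark step makes spark-submit exit non-zero, which the
# runner surfaces as a StepFailedException.
from mrjob.job import MRJob

class MRSparKaboomSketch(MRJob):

    def spark(self, input_path, output_path):
        raise Exception('KABOOM')

if __name__ == '__main__':
    MRSparKaboomSketch.run()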

def test_num_steps_with_no_step_num(self):
    ex = StepFailedException(num_steps=4)
    self.assertEqual(str(ex), 'Step failed')
    self.assertEqual(repr(ex), 'StepFailedException(num_steps=4)')
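
# A minimal sketch (not from the test suite) of what the two tests above
# imply about rendering: step_num is 0-indexed internally but displayed
# 1-indexed, num_steps only shows up alongside a step_num, and a reason
# string (used by the runner code further down) is appended after 'failed'.
ex = StepFailedException(reason='boom', step_num=0, num_steps=4)
str(ex)  # presumably 'Step 1 of 4 failed: boom'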

def test_dont_destroy_other_pooled_cluster_on_failure(self):
    # Issue 242: job failure shouldn't kill the pooled clusters
    _, cluster_id = self.make_pooled_cluster()
    self.mock_emr_failures = set([(cluster_id, 0)])

    mr_job = MRTwoStepJob(['-r', 'emr', '-v',
                           '--pool-clusters'])
    mr_job.sandbox()

    self.mock_emr_failures = set([('j-MOCKCLUSTER0', 0)])

    with mr_job.make_runner() as runner:
        self.assertIsInstance(runner, EMRJobRunner)
        self.prepare_runner_for_ssh(runner)
        self.assertRaises(StepFailedException, runner.run)

        self.assertEqual(runner.get_cluster_id(), cluster_id)

        for _ in range(10):
            self.simulate_emr_progress(runner.get_cluster_id())

        cluster = runner._describe_cluster()
        self.assertEqual(cluster['Status']['State'], 'WAITING')

    # job shouldn't get terminated by cleanup
    for _ in range(10):
        self.simulate_emr_progress(runner.get_cluster_id())

    cluster = runner._describe_cluster()
    self.assertEqual(cluster['Status']['State'], 'WAITING')

def test_empty(self):
    # this doesn't work on the inline runner because Spark doesn't
    # have a working dir to upload stop_words.txt to. See below for
    # what does and doesn't work in the inline runner
    job = MRSparkMostUsedWord(['-r', 'local'])
    job.sandbox()

    with job.make_runner() as runner:
        # still doesn't work, because you can't run max() on no records
        self.assertRaises(StepFailedException, runner.run)
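
# For reference, the failure the comment above alludes to: Python's
# built-in max() raises ValueError on an empty sequence, and an
# exception inside the Spark step is what makes the step fail.
try:
    max([])
except ValueError as e:
    print(e)  # max() arg is an empty sequence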

def test_non_oserror_exception(self):
    # even a non-OSError exception raised while launching the task
    # binary should surface as a StepFailedException
    self.start(patch('os.execvpe', side_effect=KeyboardInterrupt))

    job = MRSparkWordcount(['-r', 'local'])
    job.sandbox()

    with job.make_runner() as runner:
        self.assertRaises(StepFailedException, runner.run)

def run_job(self):
    """Run all steps of the job, logging errors (and debugging output
    if :option:`--verbose` is on) to STDERR and streaming the output
    to STDOUT.

    Called from :py:meth:`run`. You'd probably only want to call this
    directly from automated tests.
    """
    # self.stderr is strictly binary; wrap it so that it's possible
    # to log to it in Python 3
    log_stream = codecs.getwriter('utf_8')(self.stderr)

    self.set_up_logging(quiet=self.options.quiet,
                        verbose=self.options.verbose,
                        stream=log_stream)

    with self.make_runner() as runner:
        try:
            runner.run()
        except StepFailedException as e:
            # no need for a runner stacktrace if a step failed; runners
            # will log more useful information anyway
            log.error(str(e))
            sys.exit(1)

        if self._should_cat_output():
            for chunk in runner.cat_output():
                self.stdout.write(chunk)
            self.stdout.flush()
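
# A hedged usage sketch based on the docstring above: calling run_job()
# directly from a test with sandboxed stdin/stdout (MRWordFreqCount stands
# in for any MRJob subclass; assume BytesIO is imported).
job = MRWordFreqCount(['-r', 'inline'])
job.sandbox(stdin=BytesIO(b'one line of input\n'))
job.run_job()  # calls sys.exit(1) if a step raises StepFailedException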

if 'output_dir' not in step_interpretation:
    step_interpretation['output_dir'] = (
        self._step_output_uri(step_num))

log_interpretation['step'] = step_interpretation

self._log_counters(log_interpretation, step_num)

if returncode:
    error = self._pick_error(log_interpretation, step_type)
    if error:
        _log_probable_cause_of_failure(log, error)

    # use CalledProcessError's well-known message format
    reason = str(CalledProcessError(returncode, step_args))
    raise StepFailedException(
        reason=reason, step_num=step_num,
        num_steps=self._num_steps())
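
# For reference, CalledProcessError's "well-known message format" mentioned
# in the comment above (from the standard library's subprocess module; the
# exact wording varies slightly across Python versions):
from subprocess import CalledProcessError
str(CalledProcessError(1, ['hadoop', 'jar', 'streaming.jar']))
# => "Command '['hadoop', 'jar', 'streaming.jar']' returned non-zero exit status 1."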

log.info(_format_counters(
    counter_dict,
    desc=('Counters for step %d' % desc_num)))

# for non-streaming steps, there are no counters.
# pad self._counters to match number of steps
while len(self._counters) < (last_step_num or step_num) + 1:
    self._counters.append({})

if returncode:
    error = _pick_error(dict(step=step_interpretation))
    if error:
        _log_probable_cause_of_failure(log, error)

    reason = str(CalledProcessError(returncode, spark_submit_args))
    raise StepFailedException(
        reason=reason, step_num=step_num, last_step_num=last_step_num,
        num_steps=self._num_steps())