# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main(cl_args=None):
    """Fetch the output of an EMR step and write it to ``output_file``.

    Parses command-line args, looks up the step on the cluster, warns if
    the step is not in the FAILED state, then streams the runner's output
    to disk.

    :param cl_args: list of command-line arguments (defaults to
        ``sys.argv[1:]`` via argparse)
    :raises SystemExit: if the step cannot be found
    """
    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except the logging flags and step_id goes to the runner
    runner_kwargs = {k: v for k, v in options.__dict__.items()
                     if k not in ('quiet', 'verbose', 'step_id')}

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # pick step
    step = _get_step(emr_client, options.cluster_id, options.step_id)

    if not step:
        raise SystemExit(1)

    if step['Status']['State'] != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], step['Status']['State']))

    # NOTE(review): ``output_file`` is not defined anywhere in this
    # function — presumably a module-level name or it should come from
    # options; TODO confirm. Use a context manager so the file is closed
    # even if streaming raises (the original leaked the handle on error
    # and shadowed the ``file`` builtin).
    with open(output_file, 'w+') as out:
        for line in runner.stream_output():
            out.write(line)
if __name__ == '__main__':
    # Validate the argument count before doing any work.
    if not 3 <= len(argv) <= 5:
        exit("Usage: python driver.py [input file] [output file] [options]")

    # Running locally unless '-emr' was passed as the third argument.
    LOCAL = not (len(argv) > 3 and argv[3] == '-emr')

    # Send all logging information to STDOUT.
    MRJob.set_up_logging(verbose=True, stream=stdout)

    # Chain the MapReduce stages sequentially, each feeding the next.
    stages = [
        (wordFrequency, argv[1], 'frequencies.txt'),
        (wordCount, 'frequencies.txt', 'counts.txt'),
        (corpusFrequency, 'counts.txt', 'corpus.txt'),
        (calculateScore, 'corpus.txt', argv[2]),
    ]
    for job, src, dst in stages:
        run_job(job, src, dst)
help='ID of cluster to run command on')
arg_parser.add_argument(dest='cmd_string',
help='command to run, as a single string')
_add_basic_args(arg_parser)
_add_runner_args(
arg_parser,
{'ec2_key_pair_file', 'ssh_bin'} | _filter_by_role(
EMRJobRunner.OPT_NAMES, 'connect')
)
_alphabetize_actions(arg_parser)
options = arg_parser.parse_args(cl_args)
MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)
runner_kwargs = options.__dict__.copy()
for unused_arg in ('cluster_id', 'cmd_string', 'output_dir',
'quiet', 'verbose'):
del runner_kwargs[unused_arg]
cmd_args = shlex_split(options.cmd_string)
output_dir = os.path.abspath(options.output_dir or options.cluster_id)
with EMRJobRunner(
cluster_id=options.cluster_id, **runner_kwargs) as runner:
_run_on_all_nodes(runner, output_dir, cmd_args)
def main(cl_args=None):
    """Terminate the EMR cluster named on the command line."""
    parser = _make_arg_parser()
    opts = parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    runner = EMRJobRunner(**_runner_kwargs(opts))
    cluster_id = opts.cluster_id

    log.debug('Terminating cluster %s' % cluster_id)
    emr_client = runner.make_emr_client()
    emr_client.terminate_job_flows(JobFlowIds=[cluster_id])
    log.info('Terminated cluster %s' % cluster_id)
def main(cl_args=None):
    """Parse args and terminate idle clusters matching the filters."""
    opts = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(quiet=opts.quiet, verbose=opts.verbose)

    # filter/behavior flags come straight from the command line
    _maybe_terminate_clusters(
        dry_run=opts.dry_run,
        max_mins_idle=opts.max_mins_idle,
        unpooled_only=opts.unpooled_only,
        now=_boto3_now(),
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        max_mins_locked=opts.max_mins_locked,
        quiet=opts.quiet,
        **_runner_kwargs(opts)
    )
def main(args=None):
    """Parse *args*, set up logging, and perform the requested actions."""
    parser = make_option_parser()
    try:
        options = parse_args(parser, args)
    except OptionError:
        parser.error('This tool takes exactly one argument.')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is a context manager, so its resources get cleaned up
    with EMRJobRunner(**runner_kwargs(options)) as runner:
        perform_actions(options, runner)
def main(args=None):
    """Collect EMR cluster history and print a usage report."""
    options = _make_arg_parser().parse_args(args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    now = _boto3_now()

    log.info('getting cluster history...')
    clusters = list(_yield_clusters(
        max_days_ago=options.max_days_ago, now=now,
        **_runner_kwargs(options)))

    log.info('compiling cluster stats...')
    stats = _clusters_to_stats(clusters, now=now)

    _print_report(stats, now=now)
def main():
    """Deprecated job-flow-pool tool: parse options and run actions."""
    parser = make_option_parser()
    try:
        options = parse_args(parser)
    except OptionError:
        parser.error('This tool takes no arguments.')

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    log.warning('job_flow_pool is deprecated and will be removed in v0.5.0')

    with EMRJobRunner(**runner_kwargs(options)) as runner:
        perform_actions(options, runner)
def _runner_kwargs(cl_args=None):
    """Parse command line arguments into arguments for
    :py:class:`EMRJobRunner`
    """
    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except the logging flags becomes a runner kwarg
    return {k: v for k, v in options.__dict__.items()
            if k not in ('quiet', 'verbose')}