How to use the mrjob.job.MRJob.set_up_logging function in mrjob

To help you get started, we’ve selected a few mrjob examples, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Yelp / mrjob / mrjob / tools / diagnose.py View on Github external
def main(cl_args=None):
    """Command-line entry point for the step-diagnosis tool (excerpt).

    Parses *cl_args*, configures logging from the ``quiet``/``verbose``
    options, builds an :py:class:`EMRJobRunner` from the remaining
    options, and looks up the step to examine.

    :param cl_args: argument list forwarded to
                    ``arg_parser.parse_args()`` (defaults to ``None``)
    :raises SystemExit: with status 1 when ``_get_step()`` returns a
                        falsy value
    """
    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # everything except the logging switches and the step ID is handed
    # to the runner as keyword arguments
    runner_kwargs = {k: v for k, v in options.__dict__.items()
                     if k not in ('quiet', 'verbose', 'step_id')}

    runner = EMRJobRunner(**runner_kwargs)
    emr_client = runner.make_emr_client()

    # pick step
    step = _get_step(emr_client, options.cluster_id, options.step_id)

    # no usable step: exit with a non-zero status
    if not step:
        raise SystemExit(1)

    # a step in any other state is still accepted; we only warn about it
    if step['Status']['State'] != 'FAILED':
        log.warning('step %s has state %s, not FAILED' %
                    (step['Id'], step['Status']['State']))
    # NOTE(review): this excerpt appears to stop here; whatever is done
    # with ``step`` afterwards is not visible -- confirm against the
    # full file
github louismullie / tf-idf-emr / driver.py View on Github external
file = open(output_file, 'w+')
    for line in runner.stream_output():
      file.write(line)
    file.close()

if __name__ == '__main__':
  # Bail out with a usage message unless argv has 3 to 5 entries.
  if not 3 <= len(argv) <= 5:
    exit("Usage: python driver.py "
         "[input file] [output file] [options]")

  # LOCAL is true unless a fourth entry exists and is exactly '-emr'.
  LOCAL = len(argv) <= 3 or argv[3] != '-emr'

  # Send verbose logging output to STDOUT.
  MRJob.set_up_logging(verbose=True, stream=stdout)

  # Chain the MapReduce jobs one after another: each job's output file
  # is the next job's input, starting from argv[1] and ending at argv[2].
  run_job(wordFrequency, argv[1], 'frequencies.txt')
  run_job(wordCount, 'frequencies.txt', 'counts.txt')
  run_job(corpusFrequency, 'counts.txt', 'corpus.txt')
  run_job(calculateScore, 'corpus.txt', argv[2])
github Yelp / mrjob / mrjob / tools / emr / mrboss.py View on Github external
help='ID of cluster to run command on')
    arg_parser.add_argument(dest='cmd_string',
                            help='command to run, as a single string')

    _add_basic_args(arg_parser)
    _add_runner_args(
        arg_parser,
        {'ec2_key_pair_file', 'ssh_bin'} | _filter_by_role(
            EMRJobRunner.OPT_NAMES, 'connect')
    )

    _alphabetize_actions(arg_parser)

    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    runner_kwargs = options.__dict__.copy()
    for unused_arg in ('cluster_id', 'cmd_string', 'output_dir',
                       'quiet', 'verbose'):
        del runner_kwargs[unused_arg]

    cmd_args = shlex_split(options.cmd_string)

    output_dir = os.path.abspath(options.output_dir or options.cluster_id)

    with EMRJobRunner(
            cluster_id=options.cluster_id, **runner_kwargs) as runner:
        _run_on_all_nodes(runner, output_dir, cmd_args)
github Yelp / mrjob / mrjob / tools / emr / terminate_cluster.py View on Github external
def main(cl_args=None):
    """Terminate the cluster named by the ``cluster_id`` option.

    Parses *cl_args*, configures logging from the ``quiet``/``verbose``
    options, then asks the EMR client obtained from an
    :py:class:`EMRJobRunner` to terminate that single cluster.

    :param cl_args: argument list forwarded to
                    ``arg_parser.parse_args()`` (defaults to ``None``)
    """
    # parse command-line args
    arg_parser = _make_arg_parser()
    options = arg_parser.parse_args(cl_args)

    MRJob.set_up_logging(quiet=options.quiet, verbose=options.verbose)

    # the runner is only needed as a source of a configured EMR client
    runner = EMRJobRunner(**_runner_kwargs(options))

    # pass the cluster ID as a logging argument so the message is only
    # interpolated when the record is actually emitted
    log.debug('Terminating cluster %s', options.cluster_id)
    runner.make_emr_client().terminate_job_flows(
        JobFlowIds=[options.cluster_id])
    log.info('Terminated cluster %s', options.cluster_id)
github Yelp / mrjob / mrjob / tools / emr / terminate_idle_clusters.py View on Github external
def main(cl_args=None):
    """Command-line entry point for terminating idle clusters.

    Parses *cl_args*, configures logging from the ``quiet``/``verbose``
    options, and hands the parsed settings (plus the current time from
    ``_boto3_now()`` and the runner keyword arguments) to
    ``_maybe_terminate_clusters()``.

    :param cl_args: argument list forwarded to ``parse_args()``
                    (defaults to ``None``)
    """
    opts = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(verbose=opts.verbose, quiet=opts.quiet)

    # tool-specific options are passed explicitly; whatever
    # _runner_kwargs() extracts from the options rides along
    _maybe_terminate_clusters(
        dry_run=opts.dry_run,
        max_mins_idle=opts.max_mins_idle,
        max_mins_locked=opts.max_mins_locked,
        now=_boto3_now(),
        pool_name=opts.pool_name,
        pooled_only=opts.pooled_only,
        unpooled_only=opts.unpooled_only,
        quiet=opts.quiet,
        **_runner_kwargs(opts)
    )
github Yelp / mrjob / mrjob / tools / emr / fetch_logs.py View on Github external
def main(args=None):
    """Command-line entry point for the log-fetching tool.

    Parses *args*, configures logging from the ``quiet``/``verbose``
    options, then runs ``perform_actions()`` inside an
    :py:class:`EMRJobRunner` context.

    :param args: argument list forwarded to ``parse_args()``
                 (defaults to ``None``)
    """
    parser = make_option_parser()

    try:
        opts = parse_args(parser, args)
    except OptionError:
        # NOTE(review): parser.error() is presumably expected not to
        # return (as with optparse); otherwise ``opts`` would be unbound
        # below -- confirm
        parser.error('This tool takes exactly one argument.')

    MRJob.set_up_logging(verbose=opts.verbose, quiet=opts.quiet)

    emr_runner_kwargs = runner_kwargs(opts)
    with EMRJobRunner(**emr_runner_kwargs) as emr_runner:
        perform_actions(opts, emr_runner)
github Yelp / mrjob / mrjob / tools / emr / audit_usage.py View on Github external
def main(args=None):
    """Command-line entry point for the usage-audit report.

    Parses *args*, configures logging from the ``quiet``/``verbose``
    options, collects the clusters yielded by ``_yield_clusters()``,
    converts them to stats, and prints the report.

    :param args: argument list forwarded to ``parse_args()``
                 (defaults to ``None``)
    """
    # parse command-line args
    opts = _make_arg_parser().parse_args(args)

    MRJob.set_up_logging(verbose=opts.verbose, quiet=opts.quiet)

    # one timestamp, reused by every stage below
    now = _boto3_now()

    log.info('getting cluster history...')
    cluster_iter = _yield_clusters(
        max_days_ago=opts.max_days_ago, now=now, **_runner_kwargs(opts))
    # materialize everything before moving on to the stats stage
    clusters = list(cluster_iter)

    log.info('compiling cluster stats...')
    _print_report(_clusters_to_stats(clusters, now=now), now=now)
github Yelp / mrjob / mrjob / tools / emr / job_flow_pool.py View on Github external
def main():
    """Command-line entry point for the (deprecated) job flow pool tool.

    Parses the command line, configures logging from the
    ``quiet``/``verbose`` options, emits a deprecation warning, then
    runs ``perform_actions()`` inside an :py:class:`EMRJobRunner`
    context.
    """
    parser = make_option_parser()

    try:
        opts = parse_args(parser)
    except OptionError:
        # NOTE(review): parser.error() is presumably expected not to
        # return (as with optparse); otherwise ``opts`` would be unbound
        # below -- confirm
        parser.error('This tool takes no arguments.')

    MRJob.set_up_logging(verbose=opts.verbose, quiet=opts.quiet)

    log.warning('job_flow_pool is deprecated and will be removed in v0.5.0')

    emr_runner_kwargs = runner_kwargs(opts)
    with EMRJobRunner(**emr_runner_kwargs) as emr_runner:
        perform_actions(opts, emr_runner)
github Yelp / mrjob / mrjob / tools / emr / create_cluster.py View on Github external
def _runner_kwargs(cl_args=None):
    """Turn command-line arguments into keyword arguments for
    :py:class:`EMRJobRunner`.

    As a side effect, logging is configured from the parsed
    ``quiet``/``verbose`` options.

    :param cl_args: argument list forwarded to ``parse_args()``
                    (defaults to ``None``)
    :return: a dict of every parsed option except ``quiet`` and
             ``verbose``
    """
    parsed = _make_arg_parser().parse_args(cl_args)

    MRJob.set_up_logging(verbose=parsed.verbose, quiet=parsed.quiet)

    # work on a shallow copy so the parsed namespace itself is untouched
    runner_opts = dict(vars(parsed))

    # the logging switches were consumed above and are not passed on
    for logging_flag in ('quiet', 'verbose'):
        del runner_opts[logging_flag]

    return runner_opts