How to use the fklearn.validation.validator.parallel_validator function in fklearn

To help you get started, we’ve selected a few fklearn examples based on popular ways the library is used in public projects.

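Before the excerpts, here is a minimal, self-contained sketch of calling parallel_validator directly. It is not taken from the fklearn docs: the toy DataFrame, the linear-regression learner and the MSE evaluator are illustrative choices; only the parallel_validator call shape mirrors the test below.

import pandas as pd

from fklearn.training.regression import linear_regression_learner
from fklearn.validation.evaluators import mse_evaluator
from fklearn.validation.splitters import k_fold_splitter
from fklearn.validation.validator import parallel_validator

# Toy data: one numeric feature and a noisy linear target
data = pd.DataFrame({"f1": list(range(20)),
                     "target": [2.0 * x + (x % 3) for x in range(20)]})

split_fn = k_fold_splitter(n_splits=2)                                   # assigns rows to folds
train_fn = linear_regression_learner(features=["f1"], target="target")   # curried learner
eval_fn = mse_evaluator(target_column="target")                          # per-fold metric

# One train/evaluate cycle per fold, spread across two worker processes
result = parallel_validator(data, split_fn, train_fn, eval_fn, n_jobs=2)
print(result["validator_log"][0]["fold_num"])   # -> 0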

github nubank / fklearn / tests / validation / test_validator.py
def test_parallel_validator():
    # data, split_fn, train_fn and eval_fn are defined earlier in the test (omitted from this excerpt)
    result = parallel_validator(data, split_fn, train_fn, eval_fn, n_jobs=2)

    validator_log = result["validator_log"]

    assert len(validator_log) == 2
    assert validator_log[0]['fold_num'] == 0
    assert result['train_log'][0]['xgb_classification_learner']['features'] == ['f1']

    assert len(validator_log[0]['eval_results']) == 3

    assert validator_log[1]['fold_num'] == 1
    assert len(validator_log[1]['eval_results']) == 1
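Each entry of validator_log carries a fold_num and a list of eval_results, as the assertions above show. A small helper (not part of fklearn, and assuming each eval result is a flat metric-to-value dict) can flatten that structure for inspection:

def summarize_folds(validator_log):
    """Flatten validator_log into (fold_num, metric, value) rows."""
    rows = []
    for fold in validator_log:
        for eval_log in fold["eval_results"]:
            for metric, value in eval_log.items():
                rows.append((fold["fold_num"], metric, value))
    return rows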

github nubank / fklearn / src / fklearn / tuning / parameter_tuners.py
    random_seed : int
        Random seed

    save_intermediary_fn : function(log) -> save to file
        Partially defined saver function that receives a log result from a
        tuning step and appends it into a file
        Example: save_intermediary_result(save_path='tuning.pkl')

    n_jobs : int
        Number of parallel processes to spawn when evaluating a training function

    Returns
    -------
    tuning_log : list of dict
        A list of tuning logs, each containing a training log and a validation log.
    """
    # Fall back to the serial validator when only one job is requested
    validation_fn = partial(parallel_validator, n_jobs=n_jobs) if n_jobs > 1 else validator

    def tune_iteration() -> ValidatorReturnType:
        iter_space = {k: space[k]() for k in space}
        train_fn = param_train_fn(iter_space)
        validator_log = validation_fn(train_data=train_set, split_fn=split_fn, train_fn=train_fn, eval_fn=eval_fn)

        if save_intermediary_fn is not None:
            save_intermediary_fn(validator_log)

        return validator_log

    seed(random_seed)

    return [tune_iteration() for _ in range(iterations)]
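For context, the space consumed by tune_iteration above maps each parameter name to a zero-argument sampler, matching space[k](). Below is a hedged sketch of driving this tuner, reusing data, split_fn and eval_fn from the first sketch; the keyword names are taken from the excerpt, but the exact signature of random_search_tuner is inferred, and the tuned parameter is illustrative:

from random import choice

from fklearn.tuning.parameter_tuners import random_search_tuner

# Each space entry is a zero-argument sampler, invoked once per iteration
space = {"fit_intercept": lambda: choice([True, False])}

def param_train_fn(iter_space):
    # Turn one sampled combination into a curried training function
    return linear_regression_learner(features=["f1"], target="target",
                                     params={"fit_intercept": iter_space["fit_intercept"]})

# Keyword arguments keep the call robust to the exact positional order
tuning_log = random_search_tuner(space=space, train_set=data,
                                 param_train_fn=param_train_fn,
                                 split_fn=split_fn, eval_fn=eval_fn,
                                 iterations=5, n_jobs=2)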

github nubank / fklearn / src / fklearn / tuning / parameter_tuners.py
    warm_start_file : str
        File containing intermediary results for grid search. If this file
        is present, we will perform grid search from the last combination of
        parameters.

    n_jobs : int
        Number of parallel processes to spawn when evaluating a training function


    Returns
    -------
    tuning_log : list of dict
        A list of tuning logs, each containing a training log and a validation log.
    """

    validation_fn = partial(parallel_validator, n_jobs=n_jobs) if n_jobs > 1 else validator

    def tune_iteration(iter_space: LogType) -> ValidatorReturnType:
        train_fn = param_train_fn(iter_space)
        validator_log = validation_fn(train_data=train_set, split_fn=split_fn, train_fn=train_fn, eval_fn=eval_fn)
        validator_log['iter_space'] = OrderedDict(sorted(iter_space.items()))

        if save_intermediary_fn is not None:
            save_intermediary_fn(validator_log)

        return validator_log

    sorted_space_keys = sorted(space.keys())
    params = (space[k]() for k in sorted_space_keys)
    combinations = set(product(*params))

    if warm_start_file is not None and load_intermediary_fn is not None:
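Note the different space convention here: each entry is a zero-argument function returning the full list of candidate values, so that product(*params) can enumerate every combination. An illustrative shape, mirroring the last three lines above:

from itertools import product

# Grid convention: each thunk returns all candidates for one parameter
space = {"C": lambda: [0.1, 1.0, 10.0],
         "max_iter": lambda: [100, 500]}

sorted_space_keys = sorted(space.keys())            # ['C', 'max_iter']
params = (space[k]() for k in sorted_space_keys)
combinations = set(product(*params))                # 3 x 2 = 6 tuples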

github nubank / fklearn / src / fklearn / tuning / selectors.py
    logs : list of list of dict
        A list of log-like lists of evaluation dictionaries. Each element of
        the list is one validation step of the algorithm.

    """

    selector_fn = remove_by_feature_importance(num_removed_by_step=num_removed_by_step)

    stop_fn = aggregate_stop_funcs(
        stop_by_no_improvement(extractor=extractor, metric_name=metric_name, early_stop=early_stop,
                               threshold=threshold),
        stop_by_iter_num(iter_limit=iter_limit),
        stop_by_num_features(min_num_features=min_remaining_features))

    train_fn = lambda df: param_train_fn(df, features)
    first_logs = parallel_validator(train_data, split_fn, train_fn, eval_fn, n_jobs=n_jobs)

    logs = [first_logs]
    while not stop_fn(logs):
        curr_log = first(logs)

        new_features = selector_fn(curr_log)
        new_train_fn = lambda df: param_train_fn(df, new_features)
        next_log = parallel_validator(train_data, split_fn, new_train_fn, eval_fn, n_jobs=n_jobs)

        if save_intermediary_fn is not None:
            save_intermediary_fn(next_log)

        logs = [next_log] + logs

    return logs
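Since each iteration prepends its result (logs = [next_log] + logs), the list is newest-first and logs[0] holds the final selection step. The extractor handed to stop_by_no_improvement typically comes from fklearn.metrics.pd_extractors; a hedged sketch, where the metric name is a hypothetical placeholder that must match the eval_fn in use:

from fklearn.metrics.pd_extractors import evaluator_extractor

# Hypothetical metric name; it must match the eval_fn actually in use
metric_name = "mse_evaluator__target"
extractor = evaluator_extractor(evaluator_name=metric_name)

# logs is newest-first, so the final selection step is logs[0]
final_log = logs[0]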

github nubank / fklearn / src / fklearn / tuning / selectors.py
    used_features = [list(mapcat(lambda key: features_sets[key], subset)) for subset in used_subsets]

    # Bind feat per-iteration (default arg) to avoid Python's late-binding closure pitfall
    trainers = [lambda df, feat=feat: param_train_fn(df, feat) for feat in used_features]

    first_val_logs = [parallel_validator(train_data, split_fn, train_func, eval_fn, n_jobs=n_jobs)
                      for train_func in trainers]
    logs = [[dict(log, **{"used_subsets": list(subset)}) for log, subset in zip(first_val_logs, used_subsets)]]

    while not stop_fn(logs):
        curr_log = first(logs)

        new_subsets = selector_fn(curr_log)
        new_features = [list(mapcat(lambda key: features_sets[key], subset)) for subset in new_subsets]

        # Same default-arg binding fix as above
        trainers = [lambda df, feat=feat: param_train_fn(df, feat) for feat in new_features]

        val_logs = [parallel_validator(train_data, split_fn, train_func, eval_fn, n_jobs=n_jobs)
                    for train_func in trainers]

        new_logs = [dict(log, **{"used_subsets": subset}) for log, subset in zip(val_logs, new_subsets)]

        if save_intermediary_fn is not None:
            save_intermediary_fn(new_logs)

        logs = [new_logs] + logs

    return logs
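For reference, the inputs consumed above have this shape: features_sets groups feature names under named keys, each subset is a tuple of those keys, and mapcat (from toolz) maps every key to its feature list and concatenates the results. An illustrative example with made-up names:

from toolz.curried import mapcat

# Illustrative grouping: keys name feature groups, values list the features
features_sets = {"demographic": ["age", "income"],
                 "behavioral": ["clicks", "visits"]}

subset = ("demographic", "behavioral")
used = list(mapcat(lambda key: features_sets[key], subset))
# -> ['age', 'income', 'clicks', 'visits']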