def test_stop_by_iter_num(logs):
    assert stop_by_iter_num(logs, iter_limit=1)
    assert not stop_by_iter_num(logs, iter_limit=10)
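# For reference, a stop criterion with this behavior only needs to compare the
# number of logged validation steps against the limit. A minimal sketch,
# assuming fklearn-style curried stop functions over a list of logs (the real
# implementation may differ in details):
from toolz import curry

@curry
def stop_by_iter_num(logs, iter_limit=50):
    # Stop once the number of logged validation steps reaches the limit.
    return len(logs) >= iter_limit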
Returns
----------
logs: list of list of dict
    A list of log-like lists of dictionary evaluations. Each element of the
    list is one validation step of the algorithm.
"""
selector_fn = remove_features_subsets(extractor=extractor,
                                      metric_name=metric_name,
                                      num_removed_by_step=num_removed_by_step)

stop_fn = aggregate_stop_funcs(
    stop_by_no_improvement_parallel(extractor=extractor, metric_name=metric_name, early_stop=early_stop,
                                    threshold=threshold),
    stop_by_iter_num(iter_limit=iter_limit),
    stop_by_num_features_parallel(extractor=extractor, metric_name=metric_name,
                                  min_num_features=min_remaining_features)
)
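# aggregate_stop_funcs combines several stop criteria into a single predicate;
# the loop below halts as soon as any of them fires. A minimal sketch of such
# a combinator (an assumption about its shape, not necessarily the library's
# exact code):
#
#     def aggregate_stop_funcs(*stop_funcs):
#         return lambda logs: any(stop_fn(logs) for stop_fn in stop_funcs)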
used_subsets = [features_sets.keys()]
used_features = [list(mapcat(lambda key: features_sets[key], subset)) for subset in used_subsets]

# Bind feat as a default argument: a plain closure would late-bind and make
# every trainer use the last feature list.
trainers = [lambda df, feat=feat: param_train_fn(df, feat) for feat in used_features]

first_val_logs = [parallel_validator(train_data, split_fn, train_func, eval_fn, n_jobs) for train_func in trainers]
logs = [[dict(log, **{"used_subsets": list(subset)}) for log, subset in zip(first_val_logs, used_subsets)]]

while not stop_fn(logs):
    curr_log = first(logs)
Number of parallel processes to spawn.

Returns
----------
logs: list of list of dict
    A list of log-like lists of dictionary evaluations. Each element of the
    list is one validation step of the algorithm.
"""
selector_fn = remove_by_feature_importance(num_removed_by_step=num_removed_by_step)

stop_fn = aggregate_stop_funcs(
    stop_by_no_improvement(extractor=extractor, metric_name=metric_name, early_stop=early_stop,
                           threshold=threshold),
    stop_by_iter_num(iter_limit=iter_limit),
    stop_by_num_features(min_num_features=min_remaining_features))

train_fn = lambda df: param_train_fn(df, features)

first_logs = parallel_validator(train_data, split_fn, train_fn, eval_fn, n_jobs=n_jobs)
logs = [first_logs]
while not stop_fn(logs):
    curr_log = first(logs)
    new_features = selector_fn(curr_log)
    new_train_fn = lambda df: param_train_fn(df, new_features)
    next_log = parallel_validator(train_data, split_fn, new_train_fn, eval_fn, n_jobs=n_jobs)

    if save_intermediary_fn is not None:
        save_intermediary_fn(next_log)

    # Prepend the newest step so first(logs) always sees the latest validation
    # result; without this update the stop criteria could never fire.
    logs = [next_log] + logs
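# The selector_fn used above shrinks the feature list on every step by
# dropping the least important features from the newest log. A minimal sketch
# of such a selector (get_used_features is a hypothetical helper returning the
# last model's features ranked from most to least important):
#
#     @curry
#     def remove_by_feature_importance(log, num_removed_by_step=5):
#         # Drop the num_removed_by_step least important features.
#         return get_used_features(log)[:-num_removed_by_step]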
selector_fn = remove_by_feature_shuffling(eval_fn=eval_fn,
                                          eval_data=test_data,
                                          extractor=extractor,
                                          metric_name=metric_name,
                                          max_removed_by_step=max_removed_by_step,
                                          threshold=threshold,
                                          speed_up_by_importance=speed_up_by_importance,
                                          parallel=parallel,
                                          nthread=nthread,
                                          seed=seed)

stop_fn = aggregate_stop_funcs(
    stop_by_no_improvement(extractor=extractor, metric_name=metric_name, early_stop=early_stop,
                           threshold=threshold),
    stop_by_iter_num(iter_limit=iter_limit),
    stop_by_num_features(min_num_features=min_remaining_features)
)
predict_fn_first, _, train_logs = param_train_fn(train_data, features)
eval_logs = eval_fn(predict_fn_first(test_data))

first_logs = {
    'train_log': train_logs,
    'validator_log': [
        {
            'fold_num': 0,
            'split_log': {
                'train_size': train_data.shape[0],
                'test_size': test_data.shape[0]
            },
            'eval_results': [eval_logs]
        }
    ]
}
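# This hand-built first_logs mimics a single fold of parallel_validator
# output, so the shuffling-based selector can be driven by the same kind of
# loop as the other strategies above. A hedged sketch of how iteration would
# proceed (assumed to mirror the backward-selection pattern, not confirmed by
# this snippet):
#
#     logs = [first_logs]
#     while not stop_fn(logs):
#         curr_log = first(logs)
#         new_features = selector_fn(curr_log)  # features surviving shuffling
#         ...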