Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
base_extractor=base_extractors)
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
# Define extractors
base_extractors = combined_evaluator_extractor(base_extractors=[
evaluator_extractor(evaluator_name="r2_evaluator__target"),
evaluator_extractor(evaluator_name="spearman_evaluator__target")
])
splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
base_extractor=base_extractors)
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
base_extractor=base_extractors)
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
evaluator_extractor(evaluator_name="spearman_evaluator__target")
])
splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
base_extractor=base_extractors)
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
])
splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
base_extractor=base_extractors)
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
base_extractors = combined_evaluator_extractor(base_extractors=[
evaluator_extractor(evaluator_name="r2_evaluator__target"),
evaluator_extractor(evaluator_name="spearman_evaluator__target")
])
splitter_extractor = split_evaluator_extractor(split_col='RAD', split_values=[4.0, 5.0, 24.0],
base_extractor=base_extractors)
temporal_week_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y-%W', base_extractor=base_extractors)
temporal_year_splitter_extractor = temporal_split_evaluator_extractor(
time_col='time', time_format='%Y', base_extractor=base_extractors)
assert extract(cv_results, base_extractors).shape == (5, 9)
assert extract(cv_results, splitter_extractor).shape == (15, 10)
assert extract(tlc_results, base_extractors).shape == (12, 9)
assert extract(tlc_results, splitter_extractor).shape == (36, 10)
assert extract(sc_results, base_extractors).shape == (5, 9)
assert extract(sc_results, splitter_extractor).shape == (15, 10)
assert extract(fw_sc_results, base_extractors).shape == (3, 9)
assert extract(fw_sc_results, splitter_extractor).shape == (9, 10)
n_time_week_folds = len(df['time'].dt.strftime('%Y-%W').unique())
n_time_year_folds = len(df['time'].dt.strftime('%Y').unique())
assert temporal_week_splitter_extractor(temporal_week_results).shape == (n_time_week_folds, 3)
assert temporal_year_splitter_extractor(temporal_year_results).shape == (n_time_year_folds, 3)
"cat": ["c1", "c1", "c2", None, "c2", "c4"],
'y': [2.3, 4.0, 100.0, -3.9, 100.0, -3.9]
})
df_test = pd.DataFrame({
'id': ["id4", "id4", "id5", "id6", "id5", "id6"],
'x1': [12.0, 1000.0, -4.0, 0.0, -4.0, 0.0],
"x2": [1, 1, 0, None, 0, 1],
"cat": ["c1", "c2", "c5", None, "c2", "c3"],
'y': [1.3, -4.0, 0.0, 49, 0.0, 49]
})
features = ["x1", "x2", "cat"]
target = "y"
train_fn = build_pipeline(
placeholder_imputer(columns_to_impute=["x1", "x2"], placeholder_value=-999),
onehot_categorizer(columns_to_categorize=["cat"], hardcode_nans=True),
xgb_regression_learner(features=features,
target=target,
num_estimators=20,
extra_params={"seed": 42}))
predict_fn, pred_train, log = train_fn(df_train)
pred_test = predict_fn(df_test)
expected_feature_columns_after_encoding = ["x1", "x2", "fklearn_feat__cat==c1", "fklearn_feat__cat==c2",
"fklearn_feat__cat==c4", "fklearn_feat__cat==nan"]
assert set(pred_test.columns) == set(expected_feature_columns_after_encoding + ["id", target, "prediction"])
def test_build_pipeline_predict_arguments_assertion():
    """Check that running a pipeline built from ``invalid_learner`` raises ``ValueError``.

    ``invalid_learner`` returns a predict function declared with ``*args`` and
    ``**kwargs``; presumably that variadic signature is what ``build_pipeline``
    rejects -- confirm against the ``build_pipeline`` implementation.
    """
    sample_frame = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]})

    @fp.curry
    def invalid_learner(df):
        # Predict function that accepts variable-length positional and keyword arguments.
        def variadic_predict(dataset, *extra_args, **extra_kwargs):
            return dataset + len(extra_args) + len(extra_kwargs)

        # Same 3-tuple the original returned: predict function, the input frame, empty dict.
        return variadic_predict, df, {}

    # Building and calling both stay inside the context manager so a ValueError
    # raised at either step satisfies the expectation.
    with pytest.raises(ValueError):
        build_pipeline(invalid_learner)(sample_frame)