    log = {'linear_regression_learner': {
        'features': features,
        'target': target,
        'parameters': params,
        'prediction_column': prediction_column,
        'package': "sklearn",
        'package_version': sk_version,
        'feature_importance': dict(zip(features, regr.coef_.flatten())),
        'training_samples': len(df)},
        'object': regr}

    return p, p(df), log
linear_regression_learner.__doc__ += learner_return_docstring("Linear Regression")
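
# A minimal usage sketch of the learner above (not from the fklearn source).
# It assumes linear_regression_learner shares the curried
# (df, features, target, ...) signature of the other learners in this file;
# the toy DataFrame and column names are illustrative.
import pandas as pd

train = pd.DataFrame({"x1": [1.0, 2.0, 3.0], "x2": [0.3, 0.1, 0.4], "y": [1.2, 2.1, 3.3]})
predict_fn, scored_train, log = linear_regression_learner(train, features=["x1", "x2"], target="y")
scored_new = predict_fn(train)  # appends the prediction_column (default "prediction")
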
@curry
@log_learner_time(learner_name='xgb_regression_learner')
def xgb_regression_learner(df: pd.DataFrame,
                           features: List[str],
                           target: str,
                           learning_rate: float = 0.1,
                           num_estimators: int = 100,
                           extra_params: Dict[str, Any] = None,
                           prediction_column: str = "prediction",
                           weight_column: str = None,
                           encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits an XGBoost regressor to the dataset. It first generates a DMatrix
    with the specified features and labels from `df`. Then it fits an XGBoost
    model to this DMatrix.
    """
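
# A minimal sketch of the DMatrix-based fit the docstring above describes,
# using the core xgboost API directly (the parameters and toy data are
# illustrative assumptions, not fklearn's exact call):
import numpy as np
import xgboost as xgb

X = np.random.rand(100, 2)                 # illustrative feature matrix
y = X @ np.array([1.5, -2.0])              # illustrative target
dtrain = xgb.DMatrix(X, label=y)
booster = xgb.train({"eta": 0.1, "objective": "reg:squarederror"}, dtrain, num_boost_round=100)
preds = booster.predict(xgb.DMatrix(X))
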
p.__doc__ = learner_pred_fn_docstring("isotonic_calibration_learner")
log = {'isotonic_calibration_learner': {
'output_column': output_column,
'target_column': target_column,
'prediction_column': prediction_column,
'package': "sklearn",
'package_version': sklearn.__version__,
'training_samples': len(df)},
'object': clf}
return p, p(df), log
isotonic_calibration_learner.__doc__ += learner_return_docstring("Isotonic Calibration")
    predict_columns = training_columns

    def p(new_data_set: pd.DataFrame) -> pd.DataFrame:
        return new_data_set[predict_columns]

    p.__doc__ = learner_pred_fn_docstring("selector")

    log = {'selector': {
        'training_columns': training_columns,
        'predict_columns': predict_columns,
        'transformed_column': list(set(training_columns).union(predict_columns))}}

    return p, df[training_columns], log
selector.__doc__ += learner_return_docstring("Selector")
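
# A minimal usage sketch of selector (toy data; it assumes predict_columns
# defaults to training_columns, as the fragment above suggests):
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
p, trimmed_df, log = selector(df, training_columns=["a", "b"])
same_selection = p(df)  # applies the same column selection to new data
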
@curry
@log_learner_time(learner_name='capper')
def capper(df: pd.DataFrame,
           columns_to_cap: List[str],
           precomputed_caps: Dict[str, float] = None) -> LearnerReturnType:
    """
    Learns the maximum value for each of the `columns_to_cap`
    and uses that as the cap for those columns. If precomputed caps
    are passed, the function uses those as the cap values instead of
    computing the maximum.

    Parameters
    ----------
    df : pandas.DataFrame
    """
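
# A rough sketch of the capping behaviour the docstring describes (an
# illustration of the idea, not the fklearn implementation; names are toy):
import pandas as pd

df = pd.DataFrame({"x": [1.0, 5.0, 3.0]})
caps = {"x": df["x"].max()}                          # learned cap per column
capped = df.assign(x=df["x"].clip(upper=caps["x"]))
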
    log = {'lgbm_classification_learner': {
        'features': features,
        'target': target,
        'prediction_column': prediction_column,
        'package': "lightgbm",
        'package_version': lgbm.__version__,
        'parameters': assoc(params, "num_estimators", num_estimators),
        'feature_importance': dict(zip(features, bst.feature_importance().tolist())),
        'training_samples': len(df)},
        'object': bst}

    return p, p(df), log
lgbm_classification_learner.__doc__ += learner_return_docstring("LGBM Classifier")
    log = {'catboost_classification_learner': {
        'features': features,
        'target': target,
        'prediction_column': prediction_column,
        'package': "catboost",
        'package_version': catboost.__version__,
        'parameters': assoc(params, "num_estimators", num_estimators),
        'feature_importance': cbr.feature_importances_,
        'training_samples': len(df)},
        'object': cbr}

    return p, p(df), log
catboost_classification_learner.__doc__ += learner_return_docstring("CatBoost Classifier")
@curry
@log_learner_time(learner_name='nlp_logistic_classification_learner')
def nlp_logistic_classification_learner(df: pd.DataFrame,
                                        text_feature_cols: List[str],
                                        target: str,
                                        vectorizer_params: LogType = None,
                                        logistic_params: LogType = None,
                                        prediction_column: str = "prediction") -> LearnerReturnType:
    """
    Fits a text vectorizer (TfidfVectorizer) followed by
    a logistic regression (LogisticRegression).
    """
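
# A minimal sketch of the TfidfVectorizer + LogisticRegression combination the
# docstring describes, using scikit-learn directly (toy data; parameters are
# illustrative, not fklearn's defaults):
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

texts = ["good product", "bad service", "great support", "poor quality"]
labels = [1, 0, 1, 0]
vec = TfidfVectorizer()
clf = LogisticRegression().fit(vec.fit_transform(texts), labels)
probs = clf.predict_proba(vec.transform(["good service"]))[:, 1]
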
    del ecdf
    del values
    del df_ecdf

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        if not ascending:
            tind = np.searchsorted(-x, -new_df[prediction_column])
        else:
            tind = np.searchsorted(x, new_df[prediction_column], side) - 1
        return new_df.assign(**{ecdf_column: y[tind].values})

    return p, p(df), log
discrete_ecdfer.__doc__ += learner_return_docstring("Discrete ECDFer")
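
# A small illustration of the np.searchsorted lookup inside p above: sorted
# prediction values x map to cumulative percentages y, and each new prediction
# picks up the ECDF value of the nearest training prediction (toy numbers,
# ascending case with side="right"):
import numpy as np

x = np.array([0.1, 0.4, 0.7])          # sorted unique training predictions
y = np.array([30.0, 60.0, 100.0])      # cumulative percentages (the ECDF)
new_preds = np.array([0.4, 0.9])
tind = np.searchsorted(x, new_preds, side="right") - 1
ecdf_values = y[tind]                  # -> array([ 60., 100.])
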
@curry
def prediction_ranger(df: pd.DataFrame,
                      prediction_min: float,
                      prediction_max: float,
                      prediction_column: str = "prediction") -> LearnerReturnType:
    """
    Caps and floors the specified prediction column to a set range.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain a `prediction_column` column.
    prediction_min : float
    """
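
# The body of prediction_ranger is not shown above; the behaviour the
# docstring describes amounts to clipping the prediction column, e.g.:
import pandas as pd

df = pd.DataFrame({"prediction": [-0.2, 0.4, 1.7]})
ranged = df.assign(prediction=df["prediction"].clip(lower=0.0, upper=1.0))
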
    log = {'xgb_classification_learner': {
        'features': features,
        'target': target,
        'prediction_column': prediction_column,
        'package': "xgboost",
        'package_version': xgb.__version__,
        'parameters': assoc(params, "num_estimators", num_estimators),
        'feature_importance': bst.get_score(),
        'training_samples': len(df)},
        'object': bst}

    return p, p(df), log
xgb_classification_learner.__doc__ += learner_return_docstring("XGBoost Classifier")
@curry
@log_learner_time(learner_name='catboost_classification_learner')
def catboost_classification_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    learning_rate: float = 0.1,
                                    num_estimators: int = 100,
                                    extra_params: LogType = None,
                                    prediction_column: str = "prediction",
                                    weight_column: str = None,
                                    encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits a CatBoost classifier to the dataset. It first generates a DMatrix
    with the specified features and labels from `df`. Then, it fits a CatBoost
    model to this DMatrix.
    """
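
# A minimal usage sketch of the learner above, following the signature shown
# (toy data; requires the catboost package):
import pandas as pd

train = pd.DataFrame({"x1": [0.1, 0.9, 0.3, 0.7], "x2": [1.0, 0.0, 1.0, 0.0], "y": [0, 1, 0, 1]})
predict_fn, scored_train, log = catboost_classification_learner(
    train, features=["x1", "x2"], target="y", learning_rate=0.1, num_estimators=100)
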
    def p(new_data_set: pd.DataFrame) -> pd.DataFrame:
        return new_data_set

    p.__doc__ = learner_pred_fn_docstring("null_injector")

    log = {'null_injector': {
        "columns_to_inject": columns_to_inject,
        "proportion": proportion,
        "groups": groups
    }}

    return p, null_data, log
null_injector.__doc__ += learner_return_docstring("Null Injector")
@curry
@log_learner_time(learner_name='missing_warner')
def missing_warner(df: pd.DataFrame, cols_list: List[str],
                   new_column_name: str = "has_unexpected_missing",
                   detailed_warning: bool = False,
                   detailed_column_name: Optional[str] = None) -> LearnerReturnType:
    """
    Creates a new column to warn about rows in which columns that had no
    missing values in the training set have missing values at scoring time.

    Parameters
    ----------
    df : pandas.DataFrame
    """
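
# A rough sketch of the warning logic the docstring describes (an illustration
# of the idea, not the fklearn implementation): flag rows where a column that
# had no missing values at training time is missing at scoring time.
import pandas as pd

cols_list = ["age"]                                # columns complete in training
scoring_df = pd.DataFrame({"age": [31.0, None]})
flagged = scoring_df.assign(
    has_unexpected_missing=scoring_df[cols_list].isna().any(axis=1))
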
    def p(new_data_set: pd.DataFrame) -> pd.DataFrame:
        new_data = scaler.transform(new_data_set[columns_to_scale].values)
        new_cols = pd.DataFrame(data=new_data, columns=columns_to_scale).to_dict('list')
        return new_data_set.assign(**new_cols)

    p.__doc__ = learner_pred_fn_docstring("standard_scaler")

    log = {'standard_scaler': {
        'standard_scaler': scaler.get_params(),
        'transformed_column': columns_to_scale}}

    return p, p(df), log
standard_scaler.__doc__ += learner_return_docstring("Standard Scaler")
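
# A minimal sketch of what the predict function above does, calling the
# scikit-learn scaler directly (column name is illustrative):
import pandas as pd
from sklearn.preprocessing import StandardScaler

df = pd.DataFrame({"x": [1.0, 2.0, 3.0]})
scaler = StandardScaler().fit(df[["x"]].values)
scaled = df.assign(x=scaler.transform(df[["x"]].values).ravel())
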
@curry
@log_learner_time(learner_name='custom_transformer')
def custom_transformer(df: pd.DataFrame,
                       columns_to_transform: List[str],
                       transformation_function: Callable[[pd.DataFrame], pd.DataFrame],
                       is_vectorized: bool = False) -> LearnerReturnType:
    """
    Applies a custom function to the desired columns.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain the `columns_to_transform` columns.
    """
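
# A minimal usage sketch of custom_transformer (the transformation function
# and column name are illustrative):
import numpy as np
import pandas as pd

df = pd.DataFrame({"x": [1.0, 10.0, 100.0]})
p, transformed_df, log = custom_transformer(df, columns_to_transform=["x"],
                                            transformation_function=np.log)
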
            col_dict = {}
            for (key, value) in enumerate(pred.T):
                col_dict.update({prediction_column + "_" + str(key): value})
        elif supervised_type == 'regression':
            col_dict = {prediction_column: model.predict(new_df[features].values)}
        return new_df.assign(**col_dict)

    p.__doc__ = learner_pred_fn_docstring("custom_supervised_model_learner")

    log["object"] = model

    return p, p(df), log
custom_supervised_model_learner.__doc__ += learner_return_docstring("Custom Supervised Model Learner")
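
# A small illustration of the prediction-column naming in the classification
# branch above: one column per class from predict_proba (toy numbers):
import numpy as np

prediction_column = "prediction"
pred = np.array([[0.7, 0.3], [0.2, 0.8]])   # predict_proba output, shape (n_rows, n_classes)
col_dict = {}
for key, value in enumerate(pred.T):
    col_dict[prediction_column + "_" + str(key)] = value
# -> {"prediction_0": array([0.7, 0.2]), "prediction_1": array([0.3, 0.8])}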