@log_learner_time(learner_name='xgb_octopus_classification_learner')
def xgb_octopus_classification_learner(train_set: pd.DataFrame,
                                       learning_rate_by_bin: Dict[T, float],
                                       num_estimators_by_bin: Dict[T, int],
                                       extra_params_by_bin: Dict[T, Dict[str, Any]],
                                       features_by_bin: Dict[T, List[str]],
                                       train_split_col: str,
                                       train_split_bins: List,
                                       nthread: int,
                                       target_column: str,
                                       prediction_column: str = "prediction") -> LearnerReturnType:
    """
    The Octopus ensemble lets you inject domain-specific knowledge by forcing a split on an
    initial feature, instead of assuming the tree model will find that split on its own. It works
    by first splitting your dataset on `train_split_col` and then training one individual model
    on each resulting subset.
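
# Usage sketch for the learner above (the toy DataFrame, bin values and hyper-parameters are
# illustrative assumptions, and the learner is assumed to already be in scope). It trains one
# XGBoost classifier per value of `train_split_col` and unpacks the LearnerReturnType triple
# as (predict function, scored DataFrame, log dict).
import pandas as pd

train_set = pd.DataFrame({
    "region": [1, 1, 1, 1, 2, 2, 2, 2],
    "x1":     [0.1, 0.4, 0.6, 0.9, 0.2, 0.5, 0.7, 0.8],
    "x2":     [1.0, 0.8, 0.3, 0.1, 0.9, 0.6, 0.4, 0.2],
    "target": [0, 0, 1, 1, 0, 1, 1, 1],
})

predict_fn, scored_df, log = xgb_octopus_classification_learner(
    train_set,
    learning_rate_by_bin={1: 0.1, 2: 0.05},
    num_estimators_by_bin={1: 20, 2: 40},
    extra_params_by_bin={1: {"max_depth": 2}, 2: {"max_depth": 3}},
    features_by_bin={1: ["x1", "x2"], 2: ["x1", "x2"]},
    train_split_col="region",
    train_split_bins=[1, 2],
    nthread=1,
    target_column="target",
)
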
@log_learner_time(learner_name='xgb_regression_learner')
def xgb_regression_learner(df: pd.DataFrame,
                           features: List[str],
                           target: str,
                           learning_rate: float = 0.1,
                           num_estimators: int = 100,
                           extra_params: Dict[str, Any] = None,
                           prediction_column: str = "prediction",
                           weight_column: str = None,
                           encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits an XGBoost regressor to the dataset. It first generates a DMatrix
    with the specified features and labels from `df`. Then it fits an XGBoost
    model to this DMatrix. Returns the predict function for the model and the
    predictions for the input dataset.

    Parameters
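
# Usage sketch for the learner above (toy data and hyper-parameters are illustrative
# assumptions; the learner is assumed to be in scope):
import pandas as pd

df = pd.DataFrame({"x1": [1.0, 2.0, 3.0, 4.0],
                   "x2": [0.5, 1.5, 2.5, 3.5],
                   "y":  [1.1, 2.2, 2.9, 4.1]})

predict_fn, scored_df, log = xgb_regression_learner(
    df,
    features=["x1", "x2"],
    target="y",
    learning_rate=0.1,
    num_estimators=50,
)
# `scored_df` should contain the original columns plus the "prediction" column;
# `predict_fn` can then be applied to new DataFrames with the same features.
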
@log_learner_time(learner_name='catboost_classification_learner')
def catboost_classification_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    learning_rate: float = 0.1,
                                    num_estimators: int = 100,
                                    extra_params: LogType = None,
                                    prediction_column: str = "prediction",
                                    weight_column: str = None,
                                    encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits a CatBoost classifier to the dataset. It first generates a Pool
    with the specified features and labels from `df`. Then, it fits a CatBoost
    model to this Pool. Returns the predict function for the model and the
    predictions for the input dataset.

    Parameters
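
# Usage sketch for the learner above (toy data and settings are illustrative assumptions;
# the learner is assumed to be in scope):
import pandas as pd

df = pd.DataFrame({"x1": [0.1, 0.4, 0.6, 0.9, 0.3, 0.8],
                   "x2": [1.0, 0.8, 0.3, 0.1, 0.7, 0.2],
                   "y":  [0, 0, 1, 1, 0, 1]})

predict_fn, scored_df, log = catboost_classification_learner(
    df,
    features=["x1", "x2"],
    target="y",
    learning_rate=0.1,
    num_estimators=50,
)
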
@log_learner_time(learner_name='isolation_forest_learner')
def isolation_forest_learner(df: pd.DataFrame,
                             features: List[str],
                             params: Dict[str, Any] = None,
                             prediction_column: str = "prediction",
                             encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits an anomaly detection algorithm (Isolation Forest) to the dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame with the feature columns. The model is fitted
        on these features only; no target column is needed.
    features : list of str
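
# Usage sketch for the learner above (toy data and `params` values are illustrative
# assumptions; `params` is expected to be forwarded to scikit-learn's IsolationForest):
import pandas as pd

df = pd.DataFrame({"x1": [0.1, 0.2, 0.15, 0.18, 5.0],
                   "x2": [1.0, 1.1, 0.95, 1.05, -4.0]})

predict_fn, scored_df, log = isolation_forest_learner(
    df,
    features=["x1", "x2"],
    params={"n_estimators": 50, "contamination": 0.2},
)
# Rows like the last one (far from the rest) should receive a more anomalous
# score in the "prediction" column.
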
@log_learner_time(learner_name='target_categorizer')
def target_categorizer(df: pd.DataFrame,
                       columns_to_categorize: List[str],
                       target_column: str,
                       smoothing: float = 1.0,
                       ignore_unseen: bool = True,
                       store_mapping: bool = False) -> LearnerReturnType:
    """
    Replaces categorical variables with the smoothed mean of the target variable by category.
    Uses a weighted average with the overall mean of the target variable for smoothing.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain `columns_to_categorize` and `target_column` columns.
    columns_to_categorize : list of str
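
# Usage sketch for the encoder above (toy data is an illustrative assumption; the function
# is assumed to be in scope). Each category is replaced by its smoothed target mean, i.e. a
# weighted average between the per-category mean and the overall mean of `target_column`.
import pandas as pd

df = pd.DataFrame({"city": ["a", "a", "b", "b", "c"],
                   "y":    [1, 0, 1, 1, 0]})

categorizer_fn, transformed_df, log = target_categorizer(
    df,
    columns_to_categorize=["city"],
    target_column="y",
    smoothing=1.0,
    store_mapping=True,
)
# Assuming the usual count-weighted smoothing form
# (count * category_mean + smoothing * global_mean) / (count + smoothing),
# category "a" would map to (2 * 0.5 + 1 * 0.6) / (2 + 1) = 0.533...
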
@log_learner_time(learner_name='null_injector')
def null_injector(df: pd.DataFrame,
                  proportion: float,
                  columns_to_inject: Optional[List[str]] = None,
                  groups: Optional[List[List[str]]] = None,
                  seed: int = 1) -> LearnerReturnType:
    """
    Injects null values into a random sample of rows of the desired columns
    (or groups of columns), according to the given proportion.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain `columns_to_inject` as columns.
    columns_to_inject : list of str
        A list of columns to inject nulls into. If `groups` is not None,
        this parameter is ignored.
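
# Usage sketch for the transformer above (toy data and proportion are illustrative
# assumptions; the function is assumed to be in scope). Roughly 30% of the values in each
# listed column are replaced with nulls, which can be used to test how a model copes with
# missing data.
import pandas as pd

df = pd.DataFrame({"x1": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
                   "x2": [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]})

inject_fn, noisy_df, log = null_injector(
    df,
    proportion=0.3,
    columns_to_inject=["x1", "x2"],
    seed=42,
)
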
@log_learner_time(learner_name="truncate_categorical")
def truncate_categorical(df: pd.DataFrame,
                         columns_to_truncate: List[str],
                         percentile: float,
                         replacement: Union[str, float] = -9999,
                         replace_unseen: Union[str, float] = -9999,
                         store_mapping: bool = False) -> LearnerReturnType:
    """
    Truncates infrequent categories and replaces them with a single one.
    You can think of it as an "others" category.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain the `columns_to_truncate` columns.
    columns_to_truncate : list of str
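
# Usage sketch for the transformer above (toy data and threshold are illustrative
# assumptions; the function is assumed to be in scope). Categories that are too infrequent
# are collapsed into the `replacement` value.
import pandas as pd

df = pd.DataFrame({"city": ["a"] * 6 + ["b"] * 3 + ["c"]})

truncate_fn, truncated_df, log = truncate_categorical(
    df,
    columns_to_truncate=["city"],
    percentile=0.2,
    replacement="others",
)
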
@log_learner_time(learner_name='catboost_regressor_learner')
def catboost_regressor_learner(df: pd.DataFrame,
                               features: List[str],
                               target: str,
                               learning_rate: float = 0.1,
                               num_estimators: int = 100,
                               extra_params: Dict[str, Any] = None,
                               prediction_column: str = "prediction",
                               weight_column: str = None) -> LearnerReturnType:
    """
    Fits a CatBoost regressor to the dataset. It first generates a Pool
    with the specified features and labels from `df`. Then it fits a CatBoost
    model to this Pool. Returns the predict function for the model and the
    predictions for the input dataset.

    Parameters
    ----------
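
# Usage sketch for the learner above (toy data and settings are illustrative assumptions;
# the learner is assumed to be in scope):
import pandas as pd

df = pd.DataFrame({"x1": [1.0, 2.0, 3.0, 4.0],
                   "x2": [0.5, 1.5, 2.5, 3.5],
                   "y":  [1.2, 1.9, 3.1, 3.9]})

predict_fn, scored_df, log = catboost_regressor_learner(
    df,
    features=["x1", "x2"],
    target="y",
    learning_rate=0.1,
    num_estimators=50,
)
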
@log_learner_time(learner_name='custom_supervised_model_learner')
def custom_supervised_model_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    model: Any,
                                    supervised_type: str,
                                    log: Dict[str, Dict],
                                    prediction_column: str = "prediction") -> LearnerReturnType:
    """
    Fits a custom model to the dataset.
    Returns the predict function, the predictions for the input dataset and a log describing the model.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame with features and target columns.
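
# Usage sketch for the learner above (toy data, the wrapped model, the `supervised_type`
# value and the structure of the `log` argument are all illustrative assumptions; the
# learner is assumed to be in scope). An estimator exposing a scikit-learn-style
# fit/predict interface is assumed to work here.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier

df = pd.DataFrame({"x1": [0.1, 0.4, 0.6, 0.9, 0.3, 0.8],
                   "x2": [1.0, 0.8, 0.3, 0.1, 0.7, 0.2],
                   "y":  [0, 0, 1, 1, 0, 1]})

predict_fn, scored_df, log = custom_supervised_model_learner(
    df,
    features=["x1", "x2"],
    target="y",
    model=RandomForestClassifier(n_estimators=10, random_state=0),
    supervised_type="classification",
    log={"random_forest_classifier": {"n_estimators": 10}},
)
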
@log_learner_time(learner_name='logistic_classification_learner')
def logistic_classification_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    params: LogType = None,
                                    prediction_column: str = "prediction",
                                    weight_column: str = None,
                                    encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits a logistic regression classifier to the dataset. Returns the predict function
    for the model and the predictions for the input dataset.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame with features and target columns.
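
# Usage sketch for the learner above (toy data and `params` values are illustrative
# assumptions; `params` is expected to be forwarded to scikit-learn's LogisticRegression):
import pandas as pd

df = pd.DataFrame({"x1": [0.1, 0.4, 0.6, 0.9, 0.3, 0.8],
                   "x2": [1.0, 0.8, 0.3, 0.1, 0.7, 0.2],
                   "y":  [0, 0, 1, 1, 0, 1]})

predict_fn, scored_df, log = logistic_classification_learner(
    df,
    features=["x1", "x2"],
    target="y",
    params={"C": 1.0, "max_iter": 500},
)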