How to use the fklearn.common_docstrings.learner_return_docstring function in fklearn

To help you get started, we’ve selected a few fklearn examples based on popular ways this function is used in public projects.
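
fklearn gives every learner the same return contract, and `learner_return_docstring` generates the standardized "Returns" section that describes it, so each learner documents only its own parameters and then appends the shared text to its `__doc__`. A minimal sketch of the pattern (the learner below is hypothetical):

from fklearn.common_docstrings import learner_return_docstring

def my_learner(df, features, target):
    """
    Fits a hypothetical model to the dataset.
    """
    ...

# Append the shared "Returns" section, naming the model for the docstring.
my_learner.__doc__ += learner_return_docstring("My Model")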

From nubank/fklearn, src/fklearn/training/regression.py:
    # Record the fit for auditability: inputs, params, versions, importances.
    log = {'linear_regression_learner': {
        'features': features,
        'target': target,
        'parameters': params,
        'prediction_column': prediction_column,
        'package': "sklearn",
        'package_version': sk_version,
        'feature_importance': dict(zip(features, regr.coef_.flatten())),
        'training_samples': len(df)},
        'object': regr}

    return p, p(df), log


linear_regression_learner.__doc__ += learner_return_docstring("Linear Regression")


@curry
@log_learner_time(learner_name='xgb_regression_learner')
def xgb_regression_learner(df: pd.DataFrame,
                           features: List[str],
                           target: str,
                           learning_rate: float = 0.1,
                           num_estimators: int = 100,
                           extra_params: Dict[str, Any] = None,
                           prediction_column: str = "prediction",
                           weight_column: str = None,
                           encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits an XGBoost regressor to the dataset. It first generates a DMatrix
    with the specified features and labels from `df`. Then it fits an XGBoost
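
The `return p, p(df), log` pattern above is exactly the contract that `learner_return_docstring` documents: a prediction function `p`, the training set scored with `p(df)`, and a log dict. A hedged usage sketch, assuming `train_df` and `holdout_df` are DataFrames with the made-up columns below:

from fklearn.training.regression import linear_regression_learner

p, train_scored, logs = linear_regression_learner(
    train_df,
    features=["x1", "x2"],  # assumed column names
    target="y",
)

holdout_scored = p(holdout_df)  # reuse the fitted model on new data
print(logs["linear_regression_learner"]["feature_importance"])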

From nubank/fklearn, src/fklearn/training/calibration.py:
    p.__doc__ = learner_pred_fn_docstring("isotonic_calibration_learner")

    log = {'isotonic_calibration_learner': {
        'output_column': output_column,
        'target_column': target_column,
        'prediction_column': prediction_column,
        'package': "sklearn",
        'package_version': sklearn.__version__,
        'training_samples': len(df)},
        'object': clf}

    return p, p(df), log


isotonic_calibration_learner.__doc__ += learner_return_docstring("Isotonic Calibration")
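
Since every learner returns this same triple and is curried, learners compose cleanly: fklearn's `build_pipeline` chains partially applied learners into a single learner with the same contract. A sketch, assuming `train_df` holds the made-up columns named below:

from fklearn.training.pipeline import build_pipeline
from fklearn.training.regression import linear_regression_learner
from fklearn.training.calibration import isotonic_calibration_learner

pipeline = build_pipeline(
    linear_regression_learner(features=["x1", "x2"], target="y"),
    isotonic_calibration_learner(target_column="y"),
)
predict_fn, train_scored, logs = pipeline(train_df)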

From nubank/fklearn, src/fklearn/training/transformation.py:
    predict_columns = training_columns

    def p(new_data_set: pd.DataFrame) -> pd.DataFrame:
        return new_data_set[predict_columns]

    p.__doc__ = learner_pred_fn_docstring("selector")

    log = {'selector': {
        'training_columns': training_columns,
        'predict_columns': predict_columns,
        'transformed_column': list(set(training_columns).union(predict_columns))}}

    return p, df[training_columns], log


selector.__doc__ += learner_return_docstring("Selector")


@curry
@log_learner_time(learner_name='capper')
def capper(df: pd.DataFrame,
           columns_to_cap: List[str],
           precomputed_caps: Dict[str, float] = None) -> LearnerReturnType:
    """
    Learns the maximum value for each of the `columns_to_cap`
    and uses that as the cap for those columns. If precomputed caps
    are passed, the function uses that as the cap value instead of
    computing the maximum.

    Parameters
    ----------
    df : pandas.DataFrame
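
A hedged usage sketch for `capper`, with a made-up column name; supplying `precomputed_caps` skips learning the maximum from the data:

from fklearn.training.transformation import capper

# Learn the cap from the data...
p, capped_df, log = capper(train_df, columns_to_cap=["income"])

# ...or supply a precomputed (hypothetical) cap value directly.
p, capped_df, log = capper(train_df, columns_to_cap=["income"],
                           precomputed_caps={"income": 50000.0})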

From nubank/fklearn, src/fklearn/training/classification.py:
    log = {'lgbm_classification_learner': {
        'features': features,
        'target': target,
        'prediction_column': prediction_column,
        'package': "lightgbm",
        'package_version': lgbm.__version__,
        'parameters': assoc(params, "num_estimators", num_estimators),
        'feature_importance': dict(zip(features, bst.feature_importance().tolist())),
        'training_samples': len(df)},
        'object': bst}

    return p, p(df), log


lgbm_classification_learner.__doc__ += learner_return_docstring("LGBM Classifier")
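
The log above is keyed by learner name, with the fitted booster stored separately under `"object"`, so both the metadata and the model itself can be read back. A sketch, assuming the triple came from `lgbm_classification_learner`:

importances = logs["lgbm_classification_learner"]["feature_importance"]
booster = logs["object"]  # the fitted LightGBM Booster itself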

From nubank/fklearn, src/fklearn/training/classification.py:
    log = {'catboost_classification_learner': {
        'features': features,
        'target': target,
        'prediction_column': prediction_column,
        'package': "catboost",
        'package_version': catboost.__version__,
        'parameters': assoc(params, "num_estimators", num_estimators),
        'feature_importance': cbr.feature_importances_,
        'training_samples': len(df)},
        'object': cbr}

    return p, p(df), log


catboost_classification_learner.__doc__ += learner_return_docstring("catboost_classification_learner")


@curry
@log_learner_time(learner_name='nlp_logistic_classification_learner')
def nlp_logistic_classification_learner(df: pd.DataFrame,
                                        text_feature_cols: List[str],
                                        target: str,
                                        vectorizer_params: LogType = None,
                                        logistic_params: LogType = None,
                                        prediction_column: str = "prediction") -> LearnerReturnType:
    """
    Fits a text vectorizer (TfidfVectorizer) followed by
    a logistic regression (LogisticRegression).

    Parameters
    ----------
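
A hedged usage sketch, with hypothetical column names; per the docstring, the two param dicts are forwarded to sklearn's TfidfVectorizer and LogisticRegression:

from fklearn.training.classification import nlp_logistic_classification_learner

p, train_scored, logs = nlp_logistic_classification_learner(
    train_df,
    text_feature_cols=["review_text"],  # assumed text column
    target="label",
    vectorizer_params={"ngram_range": (1, 2)},
    logistic_params={"C": 1.0},
)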

From nubank/fklearn, src/fklearn/training/transformation.py:
    del ecdf
    del values
    del df_ecdf

    def p(new_df: pd.DataFrame) -> pd.DataFrame:
        if not ascending:
            tind = np.searchsorted(-x, -new_df[prediction_column])
        else:
            tind = np.searchsorted(x, new_df[prediction_column], side) - 1

        return new_df.assign(**{ecdf_column: y[tind].values})

    return p, p(df), log


discrete_ecdfer.__doc__ += learner_return_docstring("Discrete ECDFer")


@curry
def prediction_ranger(df: pd.DataFrame,
                      prediction_min: float,
                      prediction_max: float,
                      prediction_column: str = "prediction") -> LearnerReturnType:
    """
    Caps and floors the specified prediction column to a set range.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain a `prediction_column` column.

    prediction_min : float
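
A hedged usage sketch for `prediction_ranger`, assuming `scored_df` already has a "prediction" column to clip into [0, 1]:

from fklearn.training.transformation import prediction_ranger

p, ranged_df, log = prediction_ranger(scored_df,
                                      prediction_min=0.0,
                                      prediction_max=1.0)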

From nubank/fklearn, src/fklearn/training/classification.py:
    log = {'xgb_classification_learner': {
        'features': features,
        'target': target,
        'prediction_column': prediction_column,
        'package': "xgboost",
        'package_version': xgb.__version__,
        'parameters': assoc(params, "num_estimators", num_estimators),
        'feature_importance': bst.get_score(),
        'training_samples': len(df)},
        'object': bst}

    return p, p(df), log


xgb_classification_learner.__doc__ += learner_return_docstring("XGBoost Classifier")


@curry
@log_learner_time(learner_name='catboost_classification_learner')
def catboost_classification_learner(df: pd.DataFrame,
                                    features: List[str],
                                    target: str,
                                    learning_rate: float = 0.1,
                                    num_estimators: int = 100,
                                    extra_params: LogType = None,
                                    prediction_column: str = "prediction",
                                    weight_column: str = None,
                                    encode_extra_cols: bool = True) -> LearnerReturnType:
    """
    Fits a CatBoost classifier to the dataset. It first generates a Pool
    with the specified features and labels from `df`. Then, it fits a CatBoost
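
Because the learners are wrapped in `@curry`, everything except the DataFrame can be bound up front and the resulting learner reused across datasets. A sketch with assumed column names:

from fklearn.training.classification import catboost_classification_learner

learn = catboost_classification_learner(features=["x1", "x2"],
                                        target="y",
                                        num_estimators=200)
p, train_scored, logs = learn(train_df)  # fit on the training set
holdout_scored = p(holdout_df)           # score a holdout set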

From nubank/fklearn, src/fklearn/training/transformation.py:
    def p(new_data_set: pd.DataFrame) -> pd.DataFrame:
        return new_data_set

    p.__doc__ = learner_pred_fn_docstring("null_injector")

    log = {'null_injector': {
        "columns_to_inject": columns_to_inject,
        "proportion": proportion,
        "groups": groups
    }}

    return p, null_data, log


null_injector.__doc__ += learner_return_docstring("Null Injector")


@curry
@log_learner_time(learner_name='missing_warner')
def missing_warner(df: pd.DataFrame, cols_list: List[str],
                   new_column_name: str = "has_unexpected_missing",
                   detailed_warning: bool = False,
                   detailed_column_name: Optional[str] = None) -> LearnerReturnType:
    """
    Creates a new column to warn about rows whose columns had no missing values
    in the training set but have missing values at scoring time.

    Parameters
    ----------

    df : pandas.DataFrame
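
A hedged usage sketch for `missing_warner`; the column names are made up:

from fklearn.training.transformation import missing_warner

p, train_df_out, log = missing_warner(train_df, cols_list=["age", "income"])

# At scoring time, rows with unexpected missing values get flagged.
flagged = p(new_df)  # adds the "has_unexpected_missing" boolean column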

From nubank/fklearn, src/fklearn/training/transformation.py:
    def p(new_data_set: pd.DataFrame) -> pd.DataFrame:
        new_data = scaler.transform(new_data_set[columns_to_scale].values)
        new_cols = pd.DataFrame(data=new_data, columns=columns_to_scale).to_dict('list')
        return new_data_set.assign(**new_cols)

    p.__doc__ = learner_pred_fn_docstring("standard_scaler")

    log = {'standard_scaler': {
        'standard_scaler': scaler.get_params(),
        'transformed_column': columns_to_scale}}

    return p, p(df), log


standard_scaler.__doc__ += learner_return_docstring("Standard Scaler")


@curry
@log_learner_time(learner_name='custom_transformer')
def custom_transformer(df: pd.DataFrame,
                       columns_to_transform: List[str],
                       transformation_function: Callable[[pd.DataFrame], pd.DataFrame],
                       is_vectorized: bool = False) -> LearnerReturnType:
    """
    Applies a custom function to the desired columns.

    Parameters
    ----------
    df : pandas.DataFrame
        A Pandas' DataFrame that must contain `columns_to_transform`
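
A hedged usage sketch for `custom_transformer`, with a hypothetical column; `np.log1p` is vectorized, so `is_vectorized=True` lets it run on the whole column at once rather than row by row:

import numpy as np
from fklearn.training.transformation import custom_transformer

p, transformed_df, log = custom_transformer(
    train_df,
    columns_to_transform=["amount"],
    transformation_function=np.log1p,
    is_vectorized=True,
)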

From nubank/fklearn, src/fklearn/training/regression.py:
            col_dict = {}
            for (key, value) in enumerate(pred.T):
                col_dict.update({prediction_column + "_" + str(key): value})
        elif supervised_type == 'regression':
            col_dict = {prediction_column: model.predict(new_df[features].values)}

        return new_df.assign(**col_dict)

    p.__doc__ = learner_pred_fn_docstring("custom_supervised_model_learner")

    log["object"] = model

    return p, p(df), log


custom_supervised_model_learner.__doc__ += learner_return_docstring("Custom Supervised Model Learner")