How to use the dbnd.log_metric function in dbnd

To help you get started, we’ve selected a few dbnd examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github databand-ai / dbnd / examples / src / dbnd_examples / pipelines / wine_quality / wine_quality_decorators_py3.py View on Github external
test_set: DataFrame,
    training_set: DataFrame,
    alpha: float = 0.5,
    l1_ratio: float = 0.5,
) -> ElasticNet:
    """ Train wine prediction model """
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    lr.fit(training_set.drop(["quality"], 1), training_set[["quality"]])
    prediction = lr.predict(test_set.drop(["quality"], 1))

    (rmse, mae, r2) = calculate_metrics(test_set[["quality"]], prediction)

    log_metric("alpha", alpha)
    log_metric("rmse", rmse)
    log_metric("mae", rmse)
    log_metric("r2", r2)

    logging.info(
        "Elasticnet model (alpha=%f, l1_ratio=%f): rmse = %f, mae = %f, r2 = %f",
        alpha,
        l1_ratio,
        rmse,
        mae,
        r2,
    )
    return lr
github databand-ai / dbnd / examples / src / dbnd_examples / pipelines / train / train_pipeline.py View on Github external
def train_model(
    test_set: pd.DataFrame,
    training_set: pd.DataFrame,
    alpha: float = 1.0,
    l1_ratio: float = 0.5,
) -> ElasticNet:
    """Train an ElasticNet regressor and log its evaluation metrics.

    Args:
        test_set: Hold-out rows containing a "target" column, used for scoring.
        training_set: Training rows containing a "target" column.
        alpha: ElasticNet regularization strength.
        l1_ratio: Mix between L1 and L2 penalties (0 = ridge, 1 = lasso).

    Returns:
        The fitted ElasticNet model.
    """
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    # Use the keyword form of drop(): the positional `axis` argument was
    # deprecated in pandas 1.0 and removed in pandas 2.0.
    lr.fit(training_set.drop(columns=["target"]), training_set[["target"]])
    prediction = lr.predict(test_set.drop(columns=["target"]))

    (rmse, mae, r2) = calculate_metrics(test_set[["target"]], prediction)

    log_metric("rmse", rmse)
    log_metric("mae", mae)
    log_metric("r2", r2)

    logging.info(
        "Elasticnet model (alpha=%f, l1_ratio=%f): rmse = %f, mae = %f, r2 = %f",
        alpha,
        l1_ratio,
        rmse,
        mae,
        r2,
    )
    return lr
github databand-ai / dbnd / examples / src / dbnd_examples / pipelines / train / train_via_spark.py View on Github external
def split_data_spark(
    raw_data: spark.DataFrame,
) -> Tuple[spark.DataFrame, spark.DataFrame, spark.DataFrame]:
    """Split raw data into train/test/validation sets and log target stats.

    Returns:
        (train, test, validation) — roughly 80% / 10% / 10% of the input rows.
    """
    columns_to_remove = {"id", "0_norm", "10_norm"}
    if columns_to_remove.issubset(raw_data.schema.names):
        # pyspark's DataFrame.drop takes column names as varargs,
        # not a single collection argument.
        raw_data = raw_data.drop(*columns_to_remove)

    (train, test) = raw_data.randomSplit([0.8, 0.2])
    # Split the *test* slice (not raw_data again) so the validation and test
    # sets cannot overlap the training set; this mirrors the pandas
    # split_data implementation in train_pipeline.py.
    (test, validation) = test.randomSplit([0.5, 0.5])

    target_stats = raw_data.describe(["target"])

    log_metric(
        "target.mean",
        target_stats.filter(target_stats["summary"] == "mean")
        .collect()[0]
        .asDict()["target"],
    )
    log_metric(
        "target.std",
        target_stats.filter(target_stats["summary"] == "stddev")
        .collect()[0]
        .asDict()["target"],
    )

    return train, test, validation
github databand-ai / dbnd / examples / src / dbnd_examples / pipelines / train / train_pipeline.py View on Github external
def split_data(
    raw_data: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Split raw data into train/test/validation sets and log target stats.

    Returns:
        (train_df, test_df, validation_df) — roughly 75% / 12.5% / 12.5%
        of the input rows (sklearn's default test_size is 0.25).
    """
    columns_to_remove = {"id", "0_norm", "10_norm"}
    if columns_to_remove.issubset(raw_data.columns):
        # Drop on a copy instead of inplace=True so the caller's
        # DataFrame is not mutated as a hidden side effect.
        raw_data = raw_data.drop(columns=list(columns_to_remove))

    train_df, test_df = train_test_split(raw_data)
    test_df, validation_df = train_test_split(test_df, test_size=0.5)

    log_dataframe("raw", raw_data)
    log_metric("target.mean", raw_data["target"].mean())
    log_metric("target.std", raw_data["target"].std())

    return train_df, test_df, validation_df
github databand-ai / dbnd / examples / src / dbnd_examples / pipelines / wine_quality / wine_quality_decorators_py2.py View on Github external
def validate_model(model, validation_dataset):
    # type: (ElasticNet, pd.DataFrame) -> str
    """Calculate metrics of the wine prediction model (py27).

    Logs a prediction scatter-plot artifact plus rmse/mae/r2 metrics and
    returns the three metrics as a comma-separated string.
    """
    # Keyword `axis` keeps this valid on modern pandas (positional axis
    # was removed in pandas 2.0).
    validation_x = validation_dataset.drop(["quality"], axis=1)
    validation_y = validation_dataset[["quality"]]

    prediction = model.predict(validation_x)
    (rmse, mae, r2) = calculate_metrics(validation_y, prediction)

    log_artifact(
        "prediction_scatter_plot", _create_scatter_plot(validation_y, prediction)
    )

    log_metric("rmse", rmse)
    # BUG FIX: previously logged rmse under the "mae" key.
    log_metric("mae", mae)
    log_metric("r2", r2)

    return "%s,%s,%s" % (rmse, mae, r2)
github databand-ai / dbnd / examples / src / dbnd_examples / pipelines / train / train_pipeline.py View on Github external
def validate_model_for_customer(
    model: ElasticNet, validation_dataset: pd.DataFrame, threshold=0.2
) -> Tuple[str, figure.Figure]:
    """Validate the model on a hold-out dataset and enforce a quality gate.

    Args:
        model: Fitted ElasticNet to evaluate.
        validation_dataset: Rows with a "target" column to predict.
        threshold: Minimum acceptable R2 score.

    Returns:
        A "rmse,mae,r2" string and a scatter plot of predictions.

    Raises:
        Exception: If the model's R2 is below ``threshold``.
    """
    log_dataframe("validation", validation_dataset)
    # Support for py3 parquet: force all column labels to str.
    validation_dataset = validation_dataset.rename(str, axis="columns")
    # Keyword form of drop(): the positional `axis` argument was removed
    # in pandas 2.0.
    validation_x = validation_dataset.drop(columns=["target"])
    validation_y = validation_dataset[["target"]]

    prediction = model.predict(validation_x)
    (rmse, mae, r2) = calculate_metrics(validation_y, prediction)

    log_metric("rmse", rmse)
    log_metric("mae", mae)
    log_metric("r2", r2)
    fig = _create_scatter_plot(validation_y, prediction)
    if r2 < threshold:
        raise Exception(
            "Model quality is below threshold. Got R2 equal to %s, expect at least %s"
            % (r2, threshold)
        )

    return "%s,%s,%s" % (rmse, mae, r2), fig
github databand-ai / dbnd / examples / src / dbnd_examples / pipelines / wine_quality / wine_quality_decorators_py2.py View on Github external
def validate_model(model, validation_dataset):
    # type: (ElasticNet, pd.DataFrame) -> str
    """Calculate metrics of the wine prediction model (py27).

    Logs a prediction scatter-plot artifact plus rmse/mae/r2 metrics and
    returns the three metrics as a comma-separated string.
    """
    # Keyword `axis` keeps this valid on modern pandas (positional axis
    # was removed in pandas 2.0).
    validation_x = validation_dataset.drop(["quality"], axis=1)
    validation_y = validation_dataset[["quality"]]

    prediction = model.predict(validation_x)
    (rmse, mae, r2) = calculate_metrics(validation_y, prediction)

    log_artifact(
        "prediction_scatter_plot", _create_scatter_plot(validation_y, prediction)
    )

    log_metric("rmse", rmse)
    # BUG FIX: previously logged rmse under the "mae" key.
    log_metric("mae", mae)
    log_metric("r2", r2)

    return "%s,%s,%s" % (rmse, mae, r2)