"""Tests for causalml meta-learners.

The generate_classification_data and generate_regression_data fixtures are
provided by the test suite's conftest; shared constants come from const.py.
"""
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier, XGBRegressor

from causalml.inference.meta import (BaseRClassifier, BaseRLearner,
                                     BaseSClassifier, BaseSLearner,
                                     BaseTClassifier, BaseTLearner,
                                     BaseXClassifier, BaseXLearner,
                                     LRSRegressor, XGBTRegressor)
from causalml.metrics import ape, get_cumgain, gini

from .const import CONTROL_NAME, CONVERSION, ERROR_THRESHOLD, RANDOM_SEED


def test_BaseTClassifier(generate_classification_data):
    np.random.seed(RANDOM_SEED)
    df, x_names = generate_classification_data()
    # Binarize the treatment indicator and hold out a test split.
    df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME,
                                         0, 1)
    df_train, df_test = train_test_split(df,
                                         test_size=0.2,
                                         random_state=RANDOM_SEED)
    uplift_model = BaseTClassifier(learner=LogisticRegression())
    uplift_model.fit(X=df_train[x_names].values,
                     treatment=df_train['treatment_group_key'].values,
                     y=df_train[CONVERSION].values)
    y_pred = uplift_model.predict(X=df_test[x_names].values,
                                  treatment=df_test['treatment_group_key'].values)

    auuc_metrics = pd.DataFrame(
        np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
        columns=['y_pred', 'W', CONVERSION])

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col=CONVERSION,
                          treatment_col='W')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
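
    # The same check can be phrased as a scalar metric. A hedged sketch,
    # assuming causalml.metrics.auuc_score accepts this frame and these column
    # arguments and returns one area-under-the-uplift-curve value per ranking
    # column, including the built-in 'Random' baseline:
    from causalml.metrics import auuc_score
    auuc = auuc_score(auuc_metrics, outcome_col=CONVERSION, treatment_col='W')
    assert auuc['y_pred'] > auuc['Random']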


def test_BaseXClassifier(generate_classification_data):
    np.random.seed(RANDOM_SEED)
    df, x_names = generate_classification_data()
    df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME,
                                         0, 1)
    # The X-learner needs a propensity score at prediction time; fit a simple
    # logistic-regression propensity model on the features.
    propensity_model = LogisticRegression()
    propensity_model.fit(df[x_names].values, df['treatment_group_key'].values)
    df['propensity_score'] = propensity_model.predict_proba(df[x_names].values)[:, 1]
    df_train, df_test = train_test_split(df,
                                         test_size=0.2,
                                         random_state=RANDOM_SEED)

    uplift_model = BaseXClassifier(control_outcome_learner=XGBClassifier(),
                                   control_effect_learner=XGBRegressor(),
                                   treatment_outcome_learner=XGBClassifier(),
                                   treatment_effect_learner=XGBRegressor())
    uplift_model.fit(X=df_train[x_names].values,
                     treatment=df_train['treatment_group_key'].values,
                     y=df_train[CONVERSION].values)
    y_pred = uplift_model.predict(X=df_test[x_names].values,
                                  p=df_test['propensity_score'].values)

    auuc_metrics = pd.DataFrame(
        np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
        columns=['y_pred', 'W', CONVERSION])

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col=CONVERSION,
                          treatment_col='W')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
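

# For reference, the two-stage logic the X-learner test above exercises can be
# written out by hand. A simplified, regression-flavored sketch of the Kunzel
# et al. X-learner with sklearn-style models, not causalml's internals: fit
# per-arm outcome models, impute individual effects, fit per-arm effect models
# on those imputations, then blend with the propensity score.
def x_learner_cate_sketch(X, treatment, y, p,
                          model_c, model_t, effect_c, effect_t):
    # Stage 1: outcome models for the control and treatment arms.
    model_c.fit(X[treatment == 0], y[treatment == 0])
    model_t.fit(X[treatment == 1], y[treatment == 1])
    # Stage 2: imputed treatment effects, one set per arm.
    d_t = y[treatment == 1] - model_c.predict(X[treatment == 1])
    d_c = model_t.predict(X[treatment == 0]) - y[treatment == 0]
    effect_t.fit(X[treatment == 1], d_t)
    effect_c.fit(X[treatment == 0], d_c)
    # Blend the two effect estimates using the propensity score as the weight.
    return p * effect_c.predict(X) + (1 - p) * effect_t.predict(X)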


def test_BaseSClassifier(generate_classification_data):
    np.random.seed(RANDOM_SEED)
    df, x_names = generate_classification_data()
    df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME,
                                         0, 1)
    df_train, df_test = train_test_split(df,
                                         test_size=0.2,
                                         random_state=RANDOM_SEED)
    uplift_model = BaseSClassifier(learner=XGBClassifier())
    uplift_model.fit(X=df_train[x_names].values,
                     treatment=df_train['treatment_group_key'].values,
                     y=df_train[CONVERSION].values)
    y_pred = uplift_model.predict(X=df_test[x_names].values,
                                  treatment=df_test['treatment_group_key'].values)

    auuc_metrics = pd.DataFrame(
        np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
        columns=['y_pred', 'W', CONVERSION])

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col=CONVERSION,
                          treatment_col='W')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
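

# The S-learner idea behind the test above is a single outcome model that
# takes the treatment indicator as one extra feature; the CATE is the gap
# between its predictions with the indicator switched on and off. A minimal
# sketch with an sklearn-style regressor, not causalml's internals:
def s_learner_cate_sketch(X, treatment, y, model):
    # Append the treatment indicator as the last feature column and fit once.
    model.fit(np.column_stack([X, treatment]), y)
    # Difference the predictions with the indicator forced to 1 vs. 0.
    pred_treated = model.predict(np.column_stack([X, np.ones(len(X))]))
    pred_control = model.predict(np.column_stack([X, np.zeros(len(X))]))
    return pred_treated - pred_control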


def test_BaseRClassifier(generate_classification_data):
    np.random.seed(RANDOM_SEED)
    df, x_names = generate_classification_data()
    df['treatment_group_key'] = np.where(df['treatment_group_key'] == CONTROL_NAME,
                                         0, 1)
    # The R-learner consumes a propensity score during fitting; use a simple
    # logistic-regression propensity model.
    propensity_model = LogisticRegression()
    propensity_model.fit(df[x_names].values, df['treatment_group_key'].values)
    df['propensity_score'] = propensity_model.predict_proba(df[x_names].values)[:, 1]
    df_train, df_test = train_test_split(df,
                                         test_size=0.2,
                                         random_state=RANDOM_SEED)

    uplift_model = BaseRClassifier(outcome_learner=XGBClassifier(),
                                   effect_learner=XGBRegressor())
    uplift_model.fit(X=df_train[x_names].values,
                     p=df_train['propensity_score'].values,
                     treatment=df_train['treatment_group_key'].values,
                     y=df_train[CONVERSION].values)
    # The R-learner models the treatment effect directly, so prediction needs
    # only the features.
    y_pred = uplift_model.predict(X=df_test[x_names].values)

    auuc_metrics = pd.DataFrame(
        np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
        columns=['y_pred', 'W', CONVERSION])

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col=CONVERSION,
                          treatment_col='W')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
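

# The R-learner logic the test above exercises can be sketched from the
# Nie & Wager objective: residualize the outcome with an outcome model and the
# treatment with the propensity score, then fit the effect model on the
# residual ratio with squared-residual weights. A minimal sketch with
# sklearn-style models; it omits the cross-fitting causalml uses and assumes
# propensities strictly between 0 and 1:
def r_learner_cate_sketch(X, treatment, y, p, outcome_model, effect_model):
    # Outcome residuals: y - m(X), where m predicts y from X alone.
    y_res = y - outcome_model.fit(X, y).predict(X)
    # Treatment residuals: w - e(X), with e(X) the known propensity score p.
    w_res = treatment - p
    # tau minimizes sum((y_res - tau(X) * w_res)^2), which equals a weighted
    # regression of y_res / w_res on X with weights w_res**2.
    effect_model.fit(X, y_res / w_res, sample_weight=w_res ** 2)
    return effect_model.predict(X)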


def test_BaseSLearner(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseSLearner(learner=LinearRegression())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
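
# For reference, ape above is causalml.metrics.ape, the absolute percentage
# error of the ATE estimate: roughly |truth - estimate| / |truth|, so an
# estimate of 2.1 against a true mean effect of 2.0 gives an ape of 0.05.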


def test_XGBTRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = XGBTRegressor()

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y,
                                       return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_p.flatten()) > .5
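
    # The discarded return values above are the bootstrap confidence bounds.
    # A hedged sketch of a direct sanity check, assuming fit_predict's three
    # return values with return_ci=True are the CATE and its lower/upper
    # bound arrays:
    cate_p, cate_lb, cate_ub = learner.fit_predict(X=X, treatment=treatment, y=y,
                                                   return_ci=True, n_bootstraps=10)
    assert np.all(cate_lb <= cate_ub)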


def test_BaseTLearner(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseTLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y,
                                       return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_p.flatten()) > .5
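

# The T-learner tested above (and its classifier variant at the top of this
# file) fits one outcome model per arm and differences their predictions.
# A minimal sketch with sklearn-style regressors, not causalml's internals:
def t_learner_cate_sketch(X, treatment, y, model_c, model_t):
    # Fit separate outcome models on the control and treatment rows.
    model_c.fit(X[treatment == 0], y[treatment == 0])
    model_t.fit(X[treatment == 1], y[treatment == 1])
    # The CATE estimate is the gap between the two models' predictions.
    return model_t.predict(X) - model_c.predict(X)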


def test_LRSRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = LRSRegressor()

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
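

# LRSRegressor is an S-learner built on linear regression, so its ATE estimate
# reduces to the coefficient on the treatment indicator in an OLS fit of y on
# [1, w, X]. A minimal sketch of that reduction, not causalml's internals:
def linear_s_learner_ate_sketch(X, treatment, y):
    # Design matrix: intercept, treatment indicator, then the features.
    Z = np.column_stack([np.ones(len(X)), treatment, X])
    beta, *_ = np.linalg.lstsq(Z, y, rcond=None)
    return beta[1]  # the treatment coefficient is the ATE estimate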


def test_BaseXLearner(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, p=e, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, p=e, treatment=treatment, y=y,
                                       return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_p.flatten()) > .5


def test_BaseRLearner(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseRLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, p=e, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, p=e, treatment=treatment, y=y,
                                       return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_p.flatten()) > .5