import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier, XGBRegressor
from causalml.inference.meta import (BaseRClassifier, BaseSClassifier,
                                     BaseTClassifier, BaseXClassifier)
from causalml.metrics import get_cumgain

# RANDOM_SEED, CONVERSION, CONTROL_NAME, df, x_names and the 'propensity_score'
# column are assumed to come from the surrounding test constants and fixtures.

# Hold out a test set (an 80/20 split with a fixed seed is assumed)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)

# T-learner: one outcome model per treatment arm
uplift_model = BaseTClassifier(learner=LogisticRegression())
uplift_model.fit(X=df_train[x_names].values,
                 treatment=df_train['treatment_group_key'].values,
                 y=df_train[CONVERSION].values)
y_pred = uplift_model.predict(X=df_test[x_names].values,
                              treatment=df_test['treatment_group_key'].values)
auuc_metrics = pd.DataFrame(
    np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
    columns=['y_pred', 'W', CONVERSION])
cumgain = get_cumgain(auuc_metrics,
                      outcome_col=CONVERSION,
                      treatment_col='W')
# Check that the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
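# The T-learner used above can be illustrated without causalml. The sketch
# below is a minimal, self-contained approximation of the same idea (not
# causalml's implementation): fit one classifier on the treated rows, one on
# the control rows, and score uplift as the difference of predicted
# conversion probabilities. All data and names here are synthetic/hypothetical.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n = 5000
X = rng.normal(size=(n, 3))
w = rng.binomial(1, 0.5, size=n)                      # randomized treatment flag
# Outcome with a treatment effect that grows with the first feature
p_conv = 1 / (1 + np.exp(-(0.5 * X[:, 0] + w * (0.3 + 0.4 * X[:, 0]))))
y = rng.binomial(1, p_conv)

model_t = LogisticRegression().fit(X[w == 1], y[w == 1])   # treated-arm model
model_c = LogisticRegression().fit(X[w == 0], y[w == 0])   # control-arm model
uplift = model_t.predict_proba(X)[:, 1] - model_c.predict_proba(X)[:, 1]

# Units ranked highest by the score should show a larger observed lift
top = uplift >= np.quantile(uplift, 0.8)
lift_top = y[top & (w == 1)].mean() - y[top & (w == 0)].mean()
lift_all = y[w == 1].mean() - y[w == 0].mean()
print(f"lift in top 20% by score: {lift_top:.3f} vs overall lift: {lift_all:.3f}")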
# X-learner: separate outcome and effect models per arm, blended with a
# propensity score. The control_outcome_learner argument below is assumed
# (it mirrors the treatment outcome learner); the excerpt starts mid-constructor.
uplift_model = BaseXClassifier(control_outcome_learner=XGBClassifier(),
                               control_effect_learner=XGBRegressor(),
                               treatment_outcome_learner=XGBClassifier(),
                               treatment_effect_learner=XGBRegressor())
uplift_model.fit(X=df_train[x_names].values,
                 treatment=df_train['treatment_group_key'].values,
                 y=df_train[CONVERSION].values)
y_pred = uplift_model.predict(X=df_test[x_names].values,
                              p=df_test['propensity_score'].values)
auuc_metrics = pd.DataFrame(
    np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
    columns=['y_pred', 'W', CONVERSION])
cumgain = get_cumgain(auuc_metrics,
                      outcome_col=CONVERSION,
                      treatment_col='W')
# Check that the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
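# The X-learner above consumes a precomputed 'propensity_score' column that is
# not built in the excerpt. A minimal way to produce one (a hypothetical helper,
# not necessarily how the original code does it) is to model the probability of
# treatment given the features, for example with logistic regression:
import numpy as np
from sklearn.linear_model import LogisticRegression

def add_propensity_score(df_train, df_test, x_names,
                         treatment_col='treatment_group_key',
                         control_label='control'):
    """Fit P(treated | X) on the training split and score both splits."""
    w_train = (df_train[treatment_col] != control_label).astype(int)
    ps_model = LogisticRegression(max_iter=1000).fit(df_train[x_names], w_train)
    for split in (df_train, df_test):
        # Clip away the extremes so downstream weighting stays stable
        split['propensity_score'] = np.clip(
            ps_model.predict_proba(split[x_names])[:, 1], 0.01, 0.99)
    return df_train, df_test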
# S-learner: a single model with the treatment indicator as an extra feature
# (a fresh 80/20 split with the fixed seed is assumed, as above)
df_train, df_test = train_test_split(df, test_size=0.2, random_state=RANDOM_SEED)
uplift_model = BaseSClassifier(learner=XGBClassifier())
uplift_model.fit(X=df_train[x_names].values,
                 treatment=df_train['treatment_group_key'].values,
                 y=df_train[CONVERSION].values)
y_pred = uplift_model.predict(X=df_test[x_names].values,
                              treatment=df_test['treatment_group_key'].values)
auuc_metrics = pd.DataFrame(
    np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
    columns=['y_pred', 'W', CONVERSION])
cumgain = get_cumgain(auuc_metrics,
                      outcome_col=CONVERSION,
                      treatment_col='W')
# Check that the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
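# The S-learner used above can be sketched without causalml: a single model is
# trained on the features plus a treatment indicator, and the uplift score is
# the difference between its predictions with the indicator forced to 1 and 0.
# This is a minimal illustration of the idea with a hypothetical helper, not
# causalml's implementation.
import numpy as np
from xgboost import XGBClassifier

def s_learner_uplift(X_train, w_train, y_train, X_score):
    """Hypothetical helper: S-learner uplift scores for X_score."""
    Xw = np.column_stack([X_train, w_train])          # treatment as one more feature
    model = XGBClassifier(n_estimators=100, max_depth=3).fit(Xw, y_train)
    ones = np.ones((X_score.shape[0], 1))
    zeros = np.zeros((X_score.shape[0], 1))
    p_treated = model.predict_proba(np.hstack([X_score, ones]))[:, 1]
    p_control = model.predict_proba(np.hstack([X_score, zeros]))[:, 1]
    return p_treated - p_control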
# R-learner: models outcome and treatment residuals, with a user-supplied
# propensity score passed to fit()
uplift_model = BaseRClassifier(outcome_learner=XGBClassifier(),
                               effect_learner=XGBRegressor())
uplift_model.fit(X=df_train[x_names].values,
                 p=df_train['propensity_score'].values,
                 treatment=df_train['treatment_group_key'].values,
                 y=df_train[CONVERSION].values)
y_pred = uplift_model.predict(X=df_test[x_names].values)
auuc_metrics = pd.DataFrame(
    np.c_[y_pred, df_test['treatment_group_key'].values, df_test[CONVERSION].values],
    columns=['y_pred', 'W', CONVERSION])
cumgain = get_cumgain(auuc_metrics,
                      outcome_col=CONVERSION,
                      treatment_col='W')
# Check that the cumulative gain when using the model's prediction is
# higher than it would be under random targeting
assert cumgain['y_pred'].sum() > cumgain['Random'].sum()
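# The R-learner above works on residuals: it removes what an outcome model and
# a propensity model can already explain, then fits the effect learner on the
# residual-on-residual (Robinson) transformation. A minimal in-sample sketch of
# that idea with a hypothetical helper (a simplification, not causalml's
# implementation):
import numpy as np
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier, XGBRegressor

def r_learner_uplift(X, w, y):
    """Hypothetical helper: in-sample R-learner effect estimates."""
    m_hat = XGBClassifier(n_estimators=100).fit(X, y).predict_proba(X)[:, 1]    # E[Y|X]
    e_hat = LogisticRegression(max_iter=1000).fit(X, w).predict_proba(X)[:, 1]  # P(W=1|X)
    e_hat = np.clip(e_hat, 0.01, 0.99)
    y_res, w_res = y - m_hat, w - e_hat
    pseudo_outcome = y_res / w_res                    # Robinson transformation
    weights = w_res ** 2
    effect_model = XGBRegressor(n_estimators=100).fit(X, pseudo_outcome,
                                                      sample_weight=weights)
    return effect_model.predict(X)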
# result (one column of predicted uplift per treatment arm from the fitted
# uplift model) and best_treatment (the recommended arm per test unit) are
# assumed to be defined above; a toy sketch of how best_treatment can be
# derived from result follows the second of these two fragments.
actual_is_best = np.where(
    df_test['treatment_group_key'] == best_treatment, 1, 0
)
actual_is_control = np.where(
    df_test['treatment_group_key'] == CONTROL_NAME, 1, 0
)

# Synthetic population: units that either received their recommended treatment
# or were in the control group
synthetic = (actual_is_best == 1) | (actual_is_control == 1)
synth = result[synthetic]

auuc_metrics = synth.assign(
    is_treated=1 - actual_is_control[synthetic],
    conversion=df_test.loc[synthetic, CONVERSION].values,
    uplift_tree=synth.max(axis=1)
).drop(columns=result.columns)

cumgain = get_cumgain(auuc_metrics,
                      outcome_col=CONVERSION,
                      treatment_col='is_treated')

# Check that the cumulative gain of the uplift model is higher than
# it would be under random targeting
assert cumgain['uplift_tree'].sum() > cumgain['Random'].sum()
# Same evaluation for the second uplift model, this time dropping the
# per-treatment prediction columns by class name
actual_is_best = np.where(
    df_test['treatment_group_key'] == best_treatment, 1, 0
)
actual_is_control = np.where(
    df_test['treatment_group_key'] == CONTROL_NAME, 1, 0
)
synthetic = (actual_is_best == 1) | (actual_is_control == 1)
synth = result[synthetic]

auuc_metrics = synth.assign(
    is_treated=1 - actual_is_control[synthetic],
    conversion=df_test.loc[synthetic, CONVERSION].values,
    uplift_tree=synth.max(axis=1)
).drop(columns=list(uplift_model.classes_))

cumgain = get_cumgain(auuc_metrics,
                      outcome_col=CONVERSION,
                      treatment_col='is_treated')

# Check that the cumulative gain of the uplift random forest is higher than
# it would be under random targeting
assert cumgain['uplift_tree'].sum() > cumgain['Random'].sum()
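# In the two fragments above, result holds one column of predicted uplift per
# treatment arm (for example from the uplift forest's predictions on the test
# set) and best_treatment is the recommended arm per unit. A toy, self-contained
# sketch of that recommendation rule (hypothetical numbers and labels):
import numpy as np
import pandas as pd

toy_result = pd.DataFrame({'treatment_a': [0.05, -0.02, 0.01],
                           'treatment_b': [0.02, -0.04, 0.03]})
control_label = 'control'
# Recommend control when every arm has negative predicted uplift, otherwise
# the arm with the largest predicted uplift
toy_best_treatment = np.where((toy_result < 0).all(axis=1),
                              control_label,
                              toy_result.idxmax(axis=1))
print(toy_best_treatment)   # ['treatment_a' 'control' 'treatment_b']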
"""
synthetic_preds_df = synthetic_preds.copy()
generated_data = synthetic_preds_df.pop(KEY_GENERATED_DATA)
synthetic_preds_df = pd.DataFrame(synthetic_preds_df)
synthetic_preds_df = synthetic_preds_df.drop(drop_learners, axis=1)
synthetic_preds_df['y'] = generated_data[outcome_col]
synthetic_preds_df['w'] = generated_data[treatment_col]
if treatment_effect_col in generated_data.keys():
synthetic_preds_df['tau'] = generated_data[treatment_effect_col]
assert ((outcome_col in synthetic_preds_df.columns) and
(treatment_col in synthetic_preds_df.columns) or
treatment_effect_col in synthetic_preds_df.columns)
cumlift = get_cumgain(synthetic_preds_df, outcome_col='y', treatment_col='w',
treatment_effect_col='tau')
auuc_df = pd.DataFrame(cumlift.columns)
auuc_df.columns = ['Learner']
auuc_df['cum_gain_auuc'] = [auc(cumlift.index.values/100, cumlift[learner].values) for learner in cumlift.columns]
auuc_df = auuc_df.sort_values('cum_gain_auuc', ascending=False)
if plot:
plot_gain(synthetic_preds_df, outcome_col=outcome_col,
treatment_col=treatment_col, treatment_effect_col=treatment_effect_col)
return auuc_df
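# Every check above compares a model's cumulative gain against a 'Random'
# baseline. A simplified, self-contained version of that computation (an
# illustration of the idea only; causalml's get_cumgain handles details such as
# normalization and the random baseline more carefully) is sketched below.
import numpy as np
import pandas as pd

def simple_cumgain(df, score_col, outcome_col, treatment_col):
    """Cumulative gain when targeting units in descending order of score_col."""
    df = df.sort_values(score_col, ascending=False).reset_index(drop=True)
    n = np.arange(1, len(df) + 1)
    treated = (df[treatment_col] == 1)
    # Cumulative mean outcome among treated and control units seen so far
    cum_y_t = (df[outcome_col] * treated).cumsum() / np.maximum(treated.cumsum(), 1)
    cum_y_c = (df[outcome_col] * ~treated).cumsum() / np.maximum((~treated).cumsum(), 1)
    gain = (cum_y_t - cum_y_c) * n          # lift times population targeted
    # A straight line to the final value approximates random targeting
    random_gain = gain.iloc[-1] * n / len(df)
    return pd.DataFrame({score_col: gain.values, 'Random': random_gain})

# Hypothetical usage with an auuc_metrics frame like the ones built above,
# assuming its 'W' column holds a 0/1 treatment flag:
# cg = simple_cumgain(auuc_metrics, 'y_pred', CONVERSION, 'W')
# assert cg['y_pred'].sum() > cg['Random'].sum()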