# Web-scrape artifact (Snyk code-snippet banner) — not part of the original source; kept commented out so the file remains parseable.
# NOTE(review): truncated script fragment. `debiased_model`, `train`, `test`,
# `protected`, `getStats`, and `unconstrainedFDR2` are all defined outside this
# excerpt — confirm upstream. Indentation has been stripped by the extraction.
debiased_model.fit(train)
#dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
dataset_debiasing_test = debiased_model.predict(test)
# Raw predicted labels; immediately overwritten by the +/-1 recoding below.
predictions = list(dataset_debiasing_test.labels)
# Recode labels as +1 (favorable) / -1 (unfavorable) for getStats().
predictions = [1 if y == train.favorable_label else
-1 for y in dataset_debiasing_test.labels.ravel()]
y_test = np.array([1 if y == train.favorable_label else
-1 for y in test.labels.ravel()])
# Protected-attribute column extracted from the test features.
x_control_test = pd.DataFrame(data=test.features,
columns=test.feature_names)[protected]
acc, sr, fdr = getStats(y_test, predictions, x_control_test)
# Cross-check the hand-computed FDR ratio against the library's metric.
debiased_cm = ClassificationMetric(test, dataset_debiasing_test,
unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
fdr2 = debiased_cm.false_discovery_rate_ratio()
# Symmetrize the ratio so it is <= 1 regardless of which group is on top.
fdr2 = min(fdr2, 1/fdr2)
assert np.isclose(fdr, fdr2)
#print(fdr, unconstrainedFDR)
# Debiasing must not yield a worse FDR ratio than the unconstrained model.
assert(fdr2 >= unconstrainedFDR2)
def test_bias_amplification_binary_groups():
    """Bias amplification equals the classifier's smoothed EDF minus the data's.

    Uses the module-level ``adult_test``/``adult_pred`` fixtures and the
    precomputed ``dataset_metric``/``classifier_metric`` objects.
    """
    clf_metric = ClassificationMetric(adult_test, adult_pred)
    amplification = clf_metric.differential_fairness_bias_amplification()
    # Smoothed empirical differential fairness of the data and the predictions.
    edf_of_data = dataset_metric.smoothed_empirical_differential_fairness()
    edf_of_preds = classifier_metric.smoothed_empirical_differential_fairness()
    assert amplification == edf_of_preds - edf_of_data
# NOTE(review): truncated fragment — the opening `data = np.array([` rows are
# missing from this excerpt; each row appears to be ['feat', 'label'].
[1, 0],
[1, 0],
[2, 1],
[2, 0],
[2, 1],
[2, 1]])
# Perturb the predicted labels: rows 3 and 9 flipped to 0, rows 4 and 5 to 1.
pred = data.copy()
pred[[3, 9], -1] = 0
pred[[4, 5], -1] = 1
df = pd.DataFrame(data, columns=['feat', 'label'])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
# True vs. predicted datasets, both keyed on 'feat' as the protected attribute.
bld = BinaryLabelDataset(df=df, label_names=['label'],
protected_attribute_names=['feat'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)
# Presumably the per-individual benefit values for the worked example of the
# between-group generalized entropy index (alpha=2) — TODO confirm against the
# metric's definition.
b = np.array([1, 1, 1.25, 1.25, 1.25, 1.25, 0.75, 0.75, 0.75, 0.75])
assert cm.between_all_groups_generalized_entropy_index() == 1/20*np.sum(b**2 - 1)
# NOTE(review): truncated fragment — the opening `data = np.array([` rows are
# missing; same fixture shape as the block above.
[1, 0],
[1, 0],
[2, 1],
[2, 0],
[2, 1],
[2, 1]])
# Same perturbation of predictions as the previous example.
pred = data.copy()
pred[[3, 9], -1] = 0
pred[[4, 5], -1] = 1
df = pd.DataFrame(data, columns=['feat', 'label'])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
bld = BinaryLabelDataset(df=df, label_names=['label'],
protected_attribute_names=['feat'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)
# Expected generalized entropy index for this true/predicted pair.
assert cm.generalized_entropy_index() == 0.2
# Second case: replace the entire predicted-label column at once
# (reuses `data` and `bld` from the preceding fragment).
pred = data.copy()
pred[:, -1] = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 1])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)
# Expected generalized entropy index for these fully-replaced predictions.
assert cm.generalized_entropy_index() == 0.3
def fit(self, dataset_true, dataset_pred):
"""Compute parameters for equalizing odds using true and predicted
labels.
Args:
true_dataset (BinaryLabelDataset): Dataset containing true labels.
pred_dataset (BinaryLabelDataset): Dataset containing predicted
labels.
Returns:
EqOddsPostprocessing: Returns self.
"""
# NOTE(review): indentation stripped and tail spliced — this excerpt is not
# a faithful copy of the original method; treat comments below as review notes.
metric = ClassificationMetric(dataset_true, dataset_pred,
unprivileged_groups=self.unprivileged_groups,
privileged_groups=self.privileged_groups)
# compute basic statistics
# NOTE(review): `sbr`/`obr` are instance-count shares (not base rates as the
# names suggest), and none of sbr..tnr1 are used later in this excerpt.
sbr = metric.num_instances(privileged=True) / metric.num_instances()
obr = metric.num_instances(privileged=False) / metric.num_instances()
fpr0 = metric.false_positive_rate(privileged=True)
fpr1 = metric.false_positive_rate(privileged=False)
fnr0 = metric.false_negative_rate(privileged=True)
fnr1 = metric.false_negative_rate(privileged=False)
tpr0 = metric.true_positive_rate(privileged=True)
tpr1 = metric.true_positive_rate(privileged=False)
tnr0 = metric.true_negative_rate(privileged=True)
tnr1 = metric.true_negative_rate(privileged=False)
# Boolean mask selecting rows belonging to the unprivileged group(s).
cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
dataset_pred.protected_attributes,
dataset_pred.protected_attribute_names,
self.unprivileged_groups)
# NOTE(review): duplicates `metric` above with identical arguments.
cm = ClassificationMetric(dataset_true, dataset_pred,
unprivileged_groups=self.unprivileged_groups,
privileged_groups=self.privileged_groups)
# Per-group base rates, used below to build the "trivial" predictor.
self.base_rate_priv = cm.base_rate(privileged=True)
self.base_rate_unpriv = cm.base_rate(privileged=False)
# Create a dataset with "trivial" predictions
# NOTE(review): `cond_vec_priv` is never defined in this excerpt — presumably
# computed like `cond_vec_unpriv` with self.privileged_groups; confirm upstream.
dataset_trivial = dataset_pred.copy(deepcopy=True)
dataset_trivial.scores[cond_vec_priv] = cm.base_rate(privileged=True)
dataset_trivial.scores[cond_vec_unpriv] = cm.base_rate(privileged=False)
cm_triv = ClassificationMetric(dataset_true, dataset_trivial,
unprivileged_groups=self.unprivileged_groups,
privileged_groups=self.privileged_groups)
# Cost selection: when false negatives carry no cost, optimize generalized
# FPR; when false positives carry no cost, optimize generalized FNR.
if self.fn_rate == 0:
priv_cost = cm.generalized_false_positive_rate(privileged=True)
unpriv_cost = cm.generalized_false_positive_rate(privileged=False)
priv_trivial_cost = cm_triv.generalized_false_positive_rate(privileged=True)
unpriv_trivial_cost = cm_triv.generalized_false_positive_rate(privileged=False)
elif self.fp_rate == 0:
priv_cost = cm.generalized_false_negative_rate(privileged=True)
unpriv_cost = cm.generalized_false_negative_rate(privileged=False)
priv_trivial_cost = cm_triv.generalized_false_negative_rate(privileged=True)
unpriv_trivial_cost = cm_triv.generalized_false_negative_rate(privileged=False)
else:  # NOTE(review): excerpt truncated/spliced here — the branch body is missing.
Returns:
CalibratedEqOddsPostprocessing: Returns self.
"""
# NOTE(review): fragment — the `def` line and docstring head are missing above
# this excerpt, and the body is cut off after the first cost computation.
# Create boolean conditioning vectors for protected groups
cond_vec_priv = utils.compute_boolean_conditioning_vector(
dataset_pred.protected_attributes,
dataset_pred.protected_attribute_names,
self.privileged_groups)
cond_vec_unpriv = utils.compute_boolean_conditioning_vector(
dataset_pred.protected_attributes,
dataset_pred.protected_attribute_names,
self.unprivileged_groups)
cm = ClassificationMetric(dataset_true, dataset_pred,
unprivileged_groups=self.unprivileged_groups,
privileged_groups=self.privileged_groups)
# Per-group base rates feed the "trivial" (constant-score) predictor below.
self.base_rate_priv = cm.base_rate(privileged=True)
self.base_rate_unpriv = cm.base_rate(privileged=False)
# Create a dataset with "trivial" predictions
dataset_trivial = dataset_pred.copy(deepcopy=True)
dataset_trivial.scores[cond_vec_priv] = cm.base_rate(privileged=True)
dataset_trivial.scores[cond_vec_unpriv] = cm.base_rate(privileged=False)
cm_triv = ClassificationMetric(dataset_true, dataset_trivial,
unprivileged_groups=self.unprivileged_groups,
privileged_groups=self.privileged_groups)
# When false negatives carry no cost, the cost is the generalized FPR.
if self.fn_rate == 0:
priv_cost = cm.generalized_false_positive_rate(privileged=True)
unpriv_cost = cm.generalized_false_positive_rate(privileged=False)
# Iterate through ROC margins
# NOTE(review): fragment — `low_ROC_margin`, `high_ROC_margin`, `cnt`, and the
# accumulator arrays (ROC_margin_arr, class_thresh_arr, balanced_acc_arr,
# fair_metric_arr) are defined outside this excerpt.
for ROC_margin in np.linspace(
low_ROC_margin,
high_ROC_margin,
self.num_ROC_margin):
self.ROC_margin = ROC_margin
# Predict using the current threshold and margin
dataset_transf_pred = self.predict(dataset_pred)
dataset_transf_metric_pred = BinaryLabelDatasetMetric(
dataset_transf_pred,
unprivileged_groups=self.unprivileged_groups,
privileged_groups=self.privileged_groups)
classified_transf_metric = ClassificationMetric(
dataset_true,
dataset_transf_pred,
unprivileged_groups=self.unprivileged_groups,
privileged_groups=self.privileged_groups)
# Record the margin/threshold tried at this grid point.
ROC_margin_arr[cnt] = self.ROC_margin
class_thresh_arr[cnt] = self.classification_threshold
# Balanced accuracy and fairness metric computations
balanced_acc_arr[cnt] = 0.5*(classified_transf_metric.true_positive_rate()\
+classified_transf_metric.true_negative_rate())
# Dispatch on the configured fairness-metric name.
if self.metric_name == "Statistical parity difference":
fair_metric_arr[cnt] = dataset_transf_metric_pred.mean_difference()
elif self.metric_name == "Average odds difference":
fair_metric_arr[cnt] = classified_transf_metric.average_odds_difference()
elif self.metric_name == "Equal opportunity difference":  # NOTE(review): truncated — branch body missing.
# NOTE(review): fragment from an evaluation script — `evaluate`,
# `dataset_wrapper`, the group dicts, and the label constants are all
# defined outside this excerpt; the `metrics` dict literal is cut off below.
_, y_pred = evaluate(model, x_test, y_test)
"""Calculate the fairness metrics"""
# Wrap true and predicted outcomes as datasets sharing the protected column.
original_test_dataset = dataset_wrapper(outcome=y_test, protected=p_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups,
favorable_label=favorable_label,
unfavorable_label=unfavorable_label)
plain_predictions_test_dataset = dataset_wrapper(outcome=y_pred, protected=p_test,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups,
favorable_label=favorable_label,
unfavorable_label=unfavorable_label)
classified_metric_nodebiasing_test = ClassificationMetric(original_test_dataset,
plain_predictions_test_dataset,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)
# Balanced accuracy = mean of TPR and TNR.
TPR = classified_metric_nodebiasing_test.true_positive_rate()
TNR = classified_metric_nodebiasing_test.true_negative_rate()
bal_acc_nodebiasing_test = 0.5*(TPR+TNR)
print("#### Plain model - without debiasing - classification metrics on test set")
metrics = {
"Classification accuracy": classified_metric_nodebiasing_test.accuracy(),
"Balanced classification accuracy": bal_acc_nodebiasing_test,
"Statistical parity difference": classified_metric_nodebiasing_test.statistical_parity_difference(),
"Disparate impact": classified_metric_nodebiasing_test.disparate_impact(),
"Equal opportunity difference": classified_metric_nodebiasing_test.equal_opportunity_difference(),
"Average odds difference": classified_metric_nodebiasing_test.average_odds_difference(),  # NOTE(review): dict continues beyond this excerpt.