    df['race-num'] = df.race.map(mapping)
    return df.fillna('Unknown')

nonbinary_ad = AdultDataset(
    protected_attribute_names=['sex', 'native-country', 'race-num'],
    privileged_classes=[['Male'], ['United-States'], [1]],
    categorical_features=['workclass', 'education', 'marital-status',
                          'occupation', 'relationship', 'race'],
    custom_preprocessing=custom_preprocessing)

# drop redundant race feature (not relevant to this test)
index = nonbinary_ad.feature_names.index('race-num')
nonbinary_ad.features = np.delete(nonbinary_ad.features, index, axis=1)
nonbinary_ad.feature_names = np.delete(nonbinary_ad.feature_names, index)

nonbinary_test, _ = nonbinary_ad.split([16281], shuffle=False)
dataset_metric = BinaryLabelDatasetMetric(nonbinary_test)
eps_data = dataset_metric.smoothed_empirical_differential_fairness()
assert np.isclose(eps_data, 2.063813731996515)  # value verified with reference implementation
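For orientation, here is a minimal sketch of the same metric call on a hand-built dataset rather than the Adult test split. The toy DataFrame, column names, and values are made up for illustration; the only assumption about aif360 is that BinaryLabelDataset accepts a pandas DataFrame via df= together with label_names and protected_attribute_names, as in recent releases.

import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric

# Toy data with a non-binary protected attribute (three groups); values are illustrative.
toy = pd.DataFrame({
    'race-num': [1, 1, 2, 2, 3, 3, 1, 2, 3, 1],
    'feat':     [0, 1, 1, 0, 1, 0, 1, 1, 0, 0],
    'label':    [1, 0, 0, 0, 1, 0, 1, 0, 0, 1],
})
toy_ds = BinaryLabelDataset(df=toy, label_names=['label'],
                            protected_attribute_names=['race-num'])

# Smoothed EDF compares favorable-outcome rates across all protected groups;
# a smaller epsilon means the groups' rates are closer together.
eps = BinaryLabelDatasetMetric(toy_ds).smoothed_empirical_differential_fairness()
print(eps)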
import numpy as np
from aif360.datasets import AdultDataset, GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric

def test_adult_no_drop():
    ad = AdultDataset(protected_attribute_names=['sex'],
                      privileged_classes=[['Male']], categorical_features=[],
                      features_to_keep=['age', 'education-num'])
    bldm = BinaryLabelDatasetMetric(ad)
    assert bldm.num_instances() == 48842

def test_adult():
    ad = AdultDataset()
    assert np.isclose(ad.labels.mean(), 0.2478, atol=5e-5)
    bldm = BinaryLabelDatasetMetric(ad)
    assert bldm.num_instances() == 45222

def test_german():
    gd = GermanDataset()
    bldm = BinaryLabelDatasetMetric(gd)
    assert bldm.num_instances() == 1000
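The same BinaryLabelDatasetMetric object can also report group-conditioned counts and base-rate comparisons once privileged/unprivileged groups are supplied. A hedged sketch follows; the group encodings are an assumption about the default GermanDataset loader (where sex == 1 typically marks the privileged group), not something asserted by the tests above.

from aif360.datasets import GermanDataset
from aif360.metrics import BinaryLabelDatasetMetric

gd = GermanDataset()

# Assumed encoding: the default loader maps the privileged 'sex' value to 1.
priv = [{'sex': 1}]
unpriv = [{'sex': 0}]

bldm = BinaryLabelDatasetMetric(gd, unprivileged_groups=unpriv, privileged_groups=priv)
print(bldm.num_instances(privileged=True))   # instances in the privileged group
print(bldm.mean_difference())                # base-rate difference (unprivileged - privileged)
print(bldm.disparate_impact())               # base-rate ratio (unprivileged / privileged)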
def disparate_impact(self):
    outcome = super(MetricJSONExplainer, self).disparate_impact()
    response = []
    if isinstance(self.metric, BinaryLabelDatasetMetric):
        response = OrderedDict((
            ("metric", "Disparate Impact"),
            ("message", outcome),
            ("numPositivePredictionsUnprivileged", self.metric.num_positives(privileged=False)),
            ("numUnprivileged", self.metric.num_instances(privileged=False)),
            ("numPositivePredictionsPrivileged", self.metric.num_positives(privileged=True)),
            ("numPrivileged", self.metric.num_instances(privileged=True)),
            ("description", "Computed as the ratio of the rate of favorable outcomes for the unprivileged group to that of the privileged group."),
            ("ideal", "The ideal value of this metric is 1.0. A value < 1 implies a higher benefit for the privileged group and a value > 1 implies a higher benefit for the unprivileged group.")
        ))
    else:
        response = OrderedDict((
            ("metric", "Disparate Impact"),
            ("message", outcome),
            ("numPositivePredictionsUnprivileged", self.metric.num_pred_positives(privileged=False)),
            ("numUnprivileged", self.metric.num_instances(privileged=False)),
def __call__(self, estimator, X, y):
    predicted = estimator.predict(X)
    y_pred = _ensure_series(predicted, X.index, y.dtype, y.name)
    dataset_pred = self.pandas_to_dataset(X, y_pred)
    fairness_metrics = aif360.metrics.BinaryLabelDatasetMetric(
        dataset_pred,
        self.fairness_info['unprivileged_groups'],
        self.fairness_info['privileged_groups'])
    method = getattr(fairness_metrics, self.metric)
    result = method()
    return result
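Stripped of the scorer plumbing (_ensure_series and pandas_to_dataset belong to the surrounding library and are not shown here), the core of that __call__ is: wrap the predictions in a BinaryLabelDataset and look up the requested metric by name. The following standalone helper is illustrative only and not part of aif360; names and column handling are assumptions.

import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric

def score_predictions(X, y_pred, metric_name, unprivileged_groups, privileged_groups,
                      label_name='label'):
    """Illustrative helper (not part of aif360): build a BinaryLabelDataset from
    features plus predicted labels, then evaluate one metric selected by name.
    Assumes X already contains the protected attribute column(s) named in the
    group dicts and that y_pred holds binary 0/1 labels."""
    df = X.copy()
    df[label_name] = y_pred
    dataset_pred = BinaryLabelDataset(
        df=df, label_names=[label_name],
        protected_attribute_names=list(unprivileged_groups[0].keys()))
    fairness_metrics = BinaryLabelDatasetMetric(
        dataset_pred, unprivileged_groups, privileged_groups)
    return getattr(fairness_metrics, metric_name)()

# e.g. score_predictions(X, y_pred, 'disparate_impact', [{'sex': 0}], [{'sex': 1}])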
    high_ROC_margin = class_thresh
else:
    low_ROC_margin = 0.0
    high_ROC_margin = (1.0 - class_thresh)

# Iterate through ROC margins
for ROC_margin in np.linspace(low_ROC_margin,
                              high_ROC_margin,
                              self.num_ROC_margin):
    self.ROC_margin = ROC_margin

    # Predict using the current threshold and margin
    dataset_transf_pred = self.predict(dataset_pred)

    dataset_transf_metric_pred = BinaryLabelDatasetMetric(
        dataset_transf_pred,
        unprivileged_groups=self.unprivileged_groups,
        privileged_groups=self.privileged_groups)
    classified_transf_metric = ClassificationMetric(
        dataset_true,
        dataset_transf_pred,
        unprivileged_groups=self.unprivileged_groups,
        privileged_groups=self.privileged_groups)

    ROC_margin_arr[cnt] = self.ROC_margin
    class_thresh_arr[cnt] = self.classification_threshold

    # Balanced accuracy and fairness metric computations
    balanced_acc_arr[cnt] = 0.5 * (classified_transf_metric.true_positive_rate()
                                   + classified_transf_metric.true_negative_rate())

    if self.metric_name == "Statistical parity difference":
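That grid search over classification thresholds and ROC margins is internal to RejectOptionClassification.fit; from the outside, the post-processor is used roughly as sketched below. Parameter values and group dicts are arbitrary examples, and dataset_true/dataset_pred are assumed to be aligned BinaryLabelDatasets holding ground-truth labels and classifier scores/predictions respectively.

from aif360.algorithms.postprocessing import RejectOptionClassification

roc = RejectOptionClassification(
    unprivileged_groups=[{'sex': 0}],
    privileged_groups=[{'sex': 1}],
    low_class_thresh=0.01, high_class_thresh=0.99,
    num_class_thresh=100, num_ROC_margin=50,
    metric_name="Statistical parity difference",
    metric_ub=0.05, metric_lb=-0.05)

# dataset_true holds ground-truth labels; dataset_pred holds the classifier's
# scores and predicted labels for the same instances.
roc = roc.fit(dataset_true, dataset_pred)
dataset_transf_pred = roc.predict(dataset_pred)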
from itertools import product

import numpy as np

from aif360.metrics import BinaryLabelDatasetMetric, utils
from aif360.datasets import BinaryLabelDataset


class ClassificationMetric(BinaryLabelDatasetMetric):
    """Class for computing metrics based on two BinaryLabelDatasets.

    The first dataset is the original one and the second is the output of the
    classification transformer (or similar).
    """

    def __init__(self, dataset, classified_dataset,
                 unprivileged_groups=None, privileged_groups=None):
        """
        Args:
            dataset (BinaryLabelDataset): Dataset containing ground-truth
                labels.
            classified_dataset (BinaryLabelDataset): Dataset containing
                predictions.
            privileged_groups (list(dict)): Privileged groups. Format is a list
                of `dicts` where the keys are `protected_attribute_names` and
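Building on that docstring, a minimal sketch of how ClassificationMetric is instantiated and queried. Here dataset and dataset_pred are assumed to be aligned BinaryLabelDatasets of ground truth and predictions, and the group dicts are illustrative.

from aif360.metrics import ClassificationMetric

cm = ClassificationMetric(
    dataset, dataset_pred,
    unprivileged_groups=[{'sex': 0}],
    privileged_groups=[{'sex': 1}])

# Confusion-matrix based fairness metrics, optionally conditioned on group membership.
print(cm.true_positive_rate(privileged=False))   # TPR within the unprivileged group
print(cm.average_odds_difference())              # mean TPR/FPR gap between groups
print(cm.equal_opportunity_difference())         # TPR(unprivileged) - TPR(privileged)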
def statistical_parity_difference(self):
    outcome = super(MetricJSONExplainer, self).statistical_parity_difference()
    response = []
    if isinstance(self.metric, BinaryLabelDatasetMetric):
        response = OrderedDict((
            ("metric", "Statistical Parity Difference"),
            ("message", outcome),
            ("numPositivesUnprivileged", self.metric.num_positives(privileged=False)),
            ("numInstancesUnprivileged", self.metric.num_instances(privileged=False)),
            ("numPositivesPrivileged", self.metric.num_positives(privileged=True)),
            ("numInstancesPrivileged", self.metric.num_instances(privileged=True)),
            ("description", "Computed as the difference between the rate of favorable outcomes received by the unprivileged group and that of the privileged group."),
            ("ideal", "The ideal value of this metric is 0.")
        ))
    else:
        response = OrderedDict((
            ("metric", "Statistical Parity Difference"),
            ("message", outcome),
            ("numPositivesUnprivileged", self.metric.num_pred_positives(privileged=False)),
            ("numInstancesUnprivileged", self.metric.num_instances(privileged=False)),