Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_epsilon_all_groups():
    """Check smoothed empirical differential fairness over three protected attributes.

    Builds an ``AdultDataset`` protected on ``sex``, ``native-country`` and a
    numeric encoding of ``race``, takes the first 16281 rows (unshuffled) and
    compares the metric against a value verified with a reference
    implementation.
    """
    def custom_preprocessing(df):
        """Add a numeric ``race-num`` column and fill missing values with 'Unknown'.

        Slight workaround for a non-binary protected attribute: the ``race``
        feature should be categorical but the protected attribute should be
        numerical, so both columns are kept side by side.
        """
        mapping = {'Black': 0, 'White': 1, 'Asian-Pac-Islander': 2,
                   'Amer-Indian-Eskimo': 3, 'Other': 4}
        df['race-num'] = df.race.map(mapping)
        return df.fillna('Unknown')

    nonbinary_ad = AdultDataset(
        protected_attribute_names=['sex', 'native-country', 'race-num'],
        privileged_classes=[['Male'], ['United-States'], [1]],
        categorical_features=['workclass', 'education', 'marital-status',
                              'occupation', 'relationship', 'race'],
        custom_preprocessing=custom_preprocessing)

    # drop redundant race feature (not relevant to this test)
    index = nonbinary_ad.feature_names.index('race-num')
    nonbinary_ad.features = np.delete(nonbinary_ad.features, index, axis=1)
    # NOTE: np.delete returns an ndarray, so feature_names stops being a list here.
    nonbinary_ad.feature_names = np.delete(nonbinary_ad.feature_names, index)

    nonbinary_test, _ = nonbinary_ad.split([16281], shuffle=False)
    dataset_metric = BinaryLabelDatasetMetric(nonbinary_test)
    eps_data = dataset_metric.smoothed_empirical_differential_fairness()

    # Compare with a tight relative tolerance instead of ``==``: the last bits
    # of a floating-point result can differ between platforms and library
    # versions without the metric being wrong.
    # Expected value verified with reference implementation.
    assert np.isclose(eps_data, 2.063813731996515, rtol=1e-9, atol=0.0)
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from aif360.datasets import AdultDataset
from aif360.sklearn.datasets import fetch_adult
from aif360.algorithms.postprocessing import CalibratedEqOddsPostprocessing
from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta
# Module-level fixtures shared by the test below.
# fetch_adult is called with numeric_only=True; its three results are used
# below as the features, labels and per-sample weights of the classifier.
X, y, sample_weight = fetch_adult(numeric_only=True)
# AdultDataset counterpart: 'fnlwgt' is named as the instance-weight column,
# no feature is declared categorical, and only the five listed columns are
# kept (nothing is explicitly dropped).
# NOTE(review): presumably this yields the same rows as fetch_adult above -- confirm.
adult = AdultDataset(instance_weights_name='fnlwgt', categorical_features=[],
features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss',
'hours-per-week'], features_to_drop=[])
def test_calib_eq_odds_sex_weighted():
    """Compare both calibrated equalized-odds implementations on weighted data.

    A weighted logistic regression produces probability estimates; the same
    estimates are fed to ``CalibratedEqOddsPostprocessing`` and to
    ``CalibratedEqualizedOdds``, and their mix rates must agree for the
    privileged (1) and unprivileged (0) ``sex`` groups.
    """
    model = LogisticRegression(solver='lbfgs', max_iter=500)
    model.fit(X, y, sample_weight=sample_weight)
    probas = model.predict_proba(X)

    # Dataset-style postprocessor: scores live on a copy of the dataset.
    scored = adult.copy()
    scored.scores = probas[:, 1]
    dataset_style = CalibratedEqOddsPostprocessing(
        unprivileged_groups=[{'sex': 0}],
        privileged_groups=[{'sex': 1}],
    )
    dataset_style.fit(adult, scored)

    # Estimator-style postprocessor: takes the probabilities directly.
    estimator_style = CalibratedEqualizedOdds('sex')
    estimator_style.fit(probas, y, sample_weight=sample_weight)

    assert np.isclose(dataset_style.priv_mix_rate,
                      estimator_style.mix_rates_[1])
    assert np.isclose(dataset_style.unpriv_mix_rate,
                      estimator_style.mix_rates_[0])
def test_adult():
    """Prepare scaled Adult train/test matrices with the protected column removed.

    Splits the dataset at row 32561, checks the second part has at least one
    nonzero label, min-max scales both parts with statistics fitted on the
    first part, and strips the ``sex`` column from the feature matrices.
    """
    protected = 'sex'
    kept_columns = ['age', 'education-num', 'capital-gain', 'capital-loss',
                    'hours-per-week']
    ad = AdultDataset(
        protected_attribute_names=[protected],
        privileged_classes=[['Male']],
        categorical_features=[],
        features_to_keep=kept_columns,
    )
    scaler = MinMaxScaler(copy=False)

    train, test = ad.split([32561])
    assert np.any(test.labels)

    # Fit the scaler on the first part only, then reuse it for the second.
    train.features = scaler.fit_transform(train.features)
    test.features = scaler.transform(test.features)

    # Remove the protected attribute column from both feature matrices.
    index = train.feature_names.index(protected)
    X_tr, X_te = (np.delete(part.features, index, axis=1)
                  for part in (train, test))
    y_tr = train.labels.ravel()
def test_adult_test_set():
    """Check that the rows after index 30162 of a default ``AdultDataset`` contain a nonzero label."""
    dataset = AdultDataset()
    parts = dataset.split([30162])
    # Exactly two parts are expected; only the second one is inspected.
    _, held_out = parts
    assert np.any(held_out.labels)
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from aif360.datasets import AdultDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric
# Module-level setup: load Adult with three protected attributes, train a
# logistic regression on one part and build dataset metrics for the other.
# Missing values are replaced by the string 'Unknown' before further processing.
ad = AdultDataset(protected_attribute_names=['race', 'sex', 'native-country'],
privileged_classes=[['White'], ['Male'], ['United-States']],
categorical_features=['workclass', 'education',
'marital-status', 'occupation', 'relationship'],
custom_preprocessing=lambda df: df.fillna('Unknown'))
# Unshuffled split at row 16281: the FIRST part is bound to adult_test and the
# remainder to adult_train.
# NOTE(review): presumably the dataset stores the test rows first -- confirm.
adult_test, adult_train = ad.split([16281], shuffle=False)
# Standardize with statistics fitted on the training part only.
scaler = StandardScaler()
X = scaler.fit_transform(adult_train.features)
test_X = scaler.transform(adult_test.features)
clf = LogisticRegression(C=1.0, random_state=0, solver='liblinear')
# Predictions are stored as the labels of a copy of the test part.
adult_pred = adult_test.copy()
adult_pred.labels = clf.fit(X, adult_train.labels.ravel()).predict(test_X)
# One metric object over the true labels, one over the predicted labels.
dataset_metric = BinaryLabelDatasetMetric(adult_test)
classifier_metric = BinaryLabelDatasetMetric(adult_pred)
def test_adult():
    """Fit a ``MetaFairClassifier`` with ``tau=0`` on Adult and build its classification metric.

    The NumPy global random state is seeded with 1 first. The dataset is
    split at row 32561; the model is fitted on the first part and used to
    predict the second.
    """
    np.random.seed(1)

    protected = 'sex'
    kept_columns = ['age', 'education-num', 'capital-gain',
                    'capital-loss', 'hours-per-week']
    ad = AdultDataset(
        protected_attribute_names=[protected],
        privileged_classes=[['Male']],
        categorical_features=[],
        features_to_keep=kept_columns,
    )
    train, test = ad.split([32561])

    biased_model = MetaFairClassifier(tau=0, sensitive_attr=protected)
    biased_model.fit(train)
    dataset_bias_test = biased_model.predict(test)

    # Group definitions keyed by the protected attribute name.
    unprivileged = [{protected: 0}]
    privileged = [{protected: 1}]
    biased_cm = ClassificationMetric(
        test,
        dataset_bias_test,
        unprivileged_groups=unprivileged,
        privileged_groups=privileged,
    )
def test_adult():
    """Check the label mean and instance count of a default ``AdultDataset``."""
    dataset = AdultDataset()

    label_mean = dataset.labels.mean()
    assert np.isclose(label_mean, 0.2478, atol=5e-5)

    metric = BinaryLabelDatasetMetric(dataset)
    assert metric.num_instances() == 45222
def test_repair0():
    """Check that ``DisparateImpactRemover`` with repair level 0 returns an equal dataset."""
    original = AdultDataset(
        protected_attribute_names=['sex'],
        privileged_classes=[['Male']],
        categorical_features=[],
        features_to_keep=['age', 'education-num'],
    )
    remover = DisparateImpactRemover(repair_level=0.)
    repaired = remover.fit_transform(original)
    assert repaired == original
def test_adult_no_drop():
    """Check that Adult restricted to ``age`` and ``education-num`` has 48842 instances."""
    dataset = AdultDataset(
        protected_attribute_names=['sex'],
        privileged_classes=[['Male']],
        categorical_features=[],
        features_to_keep=['age', 'education-num'],
    )
    metric = BinaryLabelDatasetMetric(dataset)
    assert metric.num_instances() == 48842
# NOTE(review): this is a fragment of a function body; `protected_attributes`
# and `custom_preprocessing` are bound outside the visible lines (presumably a
# parameter and a helper of the enclosing function -- confirm).
# All feature columns, protected attributes included.
XD_features = ['Age (decade)', 'Education Years', 'sex', 'race']
# Protected attributes: both 'sex' and 'race' unless the caller supplied a list.
D_features = ['sex', 'race'] if protected_attributes is None else protected_attributes
# Label column.
Y_features = ['Income Binary']
# Non-protected features. Built through a set difference, so the order of the
# resulting list is not guaranteed.
X_features = list(set(XD_features)-set(D_features))
categorical_features = ['Age (decade)', 'Education Years']
# Privileged classes: the value 1.0 marks the privileged group of each attribute.
all_privileged_classes = {"sex": [1.0],
"race": [1.0]}
# Protected attribute maps: numeric code -> human-readable group name.
all_protected_attribute_maps = {"sex": {1.0: 'Male', 0.0: 'Female'},
"race": {1.0: 'White', 0.0: 'Non-white'}}
# Build the dataset from the selections above; the per-attribute settings are
# looked up in the order given by D_features so they stay aligned with
# protected_attribute_names.
return AdultDataset(
label_name=Y_features[0],
favorable_classes=['>50K', '>50K.'],
protected_attribute_names=D_features,
privileged_classes=[all_privileged_classes[x] for x in D_features],
instance_weights_name=None,
categorical_features=categorical_features,
features_to_keep=X_features+Y_features+D_features,
na_values=['?'],
metadata={'label_maps': [{1.0: '>50K', 0.0: '<=50K'}],
'protected_attribute_maps': [all_protected_attribute_maps[x]
for x in D_features]},
custom_preprocessing=custom_preprocessing)