import numpy as np
import pandas as pd

from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

data = np.array([[0, 1],
                 [0, 0],
                 [1, 0],
                 [1, 1],
                 [1, 0],
                 [1, 0],
                 [2, 1],
                 [2, 0],
                 [2, 1],
                 [2, 1]])
# Flip two favorable labels to 0 and two unfavorable labels to 1
pred = data.copy()
pred[[3, 9], -1] = 0
pred[[4, 5], -1] = 1
df = pd.DataFrame(data, columns=['feat', 'label'])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
bld = BinaryLabelDataset(df=df, label_names=['label'],
                         protected_attribute_names=['feat'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
                          protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)

assert cm.theil_index() == 4*np.log(2)/10
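
# Where 4*log(2)/10 comes from: theil_index() is the generalized entropy index
# with alpha=1 over per-individual benefits b_i = y_hat_i - y_i + 1. A small
# illustrative check (not part of the original snippet), reusing data/pred
# defined above:
b = pred[:, -1] - data[:, -1] + 1        # benefits: 0, 1 or 2 per individual
mu = b.mean()                            # equals 1 for this example
# 0 * log(0) is taken to be 0, so zero benefits are replaced by 1 inside the log
theil = np.mean((b / mu) * np.log(np.where(b > 0, b / mu, 1.0)))
assert np.isclose(theil, 4 * np.log(2) / 10)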
data = np.array([[0, 1],
                 [0, 0],
                 [1, 0],
                 [1, 1],
                 [1, 0],
                 [1, 0],
                 [2, 1],
                 [2, 0],
                 [2, 1],
                 [2, 1]])
pred = data.copy()
pred[[3, 9], -1] = 0
pred[[4, 5], -1] = 1
df = pd.DataFrame(data, columns=['feat', 'label'])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
bld = BinaryLabelDataset(df=df, label_names=['label'],
                         protected_attribute_names=['feat'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
                          protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)

assert cm.generalized_entropy_index() == 0.2
pred = data.copy()
pred[:, -1] = np.array([0, 1, 1, 0, 0, 0, 0, 1, 1, 1])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
                          protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)
assert cm.generalized_entropy_index() == 0.3
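
# Where 0.2 and 0.3 come from: generalized_entropy_index() defaults to alpha=2,
# i.e. (1/(2n)) * sum((b_i/mu)**2 - 1) over the same benefits
# b_i = y_hat_i - y_i + 1. Illustrative check against the second prediction
# vector above (not part of the original snippet):
b = pred[:, -1] - data[:, -1] + 1
ge2 = np.sum((b / b.mean()) ** 2 - 1) / (2 * len(b))
assert np.isclose(ge2, 0.3)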
data = np.array([[0, 1],
                 [0, 0],
                 [1, 0],
                 [1, 1],
                 [1, 0],
                 [1, 0],
                 [2, 1],
                 [2, 0],
                 [2, 1],
                 [2, 1]])
pred = data.copy()
pred[[3, 9], -1] = 0
pred[[4, 5], -1] = 1
df = pd.DataFrame(data, columns=['feat', 'label'])
df2 = pd.DataFrame(pred, columns=['feat', 'label'])
bld = BinaryLabelDataset(df=df, label_names=['label'],
                         protected_attribute_names=['feat'])
bld2 = BinaryLabelDataset(df=df2, label_names=['label'],
                          protected_attribute_names=['feat'])
cm = ClassificationMetric(bld, bld2)

b = np.array([1, 1, 1.25, 1.25, 1.25, 1.25, 0.75, 0.75, 0.75, 0.75])
assert cm.between_all_groups_generalized_entropy_index() == 1/20*np.sum(b**2 - 1)
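
# between_all_groups_generalized_entropy_index() applies the same alpha=2
# formula after replacing each individual's benefit with the mean benefit of
# their protected group, which is exactly the b vector asserted against.
# Illustrative check of those group means (not part of the original snippet):
b_ind = pred[:, -1] - data[:, -1] + 1
for feat_value, group_mean in [(0, 1.0), (1, 1.25), (2, 0.75)]:
    assert b_ind[data[:, 0] == feat_value].mean() == group_mean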
self.label_names = [frame.names[-1]]
self.privileged_groups = config['privileged_groups']
self.unprivileged_groups = config['unprivileged_groups']
self.favorable_label = float(config['favorable_label'])
self.unfavorable_label = float(config['unfavorable_label'])
self.protected_attribute_names = config['protected_attribute_names']
self.lfr = LFR(
    unprivileged_groups=self.unprivileged_groups,
    privileged_groups=self.privileged_groups,
    verbose=0,
)
self.lfr.fit(
    BinaryLabelDataset(
        df=frame.to_pandas(),
        favorable_label=self.favorable_label,
        unfavorable_label=self.unfavorable_label,
        label_names=self.label_names,
        protected_attribute_names=self.protected_attribute_names,
    )
)
self.fitted = True

def transform(self, X: dt.Frame, y: np.ndarray = None):
    from aif360.datasets import BinaryLabelDataset

    # The transformation is only applied at training time, i.e. when the
    # label column is present in X or y is given.
    if self.fitted and (self.label_names[0] in X.names or y is not None):
        if self.label_names[0] not in X.names:
            X = dt.cbind(X, dt.Frame(y))
        # Impute missing values with the column mean before building the dataset
        X_pd = X.to_pandas()
        X = dt.Frame(X_pd.fillna(X_pd.mean()))
        transformed_X: BinaryLabelDataset = self.lfr.transform(
            BinaryLabelDataset(
                df=X.to_pandas(),
                favorable_label=self.favorable_label,
                unfavorable_label=self.unfavorable_label,
                label_names=self.label_names,
                protected_attribute_names=self.protected_attribute_names,
            )
        )
        return dt.Frame(
            transformed_X.features,
            names=[name + "_lfr" for name in transformed_X.feature_names],
        )
    # At prediction time no transformation is applied
    else:
        return X
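
# For context, a minimal standalone sketch of the aif360 LFR calls that the
# recipe above wraps. The toy frame, column names and group definitions here
# are invented for illustration only:
import numpy as np
import pandas as pd
from aif360.algorithms.preprocessing import LFR
from aif360.datasets import BinaryLabelDataset

rng = np.random.default_rng(0)
toy = pd.DataFrame({'feat': rng.integers(0, 2, 200),
                    'x1': rng.normal(size=200),
                    'label': rng.integers(0, 2, 200)})
toy_bld = BinaryLabelDataset(df=toy, label_names=['label'],
                             protected_attribute_names=['feat'])
lfr = LFR(unprivileged_groups=[{'feat': 0}],
          privileged_groups=[{'feat': 1}],
          verbose=0)
lfr.fit(toy_bld)
repaired = lfr.transform(toy_bld)   # BinaryLabelDataset of learned fair representations
print(repaired.features.shape, repaired.feature_names)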
    df_transformed[p] = df_transformed[p].replace(pmap_rev)

# Map the labels to numeric values
for idx, p in enumerate(Y_feature_names):
    pmap = dataset.metadata["label_maps"][idx]
    pmap_rev = dict(zip(pmap.values(), pmap.keys()))
    df_transformed[p] = df_transformed[p].replace(pmap_rev)

# Dummy code and convert to a dataset
df_dum = pd.concat([pd.get_dummies(df_transformed.loc[:, X_feature_names],
                                   prefix_sep="="),
                    df_transformed.loc[:, Y_feature_names + D_feature_names]],
                   axis=1)

# Create a dataset out of df_dum
dataset_transformed = BinaryLabelDataset(
    df=df_dum,
    label_names=Y_feature_names,
    protected_attribute_names=self.protected_attribute_names,
    privileged_protected_attributes=self.privileged_protected_attributes,
    unprivileged_protected_attributes=self.unprivileged_protected_attributes,
    favorable_label=dataset.favorable_label,
    unfavorable_label=dataset.unfavorable_label,
    metadata=dataset.metadata)

return dataset_transformed
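
# Note on the dummy-coding step above: pd.get_dummies with prefix_sep="="
# expands each categorical feature into one indicator column per level, named
# "<feature>=<level>". A tiny illustration with an invented column:
demo = pd.DataFrame({'workclass': ['Private', 'State-gov', 'Private']})
print(list(pd.get_dummies(demo, prefix_sep="=").columns))
# ['workclass=Private', 'workclass=State-gov']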
def dataset_wrapper(outcome, protected, unprivileged_groups, privileged_groups,
                    favorable_label, unfavorable_label):
    """Wrap `outcome` and `protected` numpy arrays into an aif360
    BinaryLabelDataset.
    """
    df = pd.DataFrame(data=outcome,
                      columns=['outcome'])
    df['race'] = protected
    dataset = BinaryLabelDataset(favorable_label=favorable_label,
                                 unfavorable_label=unfavorable_label,
                                 df=df,
                                 label_names=['outcome'],
                                 protected_attribute_names=['race'],
                                 unprivileged_protected_attributes=unprivileged_groups)
    return dataset
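
# Hedged usage sketch for dataset_wrapper() above, with toy arrays. The group
# dicts follow the usual aif360 attribute-name -> value convention:
outcome = np.array([1., 0., 1., 0.])
protected = np.array([1., 1., 0., 0.])
ds = dataset_wrapper(outcome=outcome, protected=protected,
                     unprivileged_groups=[{'race': 0}],
                     privileged_groups=[{'race': 1}],
                     favorable_label=1.0, unfavorable_label=0.0)
print(ds.labels.ravel(), ds.protected_attributes.ravel())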
    unprivileged_groups (list(dict)): Unprivileged groups in the same
        format as `privileged_groups`.

Raises:
    TypeError: `dataset` and `classified_dataset` must be
        :obj:`~aif360.datasets.BinaryLabelDataset` types.
"""
if not isinstance(dataset, BinaryLabelDataset):
    raise TypeError("'dataset' should be a BinaryLabelDataset")

# sets self.dataset, self.unprivileged_groups, self.privileged_groups
super(ClassificationMetric, self).__init__(dataset,
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_groups)

if isinstance(classified_dataset, BinaryLabelDataset):
    self.classified_dataset = classified_dataset
else:
    raise TypeError("'classified_dataset' should be a "
                    "BinaryLabelDataset.")

# Verify that everything except the predictions and metadata is the same
# for the two datasets
with self.dataset.temporarily_ignore('labels', 'scores'):
    if self.dataset != self.classified_dataset:
        raise ValueError("The two datasets are expected to differ only "
                         "in 'labels' or 'scores'.")