# Module-level fixtures shared by the tests in this file (the same setup is
# reused by the distortion-metric tests further below).
import numpy as np
import pandas as pd

from aif360.datasets import StructuredDataset

data = np.arange(12).reshape((3, 4)).T  # 4 samples x 3 feature columns
cols = ['one', 'two', 'three', 'label']
labs = np.ones((4, 1))
df = pd.DataFrame(data=np.concatenate((data, labs), axis=1), columns=cols)


def test_k_folds():
    sd = StructuredDataset(df=df, label_names=['label'], protected_attribute_names=['two'])
    folds = sd.split(4)

    # 4 samples split into 4 folds: one sample per fold, with all per-fold
    # arrays staying aligned.
    assert len(folds) == 4
    assert all(f.features.shape[0] == f.labels.shape[0]
            == f.protected_attributes.shape[0] == len(f.instance_names)
            == f.instance_weights.shape[0] == 1 for f in folds)

    # 4 samples into 3 folds: the first fold receives the extra sample.
    folds = sd.split(3)
    assert folds[0].features.shape[0] == 2
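
# A small sketch of why the assertion above expects a first fold of size 2:
# splitting 4 samples into 3 folds hands out the leftover sample to the
# earliest fold, the way np.array_split does. (That split() delegates to
# np.array_split is an assumption based on the observed fold sizes, not
# something shown in this file.)
def demo_fold_sizes():
    sizes = [len(chunk) for chunk in np.array_split(np.arange(4), 3)]
    assert sizes == [2, 1, 1]  # matches folds[0].features.shape[0] == 2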
def test_copy():
    sd = StructuredDataset(df=df, label_names=['label'], protected_attribute_names=['two'])
    sd2 = sd.copy()
    sd3 = sd.copy(True)

    # copy() is shallow, so sd2 shares the underlying arrays with sd;
    # copy(True) is a deep copy and is unaffected by later mutation of sd.
    sd.features[0] = 999
    assert np.all(sd2.features[0] == 999)
    assert not np.any(sd3.features[0] == 999)
def test_eq():
    sd = StructuredDataset(df=df, label_names=['label'], protected_attribute_names=['two'])
    sd2 = sd.copy()
    sd3 = sd.copy(True)
    sd4 = StructuredDataset(df=df, label_names=['label'], protected_attribute_names=['one', 'three'])

    # Shallow and deep copies compare equal to the original; changing the
    # protected attributes yields an unequal dataset.
    assert sd == sd2
    assert sd == sd3
    assert sd2 == sd3
    assert sd != sd4
def test_temporarily_ignore():
    sd = StructuredDataset(df=df, label_names=['label'], protected_attribute_names=['one', 'three'])
    modified = sd.copy()
    modified.labels = sd.labels + 1
    assert sd != modified

    # Inside the context manager, 'labels' is excluded from the equality
    # check; the exclusion is reverted on exit.
    with sd.temporarily_ignore('labels'):
        assert sd == modified
    assert 'labels' not in sd.ignore_fields
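
# A usage sketch showing that temporarily_ignore() accepts several field names
# at once; the library itself compares datasets this way (ignoring 'features',
# 'labels' and 'scores') when validating a SampleDistortionMetric, as shown in
# the excerpt near the end of this file. The dataset names here are
# illustrative only.
def demo_ignore_multiple_fields():
    a = StructuredDataset(df=df, label_names=['label'],
        protected_attribute_names=['one', 'three'])
    b = a.copy(True)
    b.features = a.features + 1
    b.labels = a.labels + 1
    assert a != b
    with a.temporarily_ignore('features', 'labels'):
        assert a == b
    assert 'features' not in a.ignore_fields and 'labels' not in a.ignore_fields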
# Setup for the SampleDistortionMetric tests; reuses the module-level `data`,
# `cols` and `df` fixtures defined at the top of this file.
from scipy.spatial.distance import cdist

from aif360.metrics import SampleDistortionMetric

sd = StructuredDataset(df=df, label_names=['label'],
    protected_attribute_names=['one', 'three'])

# Distorted copy: every feature value is shifted by +1.
distorted = data + 1
sd_distorted = sd.copy(True)
sd_distorted.features = distorted

# Additional random datasets (not exercised by the tests shown here).
rand = np.random.randint(0, 10, (4, 4))
rand2 = np.random.randint(0, 10, (4, 3))
df_rand = pd.DataFrame(data=rand, columns=cols)
sd_rand = StructuredDataset(df=df_rand, label_names=['label'],
    protected_attribute_names=['one', 'three'])
sd_rand2 = sd_rand.copy(True)
sd_rand2.features = rand2

# Privileged/unprivileged group definitions keyed on the protected attribute 'one'.
priv = [{'one': 1}]
unpriv = [{'one': 2}]
def test_euclidean_distance():
    sdm = SampleDistortionMetric(sd, sd_distorted)
    # Each of the 4 samples moves by 1 in each of its 3 features, i.e. a
    # per-sample distance of sqrt(3).
    assert sdm.total_euclidean_distance() == 4*np.sqrt(3)


def test_manhattan_distance():
    sdm = SampleDistortionMetric(sd, sd_distorted)
    # Per-sample L1 distance is 3, so the total over 4 samples is 12.
    assert sdm.total_manhattan_distance() == 12
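
# A quick cross-check of the totals asserted above, computed directly from the
# fixture arrays with scipy's cdist instead of going through the metric class.
# The diagonal of the pairwise-distance matrix pairs each original sample with
# its own distorted counterpart.
def demo_distance_totals_by_hand():
    d_euclid = np.diag(cdist(sd.features, sd_distorted.features,
                             metric='euclidean')).sum()
    d_manhattan = np.diag(cdist(sd.features, sd_distorted.features,
                                metric='cityblock')).sum()
    assert np.isclose(d_euclid, 4*np.sqrt(3))
    assert d_manhattan == 12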
# Excerpt from aif360's BinaryLabelDataset class.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np

from aif360.datasets import StructuredDataset


class BinaryLabelDataset(StructuredDataset):
    """Base class for all structured datasets with binary labels."""

    def __init__(self, favorable_label=1., unfavorable_label=0., **kwargs):
        """
        Args:
            favorable_label (float): Label value which is considered favorable
                (i.e. "positive").
            unfavorable_label (float): Label value which is considered
                unfavorable (i.e. "negative").
            **kwargs: StructuredDataset arguments.
        """
        self.favorable_label = float(favorable_label)
        self.unfavorable_label = float(unfavorable_label)

        super(BinaryLabelDataset, self).__init__(**kwargs)
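
# A minimal usage sketch for the constructor above (in a script you would
# `from aif360.datasets import BinaryLabelDataset`). The toy DataFrame and its
# column names are illustrative only; label values must be limited to
# favorable_label/unfavorable_label.
toy = pd.DataFrame({'age': [1, 0, 1, 0],
                    'income': [0.5, 0.3, 0.9, 0.1],
                    'hired': [1, 0, 1, 0]})
bld = BinaryLabelDataset(df=toy, label_names=['hired'],
                         protected_attribute_names=['age'],
                         favorable_label=1., unfavorable_label=0.)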
# Excerpt from aif360's DatasetMetric class; the imports and the class/__init__
# headers are a best-effort reconstruction from the body below.
import numpy as np

from aif360.datasets import StructuredDataset
from aif360.metrics import Metric, utils


class DatasetMetric(Metric):
    """Class for computing metrics based on one StructuredDataset."""

    def __init__(self, dataset, unprivileged_groups=None, privileged_groups=None):
        """
        Args:
            dataset (StructuredDataset): A StructuredDataset.
            privileged_groups (list(dict)): Privileged groups. Format is a list
                of `dicts` where the keys are `protected_attribute_names` and
                the values are values in `protected_attributes`. Each `dict`
                element describes a single group. See examples for more details.
            unprivileged_groups (list(dict)): Unprivileged groups in the same
                format as `privileged_groups`.

        Raises:
            TypeError: `dataset` must be a
                :obj:`~aif360.datasets.StructuredDataset` type.
            ValueError: `privileged_groups` and `unprivileged_groups` must be
                disjoint.

        Examples:
            >>> from aif360.datasets import GermanDataset
            >>> german = GermanDataset()
            >>> u = [{'sex': 1, 'age': 1}, {'sex': 0}]
            >>> p = [{'sex': 1, 'age': 0}]
            >>> dm = DatasetMetric(german, unprivileged_groups=u, privileged_groups=p)
        """
        if not isinstance(dataset, StructuredDataset):
            raise TypeError("'dataset' should be a StructuredDataset")

        # sets self.dataset
        super(DatasetMetric, self).__init__(dataset)

        # TODO: should this deepcopy?
        self.privileged_groups = privileged_groups
        self.unprivileged_groups = unprivileged_groups

        # don't check if nothing was provided
        if not self.privileged_groups or not self.unprivileged_groups:
            return

        # Enforce the disjointness documented in the ValueError above.
        priv_mask = utils.compute_boolean_conditioning_vector(
            self.dataset.protected_attributes,
            self.dataset.protected_attribute_names, self.privileged_groups)
        unpriv_mask = utils.compute_boolean_conditioning_vector(
            self.dataset.protected_attributes,
            self.dataset.protected_attribute_names, self.unprivileged_groups)
        if np.any(np.logical_and(priv_mask, unpriv_mask)):
            raise ValueError("'privileged_groups' and 'unprivileged_groups'"
                             " must be disjoint.")
# Excerpt from aif360's SampleDistortionMetric class; the imports, the
# class/__init__ headers and the start of the docstring are a best-effort
# reconstruction from the body below.
from aif360.datasets import StructuredDataset
from aif360.metrics import DatasetMetric


class SampleDistortionMetric(DatasetMetric):
    """Class for computing metrics based on two StructuredDatasets."""

    def __init__(self, dataset, distorted_dataset, unprivileged_groups=None,
                 privileged_groups=None):
        """
        Args:
            dataset (StructuredDataset): Original dataset.
            distorted_dataset (StructuredDataset): Distorted version of
                `dataset`.
            privileged_groups (list(dict)): Privileged groups. Format is a list
                of `dicts` where the keys are `protected_attribute_names` and
                the values are values in `protected_attributes`. Each `dict`
                element describes a single group. See examples for more details.
            unprivileged_groups (list(dict)): Unprivileged groups in the same
                format as `privileged_groups`.

        Raises:
            TypeError: `dataset` and `distorted_dataset` must be
                :obj:`~aif360.datasets.StructuredDataset` types.
        """
        # sets self.dataset, self.unprivileged_groups, self.privileged_groups
        super(SampleDistortionMetric, self).__init__(dataset,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups)

        if isinstance(distorted_dataset, StructuredDataset):
            self.distorted_dataset = distorted_dataset
        else:
            raise TypeError("'distorted_dataset' should be a StructuredDataset")

        with dataset.temporarily_ignore('features', 'labels', 'scores'):
            if dataset != distorted_dataset:
                raise ValueError("The two datasets may differ in features and "
                                 "labels/scores only.")