Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
sample_weight (array-like, optional): Sample weights passed through to
func.
**kwargs: Additional keyword args to be passed through to func.
Returns:
scalar: Difference in metric value for unprivileged and privileged
groups.
Examples:
>>> X, y = fetch_german(numeric_only=True)
>>> y_pred = LogisticRegression().fit(X, y).predict(X)
>>> difference(precision_score, y, y_pred, prot_attr='sex',
... priv_group='male')
-0.06955430006277463
"""
groups, _ = check_groups(y, prot_attr)
idx = (groups == priv_group)
unpriv = map(lambda a: a[~idx], (y,) + args)
priv = map(lambda a: a[idx], (y,) + args)
if sample_weight is not None:
return (func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
- func(*priv, sample_weight=sample_weight[idx], **kwargs))
return func(*unpriv, **kwargs) - func(*priv, **kwargs)
Args:
X (pandas.DataFrame): Training samples.
y (array-like): Training labels.
Returns:
self
"""
X, y, _ = check_inputs(X, y)
rng = check_random_state(self.random_state)
ii32 = np.iinfo(np.int32)
s1, s2, s3, s4 = rng.randint(ii32.min, ii32.max, size=4)
tf.reset_default_graph()
self.sess_ = tf.Session()
groups, self.prot_attr_ = check_groups(X, self.prot_attr)
le = LabelEncoder()
y = le.fit_transform(y)
self.classes_ = le.classes_
# BUG: LabelEncoder converts to ndarray which removes tuple formatting
groups = groups.map(str)
groups = le.fit_transform(groups)
self.groups_ = le.classes_
n_classes = len(self.classes_)
n_groups = len(self.groups_)
# use sigmoid for binary case
if n_classes == 2:
n_classes = 1
if n_groups == 2:
n_groups = 1
classes.
Args:
y_pred (pandas.DataFrame): Probability estimates of the targets as
returned by a ``predict_proba()`` call or equivalent. Note: must
include protected attributes in the index.
Returns:
numpy.ndarray: Returns the probability of the sample for each class
in the model, where classes are ordered as they are in
``self.classes_``.
"""
check_is_fitted(self, 'mix_rates_')
rng = check_random_state(self.random_state)
groups, _ = check_groups(y_pred, self.prot_attr_)
if not set(np.unique(groups)) <= set(self.groups_):
raise ValueError('The protected groups from y_pred:\n{}\ndo not '
'match those from the training set:\n{}'.format(
np.unique(groups), self.groups_))
pos_idx = np.nonzero(self.classes_ == self.pos_label_)[0][0]
y_pred = y_pred.iloc[:, pos_idx]
yt = np.empty_like(y_pred)
for grp_idx in range(2):
i = (groups == self.groups_[grp_idx])
to_replace = (rng.rand(sum(i)) < self.mix_rates_[grp_idx])
new_preds = y_pred[i].copy()
new_preds[to_replace] = self.base_rates_[grp_idx]
yt[i] = new_preds
all groups defined by the prot_attr.
alpha (scalar, optional): Parameter that regulates the weight given to
distances between values at different parts of the distribution. A
value of 0 is equivalent to the mean log deviation, 1 is the Theil
index, and 2 is half the squared coefficient of variation.
pos_label (scalar, optional): The label of the positive class.
References:
.. [#speicher18] T. Speicher, H. Heidari, N. Grgic-Hlaca,
K. P. Gummadi, A. Singla, A. Weller, and M. B. Zafar, "A Unified
Approach to Quantifying Algorithmic Unfairness: Measuring Individual
and Group Unfairness via Inequality Indices," ACM SIGKDD
International Conference on Knowledge Discovery and Data Mining,
2018.
"""
groups, _ = check_groups(y_true, prot_attr)
b = np.empty_like(y_true, dtype='float')
if priv_group is not None:
groups = [1 if g == priv_group else 0 for g in groups]
for g in np.unique(groups):
b[groups == g] = (1 + (y_pred[groups == g] == pos_label)
- (y_true[groups == g] == pos_label)).mean()
return generalized_entropy_index(b, alpha=alpha)
Args:
func (function): A metric function from :mod:`sklearn.metrics` or
:mod:`aif360.sklearn.metrics.metrics`.
y (pandas.Series): Outcome vector with protected attributes as index.
*args: Additional positional args to be passed through to func.
prot_attr (array-like, keyword-only): Protected attribute(s). If
``None``, all protected attributes in y are used.
priv_group (scalar, optional): The label of the privileged group.
sample_weight (array-like, optional): Sample weights passed through to
func.
**kwargs: Additional keyword args to be passed through to func.
Returns:
scalar: Ratio of metric values for unprivileged and privileged groups.
"""
groups, _ = check_groups(y, prot_attr)
idx = (groups == priv_group)
unpriv = map(lambda a: a[~idx], (y,) + args)
priv = map(lambda a: a[idx], (y,) + args)
if sample_weight is not None:
numerator = func(*unpriv, sample_weight=sample_weight[~idx], **kwargs)
denominator = func(*priv, sample_weight=sample_weight[idx], **kwargs)
else:
numerator = func(*unpriv, **kwargs)
denominator = func(*priv, **kwargs)
if denominator == 0:
warnings.warn("The ratio is ill-defined and being set to 0.0 because "
"'{}' for privileged samples is 0.".format(func.__name__),
UndefinedMetricWarning)
return 0.
Args:
X (pandas.DataFrame): Training samples.
y (array-like): Training labels.
sample_weight (array-like, optional): Sample weights.
Returns:
tuple:
Samples and their weights.
* **X** -- Unchanged samples.
* **sample_weight** -- Transformed sample weights.
"""
X, y, sample_weight = check_inputs(X, y, sample_weight)
sample_weight_t = np.empty_like(sample_weight)
groups, self.prot_attr_ = check_groups(X, self.prot_attr)
# TODO: maintain categorical ordering
self.groups_ = np.unique(groups)
self.classes_ = np.unique(y)
n_groups = len(self.groups_)
n_classes = len(self.classes_)
self.reweigh_factors_ = np.full((n_groups, n_classes), np.nan)
def N_(i): return sample_weight[i].sum()
N = sample_weight.sum()
for i, g in enumerate(self.groups_):
for j, c in enumerate(self.classes_):
g_and_c = (groups == g) & (y == c)
if np.any(g_and_c):
W_gc = N_(groups == g) * N_(y == c) / (N * N_(g_and_c))
sample_weight_t[g_and_c] = W_gc * sample_weight[g_and_c]
self.reweigh_factors_[i, j] = W_gc