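# The imports below cover the standard libraries used in these tests. The remaining
# names (ThresholdOptimizer, DEMOGRAPHIC_PARITY, EQUALIZED_ODDS, the *_ERROR_MESSAGE
# constants, ExamplePredictor, ExampleNotPredictor, ExampleEstimator,
# sensitive_features_ex1, labels_ex, degenerate_labels_ex, _format_as_list_of_lists)
# and the parametrized test arguments (X_transform, y_transform,
# sensitive_features_transform, constraints, predict_method_name, expected_* values)
# are assumed to come from fairlearn.postprocessing and the test suite's shared
# fixtures/conftest, which are not part of this excerpt.
import copy

import numpy as np
import pandas as pd
import pytest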


def test_threshold_optimization_different_input_lengths(X_transform, y_transform,
                                                        sensitive_features_transform,
                                                        constraints):
    n = len(sensitive_features_ex1)
    for permutation in [(0, 1), (1, 0)]:
        with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE
                           .format("X, sensitive_features, and y")):
            X = X_transform(_format_as_list_of_lists(
                sensitive_features_ex1)[:n - permutation[0]])
            y = y_transform(labels_ex[:n - permutation[1]])
            sensitive_features = sensitive_features_transform(sensitive_features_ex1)

            adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                                    constraints=constraints)
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)

    # try providing empty lists in all combinations
    for permutation in [(0, n), (n, 0)]:
        X = X_transform(_format_as_list_of_lists(
            sensitive_features_ex1)[:n - permutation[0]])
        y = y_transform(labels_ex[:n - permutation[1]])
        sensitive_features = sensitive_features_transform(sensitive_features_ex1)

        adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                                constraints=constraints)
        with pytest.raises(ValueError, match=EMPTY_INPUT_ERROR_MESSAGE):
            adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)


def test_threshold_optimization_demographic_parity_e2e(sensitive_features,
                                                       sensitive_feature_names,
                                                       expected_p0, expected_p1,
                                                       X_transform, y_transform,
                                                       sensitive_features_transform):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                            constraints=DEMOGRAPHIC_PARITY)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    # one row per example with the probabilities of predicting 0 and 1, respectively
    predictions = adjusted_predictor._pmf_predict(X, sensitive_features=sensitive_features_)

    # assert demographic parity: every group gets the same average prediction probabilities
    for sensitive_feature_name in sensitive_feature_names:
        average_probs = np.average(
            predictions[np.array(sensitive_features) == sensitive_feature_name], axis=0)
        assert np.isclose(average_probs[0], expected_p0)
        assert np.isclose(average_probs[1], expected_p1)


def test_predict_different_argument_lengths(sensitive_features, sensitive_feature_names,
                                            X_transform, y_transform,
                                            sensitive_features_transform, constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                            constraints=constraints)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE
                       .format("X and sensitive_features")):
        adjusted_predictor.predict(
            X, sensitive_features=sensitive_features_transform(sensitive_features[:-1]))

    with pytest.raises(ValueError, match=DIFFERENT_INPUT_LENGTH_ERROR_MESSAGE
                       .format("X and sensitive_features")):
        adjusted_predictor.predict(X_transform(_format_as_list_of_lists(sensitive_features))[:-1],
                                   sensitive_features=sensitive_features_)


def test_threshold_optimization_non_binary_labels(X_transform, y_transform,
                                                  sensitive_features_transform, constraints):
    non_binary_labels = copy.deepcopy(labels_ex)
    non_binary_labels[0] = 2

    X = X_transform(_format_as_list_of_lists(sensitive_features_ex1))
    y = y_transform(non_binary_labels)
    sensitive_features = sensitive_features_transform(sensitive_features_ex1)

    adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                            constraints=constraints)

    with pytest.raises(ValueError, match=NON_BINARY_LABELS_ERROR_MESSAGE):
        adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)


def test_predict_multiple_sensitive_features_columns_error(
        sensitive_features, sensitive_feature_names, X_transform, y_transform, constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = pd.DataFrame({"A1": sensitive_features, "A2": sensitive_features})
    adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                            constraints=constraints)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    with pytest.raises(ValueError,
                       match=MULTIPLE_DATA_COLUMNS_ERROR_MESSAGE.format("sensitive_features")):
        adjusted_predictor.predict(X, sensitive_features=sensitive_features_)


def test_threshold_optimization_degenerate_labels(X_transform, y_transform,
                                                  sensitive_features_transform, constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features_ex1))
    y = y_transform(degenerate_labels_ex)
    sensitive_features = sensitive_features_transform(sensitive_features_ex1)

    adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                            constraints=constraints)

    with pytest.raises(ValueError, match=DEGENERATE_LABELS_ERROR_MESSAGE.format('A')):
        adjusted_predictor.fit(X, y, sensitive_features=sensitive_features)


def test_threshold_optimization_equalized_odds_e2e(
        sensitive_features, sensitive_feature_names, expected_positive_p0, expected_positive_p1,
        expected_negative_p0, expected_negative_p1, X_transform, y_transform,
        sensitive_features_transform):
    X = X_transform(_format_as_list_of_lists(sensitive_features))
    y = y_transform(labels_ex)
    sensitive_features_ = sensitive_features_transform(sensitive_features)
    adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                            constraints=EQUALIZED_ODDS)
    adjusted_predictor.fit(X, y, sensitive_features=sensitive_features_)

    predictions = adjusted_predictor._pmf_predict(X, sensitive_features=sensitive_features_)

    # assert equalized odds: within the positive and negative label groups, every
    # sensitive feature value gets the same average prediction probabilities
    for a in sensitive_feature_names:
        positive_indices = (np.array(sensitive_features) == a) * \
            (np.array(labels_ex) == 1)
        negative_indices = (np.array(sensitive_features) == a) * \
            (np.array(labels_ex) == 0)
        average_probs_positive_indices = np.average(
            predictions[positive_indices], axis=0)
        average_probs_negative_indices = np.average(
            predictions[negative_indices], axis=0)
        assert np.isclose(average_probs_positive_indices[0], expected_positive_p0)
        assert np.isclose(average_probs_positive_indices[1], expected_positive_p1)
        assert np.isclose(average_probs_negative_indices[0], expected_negative_p0)
        assert np.isclose(average_probs_negative_indices[1], expected_negative_p1)


def test_not_predictor(constraints):
    with pytest.raises(ValueError, match=MISSING_PREDICT_ERROR_MESSAGE):
        ThresholdOptimizer(unconstrained_predictor=ExampleNotPredictor(),
                           constraints=constraints)


def test_predict_before_fit_error(X_transform, sensitive_features_transform, predict_method_name,
                                  constraints):
    X = X_transform(_format_as_list_of_lists(sensitive_features_ex1))
    sensitive_features = sensitive_features_transform(sensitive_features_ex1)
    adjusted_predictor = ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                                            constraints=constraints)

    with pytest.raises(ValueError, match=PREDICT_BEFORE_FIT_ERROR_MESSAGE):
        getattr(adjusted_predictor, predict_method_name)(X, sensitive_features=sensitive_features)


def test_both_predictor_and_estimator_error(constraints):
    with pytest.raises(ValueError, match=EITHER_PREDICTOR_OR_ESTIMATOR_ERROR_MESSAGE):
        ThresholdOptimizer(unconstrained_predictor=ExamplePredictor(),
                           estimator=ExampleEstimator(),
                           constraints=constraints)