Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_argument_types(self, transformX, transformY, transformA):
    """Fit ExponentiatedGradient on transformed inputs and check convergence.

    This is an expanded-out version of one of the smoke tests.  The three
    ``transform*`` arguments are callables applied to ``self.X``, ``self.y``
    and ``self.A`` before fitting; the moments used for evaluation are
    loaded with the untransformed data.

    NOTE(review): ``disp``, ``error`` and ``n_classifiers`` are stored in
    ``res`` but not asserted on in this definition.
    """
    mitigator = ExponentiatedGradient(
        self.learner,
        constraints=DemographicParity(),
        eps=0.1,
    )
    mitigator.fit(
        transformX(self.X),
        transformY(self.y),
        sensitive_features=transformA(self.A),
    )

    res = mitigator._expgrad_result._as_dict()
    best = res["best_classifier"]
    res["n_classifiers"] = len(res["classifiers"])

    # Evaluate the best classifier with freshly loaded moments
    parity = DemographicParity()
    parity.load_data(self.X, self.y, sensitive_features=self.A)
    error_rate = ErrorRate()
    error_rate.load_data(self.X, self.y, sensitive_features=self.A)
    res["disp"] = parity.gamma(best).max()
    res["error"] = error_rate.gamma(best)[0]

    assert res["best_gap"] == pytest.approx(0.0000, abs=self._PRECISION)
    assert res["last_t"] == 5
# Base model handed to the grid search below; the fixed random_state is
# shared by the deep copy that serves as the unmitigated reference.
estimator = LogisticRegression(
    solver='liblinear',
    fit_intercept=True,
    random_state=97,
)

# Train an unmitigated estimator (on a deep copy, so this fit does not
# touch `estimator` itself)
unmitigated_estimator = copy.deepcopy(estimator)
unmitigated_estimator.fit(X, y)

# Do the grid search with a zero Lagrange multiplier:
# 2 signs x 1 event x 2 groups = 4 index entries, all set to zero,
# wrapped as a single-column DataFrame
zero_index = pd.MultiIndex.from_product(
    [['+', '-'], ['all'], [a0_label, a1_label]],
    names=['sign', 'event', 'group_id'],
)
grid_df = pd.DataFrame(pd.Series(np.zeros(4), index=zero_index))
target = GridSearch(estimator, constraints=DemographicParity(), grid=grid_df)
target.fit(X, y, sensitive_features=A)
assert len(target.all_results) == 1

# Check coefficients: the grid-search predictor must match the
# unmitigated fit exactly
assert np.array_equal(
    target.best_result.predictor.coef_,
    unmitigated_estimator.coef_,
)
def test_argument_types(self, transformX, transformY, transformA):
    """Fit ExponentiatedGradient on transformed inputs and check the result.

    This is an expanded-out version of one of the smoke tests.  The three
    ``transform*`` arguments are callables applied to ``self.X``, ``self.y``
    and ``self.A`` before fitting; the moments used for evaluation are
    loaded with the untransformed data.
    """
    mitigator = ExponentiatedGradient(
        self.learner,
        constraints=DemographicParity(),
        eps=0.1,
    )
    mitigator.fit(
        transformX(self.X),
        transformY(self.y),
        sensitive_features=transformA(self.A),
    )

    res = mitigator._expgrad_result._as_dict()
    best = res["best_classifier"]
    res["n_classifiers"] = len(res["classifiers"])

    # Evaluate the best classifier with freshly loaded moments
    parity = DemographicParity()
    parity.load_data(self.X, self.y, sensitive_features=self.A)
    error_rate = ErrorRate()
    error_rate.load_data(self.X, self.y, sensitive_features=self.A)
    res["disp"] = parity.gamma(best).max()
    res["error"] = error_rate.gamma(best)[0]

    tolerance = self._PRECISION

    # Convergence bookkeeping
    assert res["best_gap"] == pytest.approx(0.0000, abs=tolerance)
    assert res["last_t"] == 5
    assert res["best_t"] == 5

    # Quality of the selected classifier
    assert res["disp"] == pytest.approx(0.1, abs=tolerance)
    assert res["error"] == pytest.approx(0.25, abs=tolerance)

    # Amount of work performed
    assert res["n_oracle_calls"] == 32
    assert res["n_classifiers"] == 3
# Also allow the threshold to be adjustable
score_threshold = 0.625
number_a0 = number_a1 = 4
a0_label, a1_label = 17, 37

# NOTE(review): score_threshold is passed twice -- presumably once per
# group; confirm against _simple_threshold_data.
X, Y, A = _simple_threshold_data(
    number_a0, number_a1,
    score_threshold, score_threshold,
    a0_label, a1_label,
)

base_estimator = LogisticRegression(solver='liblinear', fit_intercept=True)
target = GridSearch(base_estimator, constraints=DemographicParity(), grid_size=11)
target.fit(X, Y, sensitive_features=A)
assert len(target.all_results) == 11

# Two probe rows, one carrying each sensitive-feature label
test_X = pd.DataFrame({
    "actual_feature": [0.2, 0.7],
    "sensitive_features": [a0_label, a1_label],
    "constant_ones_feature": [1, 1],
})

# Predict through the GridSearch object itself; only the probabilities
# are checked, the predict() result is overwritten below
sample_results = target.predict(test_X)
sample_proba = target.predict_proba(test_X)
expected_proba = [[0.53748641, 0.46251359], [0.46688736, 0.53311264]]
assert np.allclose(sample_proba, expected_proba)

# Predict with the first entry of all_results directly
sample_results = target.all_results[0].predictor.predict(test_X)
assert np.array_equal(sample_results, [1, 0])
def test_project_lambda_smoke_negatives():
    """Smoke-test ``DemographicParity.project_lambda`` with larger '-' values.

    Builds a Series indexed by (_SIGN, _EVENT, _GROUP_ID) in which both '-'
    entries (11, 19) exceed the '+' entries (1, 2) and passes it through
    ``project_lambda``.

    NOTE(review): in the visible code the projected result ``ls`` is never
    compared against ``expected`` -- confirm the assertions are not missing.
    """
    dp = DemographicParity()
    events = ['all']
    signs = ['+', '-']
    labels = ['a', 'b']
    midx = pd.MultiIndex.from_product(
        [signs, events, labels],
        names=[_SIGN, _EVENT, _GROUP_ID])

    # from_product varies the last level fastest, so the values below map to
    # ('+', 'a'), ('+', 'b'), ('-', 'a'), ('-', 'b') in that order.
    # Note that the '-' values (11 and 19) are larger
    # than the '+' values (1 and 2)
    # NOTE(review): the leading `0 +` is kept from the original; its purpose
    # is not evident from the visible code.
    lambda_vec = 0 + pd.Series([1, 2, 11, 19], index=midx)
    ls = dp.project_lambda(lambda_vec)
    expected = pd.DataFrame()
def test_signed_weights():
    """Check DemographicParity's short name and build two-group sample data.

    The data uses string group labels, 10 samples for the first group and
    30 for the second, with thresholds 0.2 and 0.7 respectively.

    NOTE(review): ``num_samples`` and the generated ``X, Y, A`` are not
    used by any visible statement -- confirm the rest of the test exists.
    """
    dp = DemographicParity()
    assert dp.short_name == "DemographicParity"

    # Group sizes and their total
    num_samples_a0, num_samples_a1 = 10, 30
    num_samples = num_samples_a0 + num_samples_a1

    # Per-group thresholds and labels
    a0_threshold, a1_threshold = 0.2, 0.7
    a0_label, a1_label = "OneThing", "AnotherThing"

    X, Y, A = simple_binary_threshold_data(
        num_samples_a0, num_samples_a1,
        a0_threshold, a1_threshold,
        a0_label, a1_label,
    )
# Smoke tests for ExponentiatedGradient (going by the class name).
class TestExponentiatedGradientSmoke:
# Stores the fixtures on the instance: X as a three-column DataFrame
# (X1, X2, X3), y and A as Series, a LeastSquaresBinaryClassifierLearner,
# and the numeric tolerance _PRECISION.  X1/X2/X3, labels and
# sensitive_features are defined outside the visible code.
def setup_method(self, method):
self.X = pd.DataFrame({"X1": X1, "X2": X2, "X3": X3})
self.y = pd.Series(labels)
self.A = pd.Series(sensitive_features)
self.learner = LeastSquaresBinaryClassifierLearner()
# presumably the absolute tolerance for approximate comparisons -- confirm
self._PRECISION = 1e-6
# One dict per configuration: the constraint class and eps, followed by
# what look like the expected result values (best_gap, last_t, best_t,
# disp, error, n_oracle_calls, n_classifiers).  The first entry carries
# the same numbers that test_argument_types asserts for eps=0.1.
# NOTE(review): the list is not closed within the visible lines.
smoke_test_data = [{"cons_class": DemographicParity, "eps": 0.100,
"best_gap": 0.000000, "last_t": 5,
"best_t": 5, "disp": 0.100000,
"error": 0.250000, "n_oracle_calls": 32,
"n_classifiers": 3},
{"cons_class": DemographicParity, "eps": 0.050,
"best_gap": 0.000000, "last_t": 5,
"best_t": 5, "disp": 0.050000,
"error": 0.266522, "n_oracle_calls": 23,
"n_classifiers": 6},
{"cons_class": DemographicParity, "eps": 0.020,
"best_gap": 0.000000, "last_t": 5,
"best_t": 5, "disp": 0.020000,
"error": 0.332261, "n_oracle_calls": 22,
"n_classifiers": 5},
{"cons_class": DemographicParity, "eps": 0.010,
"best_gap": 0.000000, "last_t": 5,
"best_t": 5, "disp": 0.010000,
"error": 0.354174, "n_oracle_calls": 22,
"n_classifiers": 5},
{"cons_class": DemographicParity, "eps": 0.005,
"best_gap": 0.000000, "last_t": 5,
def test_construct_and_load():
    """Check DemographicParity's short name and build two-group sample data.

    The data uses integer group labels (2 and 3), 10 samples for the first
    group and 30 for the second, with thresholds 0.2 and 0.7 respectively.

    NOTE(review): ``num_samples`` and the generated ``X, Y, A`` are not
    used by any visible statement -- confirm the rest of the test exists.
    """
    dp = DemographicParity()
    assert dp.short_name == "DemographicParity"

    # Group sizes and their total
    num_samples_a0, num_samples_a1 = 10, 30
    num_samples = num_samples_a0 + num_samples_a1

    # Per-group thresholds and labels
    a0_threshold, a1_threshold = 0.2, 0.7
    a0_label, a1_label = 2, 3

    X, Y, A = simple_binary_threshold_data(
        num_samples_a0, num_samples_a1,
        a0_threshold, a1_threshold,
        a0_label, a1_label,
    )
def test_project_lambda_smoke_positives():
    """Smoke-test ``DemographicParity.project_lambda`` with larger '+' values.

    This is a repeat of the _negatives method, but here both '+' entries
    (23, 19) exceed the '-' entries (5, 7).  The Series is indexed by
    (_SIGN, _EVENT, _GROUP_ID) and passed through ``project_lambda``.

    NOTE(review): in the visible code the projected result ``ls`` is never
    compared against ``expected`` -- confirm the assertions are not missing.
    """
    dp = DemographicParity()
    events = ['all']
    signs = ['+', '-']
    labels = ['a', 'b']
    midx = pd.MultiIndex.from_product(
        [signs, events, labels],
        names=[_SIGN, _EVENT, _GROUP_ID])

    # from_product varies the last level fastest, so the values below map to
    # ('+', 'a'), ('+', 'b'), ('-', 'a'), ('-', 'b') in that order.
    # Note that the '-' values are smaller than
    # the '+' ones
    # NOTE(review): the leading `0 +` is kept from the original; its purpose
    # is not evident from the visible code.
    lambda_vec = 0 + pd.Series([23, 19, 5, 7], index=midx)
    ls = dp.project_lambda(lambda_vec)
    expected = pd.DataFrame()
def setup_method(self, method):
    """Store a liblinear LogisticRegression and a DemographicParity on self.

    Looks like a pytest-style per-test setup hook (assumed from the name
    and signature); ``method`` is accepted but not used.
    """
    # Estimator under test
    self.estimator = LogisticRegression(solver='liblinear')

    # Constraint object kept alongside it
    self.disparity_criterion = DemographicParity()