Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _get_grouped_data_and_base_points(sensitive_feature_value):
data = pd.DataFrame({
SENSITIVE_FEATURE_KEY: sensitive_features_ex1,
SCORE_KEY: scores_ex,
LABEL_KEY: labels_ex})
grouped_data = data.groupby(SENSITIVE_FEATURE_KEY).get_group(sensitive_feature_value) \
.sort_values(by=SCORE_KEY, ascending=False)
x_grid = np.linspace(0, 1, 100)
if sensitive_feature_value == "A":
expected_roc_points = pd.DataFrame({
"x": [0, 0.25, 0.5, 0.5, 1],
"y": [0, 1/3, 2/3, 1, 1],
"operation": [ThresholdOperation('>', np.inf),
ThresholdOperation('<', 0.5),
ThresholdOperation('<', 1.5),
ThresholdOperation('<', 2.5),
ThresholdOperation('>', -np.inf)]
})
ignore_for_base_points = [1, 2]
if sensitive_feature_value == "B":
def test_calculate_roc_points():
    """Check ``_calculate_roc_points`` against hand-written points for group "A".

    The module-level example data is assembled into a frame, the rows of
    group "A" are selected and ordered by descending score, and the result of
    ``_calculate_roc_points`` is compared with the expected x/y/operation
    table via ``_assert_equal_points``.
    """
    frame = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1,
        SCORE_KEY: scores_ex,
        LABEL_KEY: labels_ex,
    })
    group_a = frame.groupby(SENSITIVE_FEATURE_KEY).get_group("A")
    # The rows are handed over sorted from highest to lowest score.
    group_a_by_score = group_a.sort_values(by=SCORE_KEY, ascending=False)

    actual_points = _calculate_roc_points(group_a_by_score, "A")

    # One threshold operation per expected point, in the same order as x/y.
    expected_operations = [
        ThresholdOperation('>', np.inf),
        ThresholdOperation('<', 0.5),
        ThresholdOperation('<', 1.5),
        ThresholdOperation('<', 2.5),
        ThresholdOperation('>', -np.inf),
    ]
    expected_points = pd.DataFrame({
        "x": [0, 0.25, 0.5, 0.5, 1],
        "y": [0, 1/3, 2/3, 1, 1],
        "operation": expected_operations,
    })

    _assert_equal_points(expected_points, actual_points)
# Try filtering to get the convex hull of the ROC points.
def _get_grouped_data_and_base_points(sensitive_feature_value):
data = pd.DataFrame({
SENSITIVE_FEATURE_KEY: sensitive_features_ex1,
SCORE_KEY: scores_ex,
LABEL_KEY: labels_ex})
grouped_data = data.groupby(SENSITIVE_FEATURE_KEY).get_group(sensitive_feature_value) \
.sort_values(by=SCORE_KEY, ascending=False)
x_grid = np.linspace(0, 1, 100)
if sensitive_feature_value == "A":
expected_roc_points = pd.DataFrame({
"x": [0, 0.25, 0.5, 0.5, 1],
"y": [0, 1/3, 2/3, 1, 1],
"operation": [ThresholdOperation('>', np.inf),
ThresholdOperation('<', 0.5),
ThresholdOperation('<', 1.5),
ThresholdOperation('<', 2.5),
ThresholdOperation('>', -np.inf)]
})
def test_calculate_roc_points():
    """Verify the points returned by ``_calculate_roc_points`` for group "A".

    Builds a frame from the module-level example data, keeps only the rows of
    group "A" sorted by descending score, computes the points and compares
    them with the expected x/y/operation table.
    """
    data = pd.DataFrame({
        SENSITIVE_FEATURE_KEY: sensitive_features_ex1,
        SCORE_KEY: scores_ex,
        LABEL_KEY: labels_ex})
    grouped_data = (
        data.groupby(SENSITIVE_FEATURE_KEY)
        .get_group("A")
        .sort_values(by=SCORE_KEY, ascending=False)
    )

    roc_points = _calculate_roc_points(grouped_data, "A")
    expected_roc_points = pd.DataFrame({
        "x": [0, 0.25, 0.5, 0.5, 1],
        "y": [0, 1/3, 2/3, 1, 1],
        "operation": [ThresholdOperation('>', np.inf),
                      ThresholdOperation('<', 0.5),
                      ThresholdOperation('<', 1.5),
                      ThresholdOperation('<', 2.5),
                      ThresholdOperation('>', -np.inf)]
    })

    # Without this comparison the test could never fail on wrong points.
    _assert_equal_points(expected_roc_points, roc_points)
:type labels: pandas.Series, pandas.DataFrame, numpy.ndarray, or list
:param scores: the output from the unconstrained predictor used for training the mitigator
:type scores: pandas.Series, pandas.DataFrame, numpy.ndarray, or list
:param sensitive_feature_names: list of names for the sensitive features in case they were
not implicitly provided (e.g. if `sensitive_features` is of type DataFrame); default
None
:type sensitive_feature_names: list of strings
:return: the training data for the mitigator, grouped by sensitive feature value
:rtype: pandas.DataFrameGroupBy
"""
data_dict = {}
# TODO: extend to multiple columns for additional group data
# and name columns after original column names if possible
# or store the original column names
sensitive_feature_name = SENSITIVE_FEATURE_KEY
if sensitive_feature_names is not None:
if sensitive_feature_name in [SCORE_KEY, LABEL_KEY]:
raise ValueError(SENSITIVE_FEATURE_NAME_CONFLICT_DETECTED_ERROR_MESSAGE)
sensitive_feature_name = sensitive_feature_names[0]
_reformat_data_into_dict(sensitive_feature_name, data_dict, sensitive_features)
_reformat_data_into_dict(SCORE_KEY, data_dict, scores)
_reformat_data_into_dict(LABEL_KEY, data_dict, labels)
return pd.DataFrame(data_dict).groupby(sensitive_feature_name)