Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Human-readable names for the numerically encoded label and
# protected-attribute values of the German credit data.
default_mappings = {
    # Single map for the label column.
    'label_maps': [
        {1.0: 'Good Credit', 2.0: 'Bad Credit'},
    ],
    # One map per protected attribute.
    # NOTE(review): presumably ordered like protected_attribute_names
    # ('sex', then 'age') -- confirm against the dataset class.
    'protected_attribute_maps': [
        {1.0: 'Male', 0.0: 'Female'},
        {1.0: 'Old', 0.0: 'Young'},
    ],
}
def default_preprocessing(df):
    """Derive a 'sex' column from the 'personal_status' column.

    Codes A91, A93 and A94 become 'male'; codes A92 and A95 become
    'female'. Any other value is copied over unchanged. The column is
    added to ``df`` in place and the same frame is returned.
    """
    # TODO: ignores the value of privileged_classes for 'sex'
    code_to_sex = {code: 'male' for code in ('A91', 'A93', 'A94')}
    code_to_sex.update({code: 'female' for code in ('A92', 'A95')})

    personal_status = df['personal_status']
    df['sex'] = personal_status.replace(code_to_sex)
    return df
class GermanDataset(StandardDataset):
"""German credit Dataset.
See :file:`aif360/data/raw/german/README.md`.
"""
def __init__(self, label_name='credit', favorable_classes=[1],
protected_attribute_names=['sex', 'age'],
privileged_classes=[['male'], lambda x: x > 25],
instance_weights_name=None,
categorical_features=['status', 'credit_history', 'purpose',
'savings', 'employment', 'other_debtors', 'property',
'installment_plans', 'housing', 'skill_level', 'telephone',
'foreign_worker'],
features_to_keep=[], features_to_drop=['personal_status'],
na_values=[], custom_preprocessing=default_preprocessing,
metadata=default_mappings):
'label_maps': [{1.0: 'Did recid.', 0.0: 'No recid.'}],
'protected_attribute_maps': [{0.0: 'Male', 1.0: 'Female'},
{1.0: 'Caucasian', 0.0: 'Not Caucasian'}]
}
def default_preprocessing(df):
    """Apply the row filter used by the original analysis:
    https://github.com/propublica/compas-analysis/blob/master/Compas%20Analysis.ipynb

    A row is kept only when all of the following hold:
    ``days_b_screening_arrest`` lies in [-30, 30], ``is_recid`` is not -1,
    ``c_charge_degree`` is not 'O', and ``score_text`` is not 'N/A'.
    The input frame is not modified; the selected rows are returned.
    """
    screening_gap = df['days_b_screening_arrest']
    gap_ok = (screening_gap <= 30) & (screening_gap >= -30)
    recid_ok = df['is_recid'] != -1
    degree_ok = df['c_charge_degree'] != 'O'
    score_ok = df['score_text'] != 'N/A'

    keep = gap_ok & recid_ok & degree_ok & score_ok
    return df[keep]
class CompasDataset(StandardDataset):
"""ProPublica COMPAS Dataset.
See :file:`aif360/data/raw/compas/README.md`.
"""
def __init__(self, label_name='two_year_recid', favorable_classes=[0],
protected_attribute_names=['sex', 'race'],
privileged_classes=[['Female'], ['Caucasian']],
instance_weights_name=None,
categorical_features=['age_cat', 'c_charge_degree',
'c_charge_desc'],
features_to_keep=['sex', 'age', 'age_cat', 'race',
'juv_fel_count', 'juv_misd_count', 'juv_other_count',
'priors_count', 'c_charge_degree', 'c_charge_desc',
'two_year_recid'],
features_to_drop=[], na_values=[],
'PHQ242','EMPST','POVCAT','INSCOV']] >= -1).all(1)] #for all other categorical features, remove values < -1
def utilization(row):
    """Return the sum of the five '15'-suffixed utilization columns of ``row``."""
    columns = ('OBTOTV15', 'OPTOTV15', 'ERTOT15', 'IPNGTD15', 'HHTOTD15')
    # Accumulate left to right, in the same order as a chained `+`.
    total = row[columns[0]]
    for column in columns[1:]:
        total = total + row[column]
    return total
df['TOTEXP15'] = df.apply(lambda row: utilization(row), axis=1)
lessE = df['TOTEXP15'] < 10.0
df.loc[lessE,'TOTEXP15'] = 0.0
moreE = df['TOTEXP15'] >= 10.0
df.loc[moreE,'TOTEXP15'] = 1.0
df = df.rename(columns = {'TOTEXP15' : 'UTILIZATION'})
return df
class MEPSDataset19(StandardDataset):
"""MEPS Dataset.
See :file:`aif360/data/raw/meps/README.md`.
"""
def __init__(self, label_name='UTILIZATION', favorable_classes=[1.0],
protected_attribute_names=['RACE'],
privileged_classes=[['White']],
instance_weights_name='PERWT15F',
categorical_features=['REGION','SEX','MARRY',
'FTSTU','ACTDTY','HONRDC','RTHLTH','MNHLTH','HIBPDX','CHDDX','ANGIDX',
'MIDX','OHRTDX','STRKDX','EMPHDX','CHBRON','CHOLDX','CANCERDX','DIABDX',
'JTPAIN','ARTHDX','ARTHTYPE','ASTHDX','ADHDADDX','PREGNT','WLKLIM',
'ACTLIM','SOCLIM','COGLIM','DFHEAR42','DFSEE42','ADSMOK42',
'PHQ242','EMPST','POVCAT','INSCOV'],
features_to_keep=['REGION','AGE','SEX','RACE','MARRY',
'PHQ242','EMPST','POVCAT','INSCOV']] >= -1).all(1)] #for all other categorical features, remove values < -1
def utilization(row):
    """Return the sum of the five '16'-suffixed utilization columns of ``row``."""
    columns = ('OBTOTV16', 'OPTOTV16', 'ERTOT16', 'IPNGTD16', 'HHTOTD16')
    # Accumulate left to right, in the same order as a chained `+`.
    total = row[columns[0]]
    for column in columns[1:]:
        total = total + row[column]
    return total
df['TOTEXP16'] = df.apply(lambda row: utilization(row), axis=1)
lessE = df['TOTEXP16'] < 10.0
df.loc[lessE,'TOTEXP16'] = 0.0
moreE = df['TOTEXP16'] >= 10.0
df.loc[moreE,'TOTEXP16'] = 1.0
df = df.rename(columns = {'TOTEXP16' : 'UTILIZATION'})
return df
class MEPSDataset21(StandardDataset):
"""MEPS Dataset.
See :file:`aif360/data/raw/meps/README.md`.
"""
def __init__(self, label_name='UTILIZATION', favorable_classes=[1.0],
protected_attribute_names=['RACE'],
privileged_classes=[['White']],
instance_weights_name='PERWT16F',
categorical_features=['REGION','SEX','MARRY',
'FTSTU','ACTDTY','HONRDC','RTHLTH','MNHLTH','HIBPDX','CHDDX','ANGIDX',
'MIDX','OHRTDX','STRKDX','EMPHDX','CHBRON','CHOLDX','CANCERDX','DIABDX',
'JTPAIN','ARTHDX','ARTHTYPE','ASTHDX','ADHDADDX','PREGNT','WLKLIM',
'ACTLIM','SOCLIM','COGLIM','DFHEAR42','DFSEE42', 'ADSMOK42', 'PHQ242',
'EMPST','POVCAT','INSCOV'],
features_to_keep=['REGION','AGE','SEX','RACE','MARRY',
import os
import pandas as pd
from aif360.datasets import StandardDataset
# Human-readable names for the numerically encoded label and
# protected-attribute values of the Adult data.
default_mappings = {
    # Single map for the label column.
    'label_maps': [
        {1.0: '>50K', 0.0: '<=50K'},
    ],
    # One map per protected attribute.
    # NOTE(review): presumably ordered like protected_attribute_names
    # ('race', then 'sex') -- confirm against the dataset class.
    'protected_attribute_maps': [
        {1.0: 'White', 0.0: 'Non-white'},
        {1.0: 'Male', 0.0: 'Female'},
    ],
}
class AdultDataset(StandardDataset):
"""Adult Census Income Dataset.
See :file:`aif360/data/raw/adult/README.md`.
"""
def __init__(self, label_name='income-per-year',
favorable_classes=['>50K', '>50K.'],
protected_attribute_names=['race', 'sex'],
privileged_classes=[['White'], ['Male']],
instance_weights_name=None,
categorical_features=['workclass', 'education',
'marital-status', 'occupation', 'relationship',
'native-country'],
features_to_keep=[], features_to_drop=['fnlwgt'],
na_values=['?'], custom_preprocessing=None,
metadata=default_mappings):
'PHQ242','EMPST','POVCAT','INSCOV']] >= -1).all(1)] #for all other categorical features, remove values < -1
def utilization(row):
    """Return the sum of the five '15'-suffixed utilization columns of ``row``."""
    columns = ('OBTOTV15', 'OPTOTV15', 'ERTOT15', 'IPNGTD15', 'HHTOTD15')
    # Accumulate left to right, in the same order as a chained `+`.
    total = row[columns[0]]
    for column in columns[1:]:
        total = total + row[column]
    return total
df['TOTEXP15'] = df.apply(lambda row: utilization(row), axis=1)
lessE = df['TOTEXP15'] < 10.0
df.loc[lessE,'TOTEXP15'] = 0.0
moreE = df['TOTEXP15'] >= 10.0
df.loc[moreE,'TOTEXP15'] = 1.0
df = df.rename(columns = {'TOTEXP15' : 'UTILIZATION'})
return df
class MEPSDataset20(StandardDataset):
"""MEPS Dataset.
See :file:`aif360/data/raw/meps/README.md`.
"""
def __init__(self, label_name='UTILIZATION', favorable_classes=[1.0],
protected_attribute_names=['RACE'],
privileged_classes=[['White']],
instance_weights_name='PERWT15F',
categorical_features=['REGION','SEX','MARRY',
'FTSTU','ACTDTY','HONRDC','RTHLTH','MNHLTH','HIBPDX','CHDDX','ANGIDX',
'MIDX','OHRTDX','STRKDX','EMPHDX','CHBRON','CHOLDX','CANCERDX','DIABDX',
'JTPAIN','ARTHDX','ARTHTYPE','ASTHDX','ADHDADDX','PREGNT','WLKLIM',
'ACTLIM','SOCLIM','COGLIM','DFHEAR42','DFSEE42', 'ADSMOK42', 'PHQ242',
'EMPST','POVCAT','INSCOV'],
features_to_keep=['REGION','AGE','SEX','RACE','MARRY',
import os
import pandas as pd
from aif360.datasets import StandardDataset
class BankDataset(StandardDataset):
"""Bank marketing Dataset.
See :file:`aif360/data/raw/bank/README.md`.
"""
def __init__(self, label_name='y', favorable_classes=['yes'],
protected_attribute_names=['age'],
privileged_classes=[lambda x: x >= 25],
instance_weights_name=None,
categorical_features=['job', 'marital', 'education', 'default',
'housing', 'loan', 'contact', 'month', 'day_of_week',
'poutcome'],
features_to_keep=[], features_to_drop=[],
na_values=["unknown"], custom_preprocessing=None,
metadata=None):
"""See :obj:`StandardDataset` for a description of the arguments.