Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_col_binner():
    """Check the labels produced by the column binner built for edges [0, 5].

    Values below the first edge are labelled '<0', values from 0 up to (but
    not including) 5 are labelled '0-5', and values from the last edge
    upward are labelled '5≤'.
    """
    binner = Bin._get_col_binner([0, 5])

    # Below the lowest edge, including negative infinity.
    assert binner(-math.inf) == '<0'
    assert binner(-4) == '<0'

    # Inside the single closed-open interval; the lower edge is inclusive.
    assert binner(0) == '0-5'
    assert binner(1) == '0-5'
    assert binner(4.99) == '0-5'

    # At or above the highest edge, including positive infinity.
    # NOTE: the label uses U+2264 (LESS-THAN OR EQUAL TO); it had been
    # corrupted by a wrong-codec decode of its UTF-8 bytes.
    assert binner(5) == '5≤'
    assert binner(232) == '5≤'
    assert binner(math.inf) == '5≤'
def test_bin_two_col():
    """Bin two columns in one stage (drop=False) and check both '<col>_bin' columns.

    Both columns use the single edge 5, so each value is labelled either
    '<5' or '5≤'.
    """
    df = pd.DataFrame([[-3, 9], [4, 2], [5, 1], [9, 5]], columns=['s', 'p'])
    bin_stage = Bin({'s': [5], 'p': [5]}, drop=False)
    res_df = bin_stage.apply(df, verbose=True)

    # Column 's' holds -3, 4, 5, 9.
    # NOTE: '≤' is U+2264; it had been corrupted by a wrong-codec decode.
    assert 's_bin' in res_df.columns
    assert res_df['s_bin'][0] == '<5'
    assert res_df['s_bin'][1] == '<5'
    assert res_df['s_bin'][2] == '5≤'
    assert res_df['s_bin'][3] == '5≤'

    # Column 'p' holds 9, 2, 1, 5.
    assert 'p_bin' in res_df.columns
    assert res_df['p_bin'][0] == '5≤'
    assert res_df['p_bin'][1] == '<5'
    assert res_df['p_bin'][2] == '<5'
    assert res_df['p_bin'][3] == '5≤'
def test_attribute_stage():
    """Chain a Bin stage onto a ColDrop stage via attribute access.

    The chained expression must yield a PdPipeline whose stages are, in
    order, a ColDrop and a Bin; applying it removes 'name' and keeps 'speed'.
    """
    chained = pdp.ColDrop('name').Bin({'speed': [5]}, drop=True)
    assert isinstance(chained, PdPipeline)

    # Stages are checked in pipeline order.
    for position, stage_type in enumerate((ColDrop, Bin)):
        assert isinstance(chained[position], stage_type)

    transformed = chained(_some_df())
    result_columns = transformed.columns
    assert 'speed' in result_columns
    assert 'name' not in result_columns
def test_bin_drop():
    """Bin with drop=True: labels land in 'speed' itself, no 'speed_bin' is added."""
    # Positional arguments are data, index and columns; rows are labelled 1..4.
    df = pd.DataFrame([[-3], [4], [5], [9]], [1, 2, 3, 4], ['speed'])
    bin_stage = Bin({'speed': [5]}, drop=True)
    res_df = bin_stage.apply(df, verbose=True)

    assert 'speed_bin' not in res_df.columns
    # NOTE: '≤' is U+2264; it had been corrupted by a wrong-codec decode.
    assert res_df['speed'][1] == '<5'
    assert res_df['speed'][2] == '<5'
    assert res_df['speed'][3] == '5≤'
    assert res_df['speed'][4] == '5≤'
def test_bin_verbose():
    """Apply a non-dropping Bin stage with verbose=True and check 'speed_bin'."""
    # Positional arguments are data, index and columns; rows are labelled 1..4.
    df = pd.DataFrame([[-3], [4], [5], [9]], [1, 2, 3, 4], ['speed'])
    bin_stage = Bin({'speed': [5]}, drop=False)
    res_df = bin_stage.apply(df, verbose=True)

    assert 'speed_bin' in res_df.columns
    # NOTE: '≤' is U+2264; it had been corrupted by a wrong-codec decode.
    assert res_df['speed_bin'][1] == '<5'
    assert res_df['speed_bin'][2] == '<5'
    assert res_df['speed_bin'][3] == '5≤'
    assert res_df['speed_bin'][4] == '5≤'
def __init__(self, bin_map, drop=True, **kwargs):
    """Store the binning configuration and initialise the parent stage.

    Parameters
    ----------
    bin_map : mapping
        Its keys are listed in the default exception and application
        messages. Presumably maps column labels to bin edges - confirm
        against the class documentation.
    drop : bool, default True
        Stored on the instance as ``_drop``.
    **kwargs
        Forwarded to the parent constructor; any of ``exmsg``, ``appmsg``
        or ``desc`` given here replaces the default built below.
    """
    # Instance state is assigned before the defaults are built, because
    # the default description comes from an instance method.
    self._bin_map = bin_map
    self._drop = drop

    col_labels = _list_str(list(bin_map.keys()))
    exc_message = Bin._DEF_BIN_EXC_MSG.format(col_labels)
    # The application message takes a plural suffix for multiple columns.
    plural_suffix = "s" if len(bin_map) > 1 else ""
    app_message = Bin._DEF_BIN_APP_MSG.format(plural_suffix, col_labels)

    defaults = {
        "exmsg": exc_message,
        "appmsg": app_message,
        "desc": self._default_desc(),
    }
    # Caller-supplied keyword arguments take precedence over the defaults.
    merged_kwargs = {**defaults, **kwargs}
    super().__init__(**merged_kwargs)
def __init__(self, bin_map, drop=True, **kwargs):
    """Store the binning configuration and initialise the parent stage.

    Parameters
    ----------
    bin_map : mapping
        Its keys are listed in the default exception and application
        messages. Presumably maps column labels to bin edges - confirm
        against the class documentation.
    drop : bool, default True
        Stored on the instance as ``_drop``.
    **kwargs
        Forwarded to the parent constructor; any of ``exmsg``, ``appmsg``
        or ``desc`` given here replaces the default built below.
    """
    # Instance state is assigned before the defaults are built, because
    # the default description comes from an instance method.
    self._bin_map = bin_map
    self._drop = drop

    col_labels = _list_str(list(bin_map.keys()))
    exc_message = Bin._DEF_BIN_EXC_MSG.format(col_labels)
    # The application message takes a plural suffix for multiple columns.
    plural_suffix = "s" if len(bin_map) > 1 else ""
    app_message = Bin._DEF_BIN_APP_MSG.format(plural_suffix, col_labels)

    defaults = {
        "exmsg": exc_message,
        "appmsg": app_message,
        "desc": self._default_desc(),
    }
    # Caller-supplied keyword arguments take precedence over the defaults.
    merged_kwargs = {**defaults, **kwargs}
    super().__init__(**merged_kwargs)