# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_onehotencode_with_dummy_na():
    """One-hot encode a column containing NaN, with a NaN dummy requested.

    With ``dummy_na=True`` and the stage's default first-dummy dropping,
    the source column is removed and the NaN dummy itself is the one that
    gets dropped, leaving only the remaining category dummies.
    """
    df = _one_categ_df_with_nan()
    onehotencode = OneHotEncode("Born", dummy_na=True)
    res_df = onehotencode(df)
    # The encoded source column is gone, and so is the NaN dummy.
    assert "Born" not in res_df.columns
    assert "Born_nan" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    # A fitted stage should transform new data without refitting.
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
def test_onehotencode_one_no_drop():
    """One-hot encode a single column while keeping the source column.

    With ``drop=False`` the original ``Born`` column is retained alongside
    the generated dummy columns; the first dummy (Greece) is still dropped.
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born", drop=False)
    res_df = onehotencode(df, verbose=True)
    assert "Greece" not in res_df.columns
    # drop=False keeps the source column in the result.
    assert "Born" in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    # A fitted stage should transform new data without refitting.
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
def test_onehotencode_large():
    """One-hot encode with defaults, then transform a larger dataframe.

    Fits on the small dataframe (dropping the source column and the first
    dummy, Greece), then verifies the fitted stage can be applied to a
    larger dataframe.
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born")
    res_df = onehotencode(df, verbose=True)
    assert "Born" not in res_df.columns
    # The first dummy column is dropped by default.
    assert "Born_Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    # A fitted stage should transform a larger dataframe without refitting.
    df2 = _one_categ_df_large()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
def test_onehotencode_with_dummy_na_no_drop_first():
    """One-hot encode with a NaN dummy and no first-dummy dropping.

    With ``dummy_na=True`` and ``drop_first=False`` all dummies are kept,
    including ``Born_nan``, which flags the row whose value was NaN.
    """
    df = _one_categ_df_with_nan()
    onehotencode = OneHotEncode("Born", dummy_na=True, drop_first=False)
    res_df = onehotencode(df)
    assert "Born" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    # The NaN dummy is kept and marks the NaN row.
    assert "Born_nan" in res_df.columns
    assert res_df["Born_nan"][1] == 0
    assert res_df["Born_nan"][2] == 0
    assert res_df["Born_nan"][3] == 1
    # A fitted stage should transform new data without refitting.
    # (The original test ended with this dangling comment but never
    # performed the check; added here for consistency with sibling tests.)
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
def test_onehotencode_one_with_exclude():
    """One-hot encode all categorical columns except the excluded ones.

    With no columns given and ``exclude_columns=["Name"]``, the ``Born``
    column is encoded (source and first dummy dropped) while ``Name`` is
    left untouched and gains no dummy columns.
    """
    df = _two_categ_df()
    onehotencode = OneHotEncode(exclude_columns=["Name"])
    res_df = onehotencode(df)
    assert "Born" not in res_df.columns
    # The excluded column survives unencoded.
    assert "Name" in res_df.columns
    assert "Name_Bob" not in res_df.columns
    assert "Name_Jack" not in res_df.columns
    assert "Name_Yan" not in res_df.columns
    assert "Greece" not in res_df.columns
    assert "Born_UK" in res_df.columns
    assert res_df["Born_UK"][1] == 0
    assert res_df["Born_UK"][2] == 1
    assert res_df["Born_UK"][3] == 0
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
def test_onehotencode_with_nan():
    """One-hot encode a column containing NaN without a NaN dummy.

    With the default ``dummy_na=False`` the NaN value produces no dummy,
    and with default first-dummy dropping only ``Born_USA`` remains.
    """
    df = _one_categ_df_with_nan()
    onehotencode = OneHotEncode("Born")
    res_df = onehotencode(df)
    assert "Born" not in res_df.columns
    assert "Born_UK" not in res_df.columns
    assert "Born_nan" not in res_df.columns
    assert "Born_USA" in res_df.columns
    # Only the single surviving dummy column should remain.
    assert len(res_df.columns) == 1
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    # A fitted stage should transform new data without refitting.
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
def test_onehotencode_one_with_drop_first_colname(verbose):
    """One-hot encode, dropping a specific named dummy instead of the first.

    With ``drop_first="UK"`` the ``Born_UK`` dummy is removed while the
    alphabetically-first dummy (``Born_Greece``) is kept.

    NOTE(review): ``verbose`` is presumably a pytest fixture (or a
    parametrized argument) defined elsewhere — confirm it exists in
    conftest, otherwise pytest will error on the missing fixture.
    """
    df = _one_categ_df()
    onehotencode = OneHotEncode("Born", drop_first="UK")
    res_df = onehotencode(df, verbose=verbose)
    assert "Born" not in res_df.columns
    # Greece is kept because the named column "UK" was dropped instead.
    assert "Born_Greece" in res_df.columns
    assert "Born_UK" not in res_df.columns
    assert res_df["Born_Greece"][1] == 0
    assert res_df["Born_Greece"][2] == 0
    assert res_df["Born_Greece"][3] == 1
    assert "Born_USA" in res_df.columns
    assert res_df["Born_USA"][1] == 1
    assert res_df["Born_USA"][2] == 0
    assert res_df["Born_USA"][3] == 0
    # A fitted stage should transform new data without refitting.
    df2 = _one_categ_single_row_df()
    assert onehotencode.is_fitted
    res_df2 = onehotencode(df2, verbose=True)
# NOTE(review): fragment of OneHotEncode.__init__ — the `def` line and the
# opening `if columns is None:` branch lie outside this chunk; indentation
# reconstructed from syntax (the original paste was whitespace-mangled).
    # No explicit columns given: encode all categorical columns (presumably;
    # confirm against the stage's transform logic).
    self._columns = None
else:
    # Normalize the columns parameter (single label or list) to a list.
    self._columns = _interpret_columns_param(columns)
self._dummy_na = dummy_na
if exclude_columns is None:
    self._exclude_columns = []
else:
    self._exclude_columns = _interpret_columns_param(exclude_columns)
self._col_subset = col_subset
self._drop_first = drop_first
self._drop = drop
# Per-column bookkeeping populated at fit time: generated dummy column
# names and the fitted encoder for each source column.
self._dummy_col_map = {}
self._encoder_map = {}
col_str = _list_str(self._columns)
# Build the base pipeline-stage kwargs: exception message, application
# message, and description, falling back to "all columns" wording when
# no explicit column list was given (col_str is falsy).
super_kwargs = {
    "exmsg": OneHotEncode._DEF_1HENCODE_EXC_MSG.format(col_str),
    "appmsg": OneHotEncode._DEF_1HENCODE_APP_MSG.format(
        col_str or "all columns"
    ),
    "desc": "One-hot encode {}".format(
        col_str or "all categorical columns"
    ),
}
# Caller-supplied kwargs override the defaults above.
super_kwargs.update(**kwargs)
super().__init__(**super_kwargs)
# NOTE(review): fragment of the stage's fit/transform method — the enclosing
# `def`, the per-column loop header, and the definitions of `dummies`,
# `nan_col`, `assign_map` and `columns_to_encode` are outside this chunk.
# The first portion (through the `_encoder_map` assignment and the
# `assign_map` loop) presumably sits inside a loop over `columns_to_encode`;
# loop nesting was lost in extraction — confirm against the full source.
# Candidate dummy to drop when drop_first names a specific category value.
dfirst_col = colname + "_" + str(self._drop_first)
if dfirst_col in dummies:
    if verbose:
        print(
            (
                "Dropping {} dummy column instead of first "
                "column when one-hot encoding {}."
            ).format(dfirst_col, colname)
        )
    dummies.drop(dfirst_col, axis=1, inplace=True)
elif nan_col in dummies:
    # Prefer dropping the NaN dummy over an informative category dummy.
    dummies.drop(nan_col, axis=1, inplace=True)
else:
    # Fall back to dropping the first dummy column.
    dummies.drop(dummies.columns[0], axis=1, inplace=True)
# Record the surviving dummy columns and a fitted encoder for this column.
self._dummy_col_map[colname] = list(dummies.columns)
self._encoder_map[colname] = OneHotEncode._FitterEncoder(
    colname, list(dummies.columns)
)
for column in dummies:
    assign_map[column] = dummies[column]
# Attach all generated dummy columns to the dataframe at once.
inter_df = df.assign(**assign_map)
self.is_fitted = True
if self._drop:
    # Drop the original (now-encoded) source columns from the result.
    return inter_df.drop(columns_to_encode, axis=1)
return inter_df
# NOTE(review): this chunk is a second, duplicated copy of the
# OneHotEncode.__init__ tail already seen above — most likely an artifact
# of overlapping scrape windows. The opening `if columns is None:` branch
# lies outside this chunk; indentation reconstructed from syntax.
else:
    # Normalize the columns parameter (single label or list) to a list.
    self._columns = _interpret_columns_param(columns)
self._dummy_na = dummy_na
if exclude_columns is None:
    self._exclude_columns = []
else:
    self._exclude_columns = _interpret_columns_param(exclude_columns)
self._col_subset = col_subset
self._drop_first = drop_first
self._drop = drop
# Per-column bookkeeping populated at fit time.
self._dummy_col_map = {}
self._encoder_map = {}
col_str = _list_str(self._columns)
# Base pipeline-stage kwargs; "all columns" wording when col_str is falsy.
super_kwargs = {
    "exmsg": OneHotEncode._DEF_1HENCODE_EXC_MSG.format(col_str),
    "appmsg": OneHotEncode._DEF_1HENCODE_APP_MSG.format(
        col_str or "all columns"
    ),
    "desc": "One-hot encode {}".format(
        col_str or "all categorical columns"
    ),
}
# Caller-supplied kwargs override the defaults above.
super_kwargs.update(**kwargs)
super().__init__(**super_kwargs)