Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_out_of_place_col_insert_nameless_error():
    """Inserting an unnamed series with no column name raises ValueError."""
    frame = _test_df()
    unnamed = pd.Series([10, 20], index=[1, 2])
    # Neither the series nor the call supplies a name for the new column.
    with pytest.raises(ValueError):
        out_of_place_col_insert(frame, unnamed, 1)
def test_out_of_place_col_insert_all_params():
    """An explicit column name takes precedence over the series name."""
    frame = _test_df()
    tens = pd.Series([10, 20], index=[1, 2], name='tens')
    inserted = out_of_place_col_insert(frame, tens, 1, 'Tigers')

    # The new column is registered under the explicit name, at position 1.
    columns = inserted.columns
    assert 'tens' not in columns
    assert 'Tigers' in columns
    assert columns.get_loc('Tigers') == 1

    # Values are looked up by the index labels given to the series.
    tigers = inserted['Tigers']
    assert tigers[1] == 10
    assert tigers[2] == 20
def test_out_of_place_col_insert_no_col_name():
    """Without an explicit column name the series name is used."""
    frame = _test_df()
    tens = pd.Series([10, 20], index=[1, 2], name='tens')
    inserted = out_of_place_col_insert(frame, tens, 1)

    # The series' own name labels the new column, placed at position 1.
    columns = inserted.columns
    assert 'tens' in columns
    assert columns.get_loc('tens') == 1

    # Values are looked up by the index labels given to the series.
    tens_col = inserted['tens']
    assert tens_col[1] == 10
    assert tens_col[2] == 20
def test_out_of_place_col_last_position():
    """A series can be inserted after the last existing column."""
    frame = _test_df()
    tens = pd.Series([10, 20], index=[1, 2], name='tens')
    # A loc equal to the current column count targets the end of the frame.
    end = len(frame.columns)
    inserted = out_of_place_col_insert(frame, tens, end, 'Tigers')

    columns = inserted.columns
    assert 'tens' not in columns
    assert 'Tigers' in columns
    assert columns.get_loc('Tigers') == 2

    # Values are looked up by the index labels given to the series.
    tigers = inserted['Tigers']
    assert tigers[1] == 10
    assert tigers[2] == 20
def _transform(self, df, verbose):
    """Encode columns of ``df`` with the encoders in ``self.encoders``.

    For every column name in ``self.encoders`` the matching encoder's
    ``transform`` is applied to that column of ``df``. If ``self._drop``
    is truthy, the encoded values replace the source column (same name,
    same position); otherwise they are inserted directly after the source
    column under the name ``<column>_enc``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding every column named in ``self.encoders``.
    verbose : bool
        Not used by this method.

    Returns
    -------
    pandas.DataFrame
        The frame produced by the successive column insertions.
    """
    inter_df = df
    for colname, lbl_enc in self.encoders.items():
        # Values are always read from the untouched input frame.
        source_col = df[colname]
        # The position is looked up in the frame being built, not in the
        # input: when source columns are kept, every earlier insertion
        # shifts the columns to its right, so positions taken from ``df``
        # would put later encoded columns before their source column.
        loc = inter_df.columns.get_loc(colname)
        if self._drop:
            # Replace in place: the encoded column takes over the slot
            # and the name of the source column.
            inter_df = inter_df.drop(colname, axis=1)
            new_name = colname
        else:
            # Keep the source column and put the encoded one right after.
            loc += 1
            new_name = colname + "_enc"
        inter_df = out_of_place_col_insert(
            df=inter_df,
            series=lbl_enc.transform(source_col),
            loc=loc,
            column_name=new_name,
        )
    return inter_df
new_cols = df.apply(self._func, axis=1)
if isinstance(new_cols, pd.Series):
loc = len(df.columns)
if self._follow_column:
loc = df.columns.get_loc(self._follow_column) + 1
return out_of_place_col_insert(
df=df, series=new_cols, loc=loc, column_name=self._colname
)
elif isinstance(new_cols, pd.DataFrame):
sorted_cols = sorted(list(new_cols.columns))
new_cols = new_cols[sorted_cols]
if self._follow_column:
inter_df = df
loc = df.columns.get_loc(self._follow_column) + 1
for colname in new_cols.columns:
inter_df = out_of_place_col_insert(
df=inter_df,
series=new_cols[colname],
loc=loc,
column_name=colname,
)
loc += 1
return inter_df
assign_map = {
colname: new_cols[colname] for colname in new_cols.columns
}
return df.assign(**assign_map)
raise TypeError( # pragma: no cover
"Unexpected type generated by applying a function to a DataFrame."
" Only Series and DataFrame are allowed."
new_name = colname + "_log"
if self._drop:
inter_df = inter_df.drop(colname, axis=1)
new_name = colname
loc -= 1
new_col = source_col
if self._non_neg:
minval = min(new_col)
if minval < 0:
new_col = new_col + abs(minval)
self._col_to_minval[colname] = abs(minval)
# must check not None as neg numbers eval to False
if self._const_shift is not None:
new_col = new_col + self._const_shift
new_col = np.log(new_col)
inter_df = out_of_place_col_insert(
df=inter_df, series=new_col, loc=loc, column_name=new_name
)
self.is_fitted = True
return inter_df
df.select_dtypes(include=["object", "category"]).columns
).difference(self._exclude_columns)
)
if verbose:
columns_to_encode = tqdm.tqdm(columns_to_encode)
inter_df = df
for colname in columns_to_encode:
lbl_enc = sklearn.preprocessing.LabelEncoder()
source_col = df[colname]
loc = df.columns.get_loc(colname) + 1
new_name = colname + "_enc"
if self._drop:
inter_df = inter_df.drop(colname, axis=1)
new_name = colname
loc -= 1
inter_df = out_of_place_col_insert(
df=inter_df,
series=lbl_enc.fit_transform(source_col),
loc=loc,
column_name=new_name,
)
self.encoders[colname] = lbl_enc
self.is_fitted = True
return inter_df
def _transform(self, df, verbose):
    """Map each column in ``self._columns`` through its stored remover.

    For every column name in ``self._columns`` the column of ``df`` is
    passed through ``Series.map`` with ``self._rare_removers[colname]``.
    If ``self._drop`` is truthy, the mapped values replace the source
    column (same name, same position); otherwise they are inserted
    directly after the source column under the name ``<column>_norare``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding every column named in ``self._columns``.
    verbose : bool
        Not used by this method.

    Returns
    -------
    pandas.DataFrame
        The frame produced by the successive column insertions.
    """
    inter_df = df
    for colname in self._columns:
        # Values are always read from the untouched input frame.
        source_col = df[colname]
        # The position is looked up in the frame being built, not in the
        # input: when source columns are kept, every earlier insertion
        # shifts the columns to its right, so positions taken from ``df``
        # would put later mapped columns before their source column.
        loc = inter_df.columns.get_loc(colname)
        if self._drop:
            # Replace in place: the mapped column takes over the slot
            # and the name of the source column.
            inter_df = inter_df.drop(colname, axis=1)
            new_name = colname
        else:
            # Keep the source column and put the mapped one right after.
            loc += 1
            new_name = colname + "_norare"
        rare_remover = self._rare_removers[colname]
        inter_df = out_of_place_col_insert(
            df=inter_df,
            series=source_col.map(rare_remover),
            loc=loc,
            column_name=new_name,
        )
    return inter_df
def _transform(self, df, verbose):
new_cols = df.apply(self._func, axis=1)
if isinstance(new_cols, pd.Series):
loc = len(df.columns)
if self._follow_column:
loc = df.columns.get_loc(self._follow_column) + 1
return out_of_place_col_insert(
df=df, series=new_cols, loc=loc, column_name=self._colname
)
elif isinstance(new_cols, pd.DataFrame):
sorted_cols = sorted(list(new_cols.columns))
new_cols = new_cols[sorted_cols]
if self._follow_column:
inter_df = df
loc = df.columns.get_loc(self._follow_column) + 1
for colname in new_cols.columns:
inter_df = out_of_place_col_insert(
df=inter_df,
series=new_cols[colname],
loc=loc,
column_name=colname,
)
loc += 1