Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_encode_fit():
    """Check Encode.fit on its own, then transform on a second frame.

    An unfitted stage must refuse to transform; the frame returned by
    ``fit`` still carries the original string labels; a later
    ``transform`` on another frame yields integer codes.
    """
    train_frame = _some_df()
    stage = Encode()

    # Transforming before any fit must raise.
    with pytest.raises(UnfittedPipelineStageError):
        stage.transform(train_frame)

    # The frame handed back by fit() has the 'lbl' column left unencoded.
    fitted = stage.fit(train_frame)
    assert 'lbl' in fitted.columns
    for row, expected in ((1, 'acd'), (2, 'alk'), (3, 'alk')):
        assert fitted['lbl'][row] == expected

    # see only transform (no fit) when already fitted
    other_frame = _some_df2()
    encoded = stage.transform(other_frame)
    assert 'lbl' in encoded.columns
    for row, expected in ((1, 1), (2, 0)):
        assert encoded['lbl'][row] == expected
def test_encode_in_pipelin_fit_n_transform():
    """Check fit followed by transform for a ColDrop + Encode pipeline."""
    pipeline = pdp.ColDrop('name') + Encode()
    frame = _some_df()

    # The pipeline has not been fitted yet, so transform must raise.
    with pytest.raises(UnfittedPipelineStageError):
        pipeline.transform(frame)

    # The frame returned by fit() still has both columns and raw labels.
    fitted = pipeline.fit(frame)
    for column in ('lbl', 'name'):
        assert column in fitted.columns
    labels = fitted['lbl']
    assert labels[1] == 'acd'
    assert labels[2] == 'alk'
    assert labels[3] == 'alk'

    # Once fitted, transform succeeds and keeps the 'lbl' column.
    transformed = pipeline.transform(frame)
    assert 'lbl' in transformed.columns
def test_encode_in_pipelin_fit_n_transform():
    """Check fit followed by transform for a ColDrop + Encode pipeline.

    Transforming before fitting must raise UnfittedPipelineStageError.
    The frame returned by ``fit`` keeps the 'name' column and the raw
    string labels; after fitting, ``transform`` must succeed and its
    result must still contain the 'lbl' column.
    """
    drop_name = pdp.ColDrop('name')
    encode_stage = Encode()
    pline = drop_name + encode_stage

    df = _some_df()

    # No assignment needed here: the call is expected to raise.
    with pytest.raises(UnfittedPipelineStageError):
        pline.transform(df)

    res_df = pline.fit(df)
    assert 'lbl' in res_df.columns
    assert 'name' in res_df.columns
    assert res_df['lbl'][1] == 'acd'
    assert res_df['lbl'][2] == 'alk'
    assert res_df['lbl'][3] == 'alk'

    # Previously the transform result was assigned but never checked;
    # verify at least that the encoded column is present.
    res_df = pline.transform(df)
    assert 'lbl' in res_df.columns
def test_row_drop_xor_reducer():
    """Testing the RowDrop pipeline stage with the 'xor' reducer."""
    # Default reducer on DF3: rows 1 and 2 are dropped, row 3 is kept.
    baseline = RowDrop([lambda x: x < 3]).apply(DF3)
    for label, kept in ((1, False), (2, False), (3, True)):
        assert (label in baseline.index) == kept

    # reduce='xor' on DF3: row 1 is kept, row 2 is dropped, row 3 is kept.
    xor_result = RowDrop([lambda x: x < 3], reduce='xor').apply(DF3)
    for label, kept in ((1, True), (2, False), (3, True)):
        assert (label in xor_result.index) == kept
def test_row_drop_all_reducer():
    """Testing the RowDrop pipeline stage with the 'all' reducer."""
    # Default reducer on DF3: rows 1 and 2 are dropped, row 3 is kept.
    baseline = RowDrop([lambda x: x < 3]).apply(DF3)
    for label, kept in ((1, False), (2, False), (3, True)):
        assert (label in baseline.index) == kept

    # reduce='all' on DF3: only row 1 is dropped; rows 2 and 3 are kept.
    all_result = RowDrop([lambda x: x < 3], reduce='all').apply(DF3)
    for label, kept in ((1, False), (2, True), (3, True)):
        assert (label in all_result.index) == kept
def test_row_drop_bad_reducer():
    """Testing that RowDrop raises ValueError for the reducer name 'al'."""
    conditions = [lambda x: x < 2]
    with pytest.raises(ValueError):
        RowDrop(conditions, reduce='al')
def test_row_drop_all_reducer():
    """Testing the RowDrop pipeline stage with reduce='all' on DF3."""
    # First run: default reducer.
    default_index = RowDrop([lambda x: x < 3]).apply(DF3).index
    assert 1 not in default_index
    assert 2 not in default_index
    assert 3 in default_index

    # Second run: reduce='all' keeps row 2, which the default run dropped.
    all_index = RowDrop([lambda x: x < 3], reduce='all').apply(DF3).index
    assert 1 not in all_index
    assert 2 in all_index
    assert 3 in all_index
def test_row_drop_columns():
    """Testing the RowDrop pipeline stage with the columns argument."""
    # Without a columns argument, rows 1 and 2 of DF2 are dropped.
    unrestricted = RowDrop([lambda x: x < 2]).apply(DF2)
    for label, kept in ((1, False), (2, False), (3, True)):
        assert (label in unrestricted.index) == kept

    # With columns=['a', 'b'], row 2 is kept.
    restricted = RowDrop([lambda x: x < 2], columns=['a', 'b']).apply(DF2)
    for label, kept in ((1, False), (2, True), (3, True)):
        assert (label in restricted.index) == kept
def test_row_drop_xor_reducer():
    """Testing the RowDrop pipeline stage with reduce='xor' on DF3."""
    # First run: default reducer.
    default_index = RowDrop([lambda x: x < 3]).apply(DF3).index
    assert 1 not in default_index
    assert 2 not in default_index
    assert 3 in default_index

    # Second run: reduce='xor' keeps row 1, which the default run dropped.
    xor_index = RowDrop([lambda x: x < 3], reduce='xor').apply(DF3).index
    assert 1 in xor_index
    assert 2 not in xor_index
    assert 3 in xor_index
def test_row_drop_verbose():
    """Testing the RowDrop pipeline stage applied with verbose=True."""
    stage = RowDrop([lambda x: x < 2])
    remaining = stage.apply(DF1, verbose=True).index
    # Row 1 of DF1 is dropped; rows 2 and 3 are kept.
    assert 1 not in remaining
    assert 2 in remaining
    assert 3 in remaining