How to use the pdpipe.util.out_of_place_col_insert function in pdpipe

To help you get started, we've selected a few pdpipe examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pdpipe / pdpipe / tests / test_util.py View on Github external
def test_out_of_place_col_insert_nameless_error():
    """Inserting a nameless series with no column name raises ValueError."""
    frame = _test_df()
    # The series carries no ``name`` and no column name is passed below.
    nameless = pd.Series([10, 20], index=[1, 2])

    with pytest.raises(ValueError):
        out_of_place_col_insert(frame, nameless, 1)
github pdpipe / pdpipe / tests / test_util.py View on Github external
def test_out_of_place_col_insert_all_params():
    """An explicit column name is used instead of the series' own name."""
    frame = _test_df()
    tens = pd.Series([10, 20], index=[1, 2], name='tens')

    inserted = out_of_place_col_insert(frame, tens, 1, 'Tigers')

    # The column must appear under the requested name, at the requested loc.
    columns = inserted.columns
    assert 'tens' not in columns
    assert 'Tigers' in columns
    assert columns.get_loc('Tigers') == 1

    # Values are looked up by the series' index labels.
    tigers = inserted['Tigers']
    assert tigers[1] == 10
    assert tigers[2] == 20
github pdpipe / pdpipe / tests / test_util.py View on Github external
def test_out_of_place_col_insert_no_col_name():
    """Without a column name, the series' own name labels the new column."""
    frame = _test_df()
    tens = pd.Series([10, 20], index=[1, 2], name='tens')

    inserted = out_of_place_col_insert(frame, tens, 1)

    columns = inserted.columns
    assert 'tens' in columns
    assert columns.get_loc('tens') == 1

    # Values are looked up by the series' index labels.
    new_col = inserted['tens']
    assert new_col[1] == 10
    assert new_col[2] == 20
github pdpipe / pdpipe / tests / test_util.py View on Github external
def test_out_of_place_col_last_position():
    """Inserting at ``loc == len(df.columns)`` places the column at index 2."""
    frame = _test_df()
    tens = pd.Series([10, 20], index=[1, 2], name='tens')
    end_loc = len(frame.columns)

    inserted = out_of_place_col_insert(frame, tens, end_loc, 'Tigers')

    # The explicit name wins over the series name, at the requested position.
    columns = inserted.columns
    assert 'tens' not in columns
    assert 'Tigers' in columns
    assert columns.get_loc('Tigers') == 2

    tigers = inserted['Tigers']
    assert tigers[1] == 10
    assert tigers[2] == 20
github pdpipe / pdpipe / pdpipe / sklearn_stages.py View on Github external
def _transform(self, df, verbose):
        """Insert an encoded version of every column in ``self.encoders``.

        For each column name in ``self.encoders`` the stored encoder's
        ``transform`` is applied to that column as it appears in the input
        ``df``. If ``self._drop`` is truthy the source column is dropped and
        the encoded values take over its name and position; otherwise the
        encoded values are inserted right after the source column under the
        name ``<colname>_enc``.

        Parameters
        ----------
        df : pandas.DataFrame
            The frame whose columns are encoded.
        verbose : bool
            Not used by this method.

        Returns
        -------
        pandas.DataFrame
            The frame produced by the successive drops and insertions; the
            input ``df`` object itself when ``self.encoders`` is empty.
        """
        inter_df = df
        for colname in self.encoders:
            lbl_enc = self.encoders[colname]
            # Values always come from the untouched input frame.
            source_col = df[colname]
            # The position must be read from the frame being built, not from
            # ``df``: when columns are kept, every earlier insertion shifts
            # the later columns one place to the right, so positions taken
            # from ``df`` would put the new column before its source column.
            loc = inter_df.columns.get_loc(colname) + 1
            new_name = colname + "_enc"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                # The source column is gone, so reuse its former position.
                loc -= 1
            inter_df = out_of_place_col_insert(
                df=inter_df,
                series=lbl_enc.transform(source_col),
                loc=loc,
                column_name=new_name,
            )
        return inter_df
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
new_cols = df.apply(self._func, axis=1)
        if isinstance(new_cols, pd.Series):
            loc = len(df.columns)
            if self._follow_column:
                loc = df.columns.get_loc(self._follow_column) + 1
            return out_of_place_col_insert(
                df=df, series=new_cols, loc=loc, column_name=self._colname
            )
        elif isinstance(new_cols, pd.DataFrame):
            sorted_cols = sorted(list(new_cols.columns))
            new_cols = new_cols[sorted_cols]
            if self._follow_column:
                inter_df = df
                loc = df.columns.get_loc(self._follow_column) + 1
                for colname in new_cols.columns:
                    inter_df = out_of_place_col_insert(
                        df=inter_df,
                        series=new_cols[colname],
                        loc=loc,
                        column_name=colname,
                    )
                    loc += 1
                return inter_df
            assign_map = {
                colname: new_cols[colname] for colname in new_cols.columns
            }
            return df.assign(**assign_map)
        raise TypeError(  # pragma: no cover
            "Unexpected type generated by applying a function to a DataFrame."
            " Only Series and DataFrame are allowed."
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
new_name = colname + "_log"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                loc -= 1
            new_col = source_col
            if self._non_neg:
                minval = min(new_col)
                if minval < 0:
                    new_col = new_col + abs(minval)
                    self._col_to_minval[colname] = abs(minval)
            # must check not None as neg numbers eval to False
            if self._const_shift is not None:
                new_col = new_col + self._const_shift
            new_col = np.log(new_col)
            inter_df = out_of_place_col_insert(
                df=inter_df, series=new_col, loc=loc, column_name=new_name
            )
        self.is_fitted = True
        return inter_df
github pdpipe / pdpipe / pdpipe / sklearn_stages.py View on Github external
df.select_dtypes(include=["object", "category"]).columns
                ).difference(self._exclude_columns)
            )
        if verbose:
            columns_to_encode = tqdm.tqdm(columns_to_encode)
        inter_df = df
        for colname in columns_to_encode:
            lbl_enc = sklearn.preprocessing.LabelEncoder()
            source_col = df[colname]
            loc = df.columns.get_loc(colname) + 1
            new_name = colname + "_enc"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                loc -= 1
            inter_df = out_of_place_col_insert(
                df=inter_df,
                series=lbl_enc.fit_transform(source_col),
                loc=loc,
                column_name=new_name,
            )
            self.encoders[colname] = lbl_enc
        self.is_fitted = True
        return inter_df
github pdpipe / pdpipe / pdpipe / nltk_stages.py View on Github external
def _transform(self, df, verbose):
        """Insert a mapped version of every column in ``self._columns``.

        For each column name in ``self._columns`` the matching entry of
        ``self._rare_removers`` is applied to the column, as it appears in
        the input ``df``, through ``Series.map``. If ``self._drop`` is truthy
        the source column is dropped and the mapped values take over its name
        and position; otherwise the mapped values are inserted right after
        the source column under the name ``<colname>_norare``.

        Parameters
        ----------
        df : pandas.DataFrame
            The frame whose columns are processed.
        verbose : bool
            Not used by this method.

        Returns
        -------
        pandas.DataFrame
            The frame produced by the successive drops and insertions; the
            input ``df`` object itself when ``self._columns`` is empty.
        """
        inter_df = df
        for colname in self._columns:
            # Values always come from the untouched input frame.
            source_col = df[colname]
            # The position must be read from the frame being built, not from
            # ``df``: when columns are kept, every earlier insertion shifts
            # the later columns one place to the right, so positions taken
            # from ``df`` would put the new column before its source column.
            loc = inter_df.columns.get_loc(colname) + 1
            new_name = colname + "_norare"
            if self._drop:
                inter_df = inter_df.drop(colname, axis=1)
                new_name = colname
                # The source column is gone, so reuse its former position.
                loc -= 1
            rare_remover = self._rare_removers[colname]
            inter_df = out_of_place_col_insert(
                df=inter_df,
                series=source_col.map(rare_remover),
                loc=loc,
                column_name=new_name)
        return inter_df
github pdpipe / pdpipe / pdpipe / col_generation.py View on Github external
def _transform(self, df, verbose):
        new_cols = df.apply(self._func, axis=1)
        if isinstance(new_cols, pd.Series):
            loc = len(df.columns)
            if self._follow_column:
                loc = df.columns.get_loc(self._follow_column) + 1
            return out_of_place_col_insert(
                df=df, series=new_cols, loc=loc, column_name=self._colname
            )
        elif isinstance(new_cols, pd.DataFrame):
            sorted_cols = sorted(list(new_cols.columns))
            new_cols = new_cols[sorted_cols]
            if self._follow_column:
                inter_df = df
                loc = df.columns.get_loc(self._follow_column) + 1
                for colname in new_cols.columns:
                    inter_df = out_of_place_col_insert(
                        df=inter_df,
                        series=new_cols[colname],
                        loc=loc,
                        column_name=colname,
                    )
                    loc += 1