How to use the mlxtend.feature_selection.SequentialFeatureSelector class in mlxtend

To help you get started, we've selected a few SequentialFeatureSelector examples based on popular ways it is used in public projects.

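Before looking at the project snippets, here is a minimal, self-contained sketch of a typical SequentialFeatureSelector call. The KNeighborsClassifier estimator, the iris dataset, and the parameter values are illustrative assumptions rather than code taken from the projects below.

from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

# Sequential forward selection: grow the feature subset one feature at a time,
# scoring each candidate subset with 5-fold cross-validation, until k_features=3
# features are selected.
sfs = SFS(KNeighborsClassifier(n_neighbors=3),
          k_features=3,
          forward=True,
          floating=False,
          scoring='accuracy',
          cv=5)
sfs = sfs.fit(X, y)

print(sfs.k_feature_idx_)  # indices of the selected features
print(sfs.k_score_)        # cross-validated score of the selected subset

Setting forward=False switches to sequential backward selection, and floating=True enables the floating variants (SFFS/SBFS), as the gumpy example further down shows.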

GitHub: uw-cmg / MAST-ML / FeatureSelection.py
def sequential_forward_selection(self, number_features_to_keep):
        """
        Performs sequential forward selection, keeping number_features_to_keep features.
        """
        # SFS is mlxtend.feature_selection.SequentialFeatureSelector;
        # KFold comes from sklearn.model_selection
        sfs = SFS(self.model, k_features=number_features_to_keep, forward=True, floating=False,
                  verbose=0, scoring='neg_mean_squared_error', cv=KFold(n_splits=5, shuffle=True,
                  random_state=0))
        sfs = sfs.fit(X=np.array(self.dataframe[self.x_features]),
                      y=np.array(self.dataframe[self.y_feature]))
        # Map the selected feature indices back to their column names
        feature_indices_selected = sfs.k_feature_idx_
        x_features_to_keep = []
        for index in feature_indices_selected:
            x_features_to_keep.append(self.x_features[index])

        dataframe = FeatureIO(dataframe=self.dataframe).keep_custom_features(features_to_keep=x_features_to_keep)
        # Add y_feature back into the dataframe
        dataframe = FeatureIO(dataframe=dataframe)\
            .add_custom_features(features_to_add=[self.y_feature],
                                 data_to_add=self.dataframe[self.y_feature])
GitHub: 1313e / PRISM / prism / emulator / _emulator.py
else:
                # Obtain frozen+potentially active parameters list
                frz_pot_par = sset(active_par_data)
                frz_pot_par.update(self._pipeline._pot_active_par)
                frz_pot_par = list(frz_pot_par)
                frz_pot_idx = list(range(len(frz_pot_par)))

                # Obtain non-frozen potentially active parameters
                non_frz_par = [par for par in self._pipeline._pot_active_par
                               if par not in active_par_data]
                non_frz_idx = [frz_pot_par.index(par) for par in non_frz_par]

                # If non_frz_par has at least 1 element, carry out analysis
                if non_frz_par:
                    # Create SequentialFeatureSelector object
                    sfs_obj = SFS(LR(), k_features='parsimonious',
                                  forward=False, floating=False, scoring='r2',
                                  cv=self._n_cross_val)

                    # Obtain sam_set of frz_pot_par
                    frz_pot_sam_set = self._sam_set[emul_i][:, frz_pot_par]

                    # Obtain polynomial terms of frz_pot_sam_set
                    pf_obj = PF(self._poly_order, include_bias=False)
                    frz_pot_poly_terms = pf_obj.fit_transform(frz_pot_sam_set)

                    # Perform linear regression with linear terms only
                    sfs_obj.fit(frz_pot_sam_set, self._mod_set[emul_i][emul_s])

                    # Extract active parameters due to linear significance
                    act_idx_lin = list(sfs_obj.k_feature_idx_)
GitHub: uw-cmg / MAST-ML / mastml / legos / feature_selectors.py
    #    plt.plot(Xdata, ydata, '-o', color='r', label='Avg RMSE 10 tests 5-fold CV')
    #    plt.fill_between(Xdata, np.array(ydata) - np.array(yspread),
    #                     np.array(ydata) + np.array(yspread), alpha=0.1,
    #                     color="r")
    #    plt.xlabel("Number of features")
    #    plt.ylabel("RMSE")
    #    plt.legend(loc="best")
    #    plt.savefig(savedir + "/" + "basic_forward_selection_learning_curve_featurenumber.png", dpi=250)
    #    return


# Include Principal Component Analysis
PCA.transform = dataframify_new_column_names(PCA.transform, 'pca_')

# Include Sequential Forward Selector
SequentialFeatureSelector.transform = dataframify_new_column_names(SequentialFeatureSelector.transform, 'sfs_')
SequentialFeatureSelector.fit = fitify_just_use_values(SequentialFeatureSelector.fit)
model_selectors['SequentialFeatureSelector'] = SequentialFeatureSelector
name_to_constructor['SequentialFeatureSelector'] = SequentialFeatureSelector

# Custom selectors don't need to be dataframified
name_to_constructor.update({
    # 'PassThrough': PassThrough,
    'DoNothing': util_legos.DoNothing,
    'PCA': PCA,
    'SequentialFeatureSelector': SequentialFeatureSelector,
    'MASTMLFeatureSelector': MASTMLFeatureSelector,
})
GitHub: 1313e / PRISM / prism / emulator / _emulator.py
poly_idx : 1D :obj:`~numpy.ndarray` object
            Array containing the indices of the non-zero polynomial terms in
            the regression function.
        poly_coef_cov : 1D :obj:`~numpy.ndarray` object (if \
            :attr:`~use_regr_cov` is *True*)
            Array containing the covariance values of the non-zero polynomial
            coefficients.

        """

        # Create logger
        logger = getRLogger('REGRESSION')
        logger.info("Performing regression.")

        # Create SequentialFeatureSelector object
        sfs_obj = SFS(LR(), k_features='best', forward=True, floating=False,
                      scoring='neg_mean_squared_error',
                      cv=self._n_cross_val)

        # Create Scikit-learn Pipeline object
        # The bias/intercept/constant-term is not included in the SFS object to
        # ensure that it is taken into account in the linear regression, since
        # it is required for getting the residual variance. It also ensures
        # that the SFS does not focus on the constant-term in its calculations.
        pipe = Pipeline_sk([('poly', PF(self._poly_order, include_bias=False)),
                            ('SFS', sfs_obj),
                            ('linear', LR())])

        # Loop over all emulator systems and perform a regression on them
        for emul_s in emul_s_seq:
            # Extract active_sam_set
            active_sam_set = self._sam_set[emul_i][
GitHub: gumpy-bci / gumpy / gumpy / features.py
# get all additional entries for the options
        # opts.update(kwopts)

        # retrieve a classifier object
        classifier_obj = available_classifiers[classifier](**opts)

        # extract the backend classifier
        clf = classifier_obj.clf
    else:
        # if we received a classifier object we'll just use this one
        clf = classifier.clf


    if selection_type == 'SFS':
        algorithm = "Sequential Forward Selection (SFS)"
        sfs = SFS(clf, k_features, forward=True, floating=False,
                verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

    elif selection_type == 'SBS':
        algorithm = "Sequential Backward Selection (SBS)"
        sfs = SFS(clf, k_features, forward=False, floating=False,
                verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

    elif selection_type == 'SFFS':
        algorithm = "Sequential Forward Floating Selection (SFFS)"
        sfs = SFS(clf, k_features, forward=True, floating=True,
                verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

    elif selection_type == 'SBFS':
        algorithm = "Sequential Backward Floating Selection (SFFS)"
        sfs = SFS(clf, k_features, forward=False, floating=True,
                verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)