How to use the imblearn.under_sampling.base.BaseUnderSampler class in imblearn

To help you get started, we’ve selected a few imblearn examples, based on popular ways it is used in public projects.


github scikit-learn-contrib / imbalanced-learn / imblearn / ensemble / _weight_boosting.py
import numpy as np

from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble._base import _set_random_states
from sklearn.utils import _safe_indexing

from ..under_sampling.base import BaseUnderSampler
from ..under_sampling import RandomUnderSampler
from ..pipeline import make_pipeline
from ..utils import Substitution, check_target_type
from ..utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    random_state=_random_state_docstring,
)
class RUSBoostClassifier(AdaBoostClassifier):
    """Random under-sampling integrated in the learning of AdaBoost.

    During learning, the problem of class balancing is alleviated by
    randomly under-sampling the training set at each iteration of the
    boosting algorithm.

    Read more in the :ref:`User Guide <boosting>`.

    Parameters
    ----------
    base_estimator : object, default=None
        The base estimator from which the boosted ensemble is built.
        Support for sample weighting is required, as well as proper
        ``classes_`` and ``n_classes_`` attributes. If ``None``, then
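The excerpt cuts off mid-docstring. For orientation, here is a minimal usage sketch of the class it defines; the make_classification dataset and parameter values below are illustrative assumptions, not part of the excerpt.

from sklearn.datasets import make_classification
from imblearn.ensemble import RUSBoostClassifier

# Synthetic imbalanced problem: ~90% majority, ~10% minority.
X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

# Each boosting round fits the base estimator on a randomly
# under-sampled version of the training set.
clf = RUSBoostClassifier(n_estimators=50, random_state=0)
clf.fit(X, y)
print(clf.predict(X[:5]))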
github scikit-learn-contrib / imbalanced-learn / imblearn / ensemble / _forest.py
    tree = _parallel_build_trees(
        tree,
        forest,
        X_resampled,
        y_resampled,
        sample_weight,
        tree_idx,
        n_trees,
        verbose=verbose,
        class_weight=class_weight,
        n_samples_bootstrap=n_samples_bootstrap,
    )
    return sampler, tree


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class BalancedRandomForestClassifier(RandomForestClassifier):
    """A balanced random forest classifier.

    A balanced random forest randomly under-samples each bootstrap sample to
    balance it.

    Read more in the :ref:`User Guide <forest>`.

    Parameters
    ----------
    n_estimators : int, default=100
        The number of trees in the forest.
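Again truncated mid-docstring; a short usage sketch of the class (synthetic data and parameter choices are assumptions):

from sklearn.datasets import make_classification
from imblearn.ensemble import BalancedRandomForestClassifier

X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

# Each tree is grown on a bootstrap sample that is randomly
# under-sampled to balance the classes before fitting.
clf = BalancedRandomForestClassifier(n_estimators=100, random_state=0)
clf.fit(X, y)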
github scikit-learn-contrib / imbalanced-learn / imblearn / under_sampling / _prototype_selection / _nearmiss.py
import warnings
from collections import Counter

import numpy as np

from sklearn.utils import _safe_indexing

from ..base import BaseUnderSampler
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
)
class NearMiss(BaseUnderSampler):
    """Class to perform under-sampling based on NearMiss methods.

    Read more in the :ref:`User Guide <controlled_under_sampling>`.

    Parameters
    ----------
    {sampling_strategy}

    version : int, default=1
        Version of the NearMiss to use. Possible values are 1, 2 or 3.

    n_neighbors : int or object, default=3
        If ``int``, size of the neighbourhood to consider to compute the
        average distance to the minority point samples.  If object, an
        estimator that inherits from
        :class:`sklearn.neighbors.base.KNeighborsMixin` that will be used to
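As a concrete counterpart to the excerpt, a minimal NearMiss run might look like this (the dataset is a made-up example):

from collections import Counter
from sklearn.datasets import make_classification
from imblearn.under_sampling import NearMiss

X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

# NearMiss-1 keeps the majority samples whose average distance to their
# nearest minority neighbours is smallest.
nm = NearMiss(version=1, n_neighbors=3)
X_res, y_res = nm.fit_resample(X, y)
print(Counter(y_res))  # both classes reduced to the minority count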
github hudson-and-thames / mlfinlab / mlfinlab / ensemble / sequential_bootstrap_resampler.py
from imblearn.under_sampling.base import BaseUnderSampler
from sklearn.utils import check_random_state
from sklearn.utils import safe_indexing
from ..sampling.bootstrapping import get_ind_matrix, seq_bootstrap
from sklearn.utils.multiclass import check_classification_targets
from imblearn.utils import check_sampling_strategy
from sklearn.preprocessing import label_binarize
import numpy as np


class SequentialBootstrappingSampler(BaseUnderSampler):
    def __init__(self,
                 sampling_strategy='auto',
                 return_indices=False,
                 random_state=None,
                 ratio=None):
        super().__init__(
            sampling_strategy=sampling_strategy, ratio=ratio)
        self.random_state = random_state
        self.return_indices = return_indices
        self.sample_indices_ = None

    def fit_resample(self, X, y, **kwargs):
        check_classification_targets(y)
        X, y, binarize_y = self._check_X_y(X, y)

        self.sampling_strategy_ = check_sampling_strategy(
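This mlfinlab excerpt is cut off mid-call, but it shows the pattern this page is about: subclassing BaseUnderSampler. Below is a self-contained sketch of that pattern. FirstKUnderSampler is a made-up name, and the sketch assumes the private contract visible in these excerpts (the _fit_resample hook and the sampling_strategy_ dict that the base class computes inside fit_resample), so treat it as illustrative rather than a supported API.

import numpy as np
from sklearn.utils import _safe_indexing
from imblearn.under_sampling.base import BaseUnderSampler

class FirstKUnderSampler(BaseUnderSampler):
    """Toy sampler keeping the first samples of each under-sampled class."""

    def _fit_resample(self, X, y):
        # sampling_strategy_ is filled in by the base class before this hook
        # runs: a dict mapping each class to under-sample to the number of
        # samples to keep. Classes absent from the dict are kept in full.
        indices = []
        for target_class in np.unique(y):
            class_indices = np.flatnonzero(y == target_class)
            n_keep = self.sampling_strategy_.get(target_class, class_indices.size)
            indices.extend(class_indices[:n_keep])
        self.sample_indices_ = np.asarray(indices)
        return _safe_indexing(X, indices), _safe_indexing(y, indices)

Calling FirstKUnderSampler().fit_resample(X, y) then behaves like a deterministic variant of RandomUnderSampler: the balancing arithmetic comes from the base class, and the subclass only decides which rows survive.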
github scikit-learn-contrib / imbalanced-learn / imblearn / ensemble / _balance_cascade.py
from sklearn.base import ClassifierMixin, clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import check_random_state, safe_indexing
from sklearn.model_selection import cross_val_predict
from sklearn.utils.deprecation import deprecated

from .base import BaseEnsembleSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import check_sampling_strategy
from ..utils import Substitution
from ..utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    random_state=_random_state_docstring)
@deprecated('BalanceCascade is deprecated in 0.4 and will be removed in 0.6.')
class BalanceCascade(BaseEnsembleSampler):
    """Create an ensemble of balanced sets by iteratively under-sampling the
    imbalanced dataset using an estimator.

    This method iteratively selects subsets and makes an ensemble of the
    different sets. The selection is performed using a specific classifier.

    Parameters
    ----------
    {sampling_strategy}

    return_indices : bool, optional (default=True)
        Whether or not to return the indices of the samples randomly
        selected from the majority class.
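Since the decorator above marks BalanceCascade as deprecated in 0.4 and slated for removal in 0.6, the sketch below only applies to imbalanced-learn 0.4-0.5; the dataset and parameters are assumptions.

from sklearn.datasets import make_classification
from imblearn.ensemble import BalanceCascade  # removed in imbalanced-learn 0.6

X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

bc = BalanceCascade(random_state=0)
# fit_resample returns one balanced subset per cascade iteration,
# stacked along a leading axis.
X_subsets, y_subsets = bc.fit_resample(X, y)
print(X_subsets.shape)  # (n_subsets, n_samples_per_subset, n_features)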
github scikit-learn-contrib / imbalanced-learn / imblearn / utils / estimator_checks.py
    # in this test we will force all samplers to not change the class 1
    X, y = make_classification(
        n_samples=1000,
        n_classes=3,
        n_informative=4,
        weights=[0.2, 0.3, 0.5],
        random_state=0,
    )
    sampler = Sampler()
    expected_stat = Counter(y)[1]
    if isinstance(sampler, BaseOverSampler):
        sampling_strategy = {2: 498, 0: 498}
        sampler.set_params(sampling_strategy=sampling_strategy)
        X_res, y_res = sampler.fit_resample(X, y)
        assert Counter(y_res)[1] == expected_stat
    elif isinstance(sampler, BaseUnderSampler):
        sampling_strategy = {2: 201, 0: 201}
        sampler.set_params(sampling_strategy=sampling_strategy)
        X_res, y_res = sampler.fit_resample(X, y)
        assert Counter(y_res)[1] == expected_stat
    elif isinstance(sampler, BaseCleaningSampler):
        sampling_strategy = [2, 0]
        sampler.set_params(sampling_strategy=sampling_strategy)
        X_res, y_res = sampler.fit_resample(X, y)
        assert Counter(y_res)[1] == expected_stat
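The check above relies on the dict form of sampling_strategy: each key is a class label, each value a target sample count, and classes omitted from the dict (class 1 here) are left untouched. A standalone version of the under-sampling branch is sketched below; the counts are chosen conservatively, since an under-sampling target may not exceed a class's original size.

from collections import Counter
from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler

X, y = make_classification(
    n_samples=1000, n_classes=3, n_informative=4,
    weights=[0.2, 0.3, 0.5], random_state=0,
)
print(Counter(y))  # roughly 200 / 300 / 500 samples

# Down-sample classes 0 and 2; class 1 is absent from the dict and kept whole.
rus = RandomUnderSampler(sampling_strategy={0: 150, 2: 150}, random_state=0)
X_res, y_res = rus.fit_resample(X, y)
print(Counter(y_res))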
github scikit-learn-contrib / imbalanced-learn / imblearn / ensemble / _bagging.py
import numpy as np

from sklearn.base import clone
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

from ..pipeline import Pipeline
from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_target_type
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class BalancedBaggingClassifier(BaggingClassifier):
    """A Bagging classifier with additional balancing.

    This implementation of Bagging is similar to the scikit-learn
    implementation. It includes an additional step to balance the training set
    at fit time using a ``RandomUnderSampler``.

    Read more in the :ref:`User Guide <bagging>`.

    Parameters
    ----------
    base_estimator : object, default=None
        The base estimator to fit on random subsets of the dataset.
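As with the other ensemble excerpts, a brief usage sketch (the dataset and parameter values are assumptions):

from sklearn.datasets import make_classification
from imblearn.ensemble import BalancedBaggingClassifier

X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

# Each bagging member sees a bootstrap sample that a RandomUnderSampler
# balances before the base estimator is fit.
clf = BalancedBaggingClassifier(n_estimators=10, random_state=0)
clf.fit(X, y)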
github scikit-learn-contrib / imbalanced-learn / imblearn / under_sampling / _prototype_selection / _instance_hardness_threshold.py
from sklearn.model_selection import cross_val_predict
from sklearn.utils import check_random_state
from sklearn.utils import _safe_indexing

from ..base import BaseUnderSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    n_jobs=_n_jobs_docstring,
    random_state=_random_state_docstring,
)
class InstanceHardnessThreshold(BaseUnderSampler):
    """Undersample based on the instance hardness threshold.

    Read more in the :ref:`User Guide <instance_hardness_threshold>`.

    Parameters
    ----------
    estimator : object, default=None
        Classifier to be used to estimate instance hardness of the samples.  By
        default a :class:`sklearn.ensemble.RandomForestClassifier` will be used.
        If ``str``, the choices using a string are the following: ``'knn'``,
        ``'decision-tree'``, ``'random-forest'``, ``'adaboost'``,
        ``'gradient-boosting'`` and ``'linear-svm'``.  If object, an estimator
        inherited from :class:`sklearn.base.ClassifierMixin` and having an
        attribute :func:`predict_proba`.

    {sampling_strategy}
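A hedged usage sketch of the class this excerpt defines; the classifier and dataset below are illustrative choices, not part of the excerpt.

from collections import Counter
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from imblearn.under_sampling import InstanceHardnessThreshold

X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

# Samples that the estimator classifies with low probability for their
# true class are considered "hard" and removed from the majority class.
iht = InstanceHardnessThreshold(
    estimator=RandomForestClassifier(n_estimators=10, random_state=0),
    random_state=0,
)
X_res, y_res = iht.fit_resample(X, y)
print(Counter(y_res))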
github scikit-learn-contrib / imbalanced-learn / imblearn / under_sampling / _prototype_selection / _random_under_sampler.py
import numpy as np

from sklearn.utils import check_array
from sklearn.utils import check_consistent_length
from sklearn.utils import check_random_state
from sklearn.utils import _safe_indexing

from ..base import BaseUnderSampler
from ...utils import check_target_type
from ...utils import Substitution
from ...utils._docstring import _random_state_docstring


@Substitution(
    sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
    random_state=_random_state_docstring,
)
class RandomUnderSampler(BaseUnderSampler):
    """Class to perform random under-sampling.

    Under-sample the majority class(es) by randomly picking samples
    with or without replacement.

    Read more in the :ref:`User Guide <controlled_under_sampling>`.

    Parameters
    ----------
    {sampling_strategy}

    {random_state}
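To round out the excerpt, the most common end-to-end call (dataset assumed):

from collections import Counter
from sklearn.datasets import make_classification
from imblearn.under_sampling import RandomUnderSampler

X, y = make_classification(n_samples=1000, weights=[0.9, 0.1], random_state=0)

# 'auto' under-samples every majority class down to the minority count;
# replacement=False draws each kept sample at most once.
rus = RandomUnderSampler(sampling_strategy="auto", replacement=False, random_state=0)
X_res, y_res = rus.fit_resample(X, y)
print(Counter(y_res))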