Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import numpy as np
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble._base import _set_random_states
from sklearn.utils import _safe_indexing
from ..under_sampling.base import BaseUnderSampler
from ..under_sampling import RandomUnderSampler
from ..pipeline import make_pipeline
from ..utils import Substitution, check_target_type
from ..utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
random_state=_random_state_docstring,
)
class RUSBoostClassifier(AdaBoostClassifier):
"""Random under-sampling integrated in the learning of AdaBoost.
During learning, the problem of class balancing is alleviated by randomly
under-sampling the sample at each iteration of the boosting algorithm.
Read more in the :ref:`User Guide <boosting>`.
Parameters
----------
base_estimator : object, default=None
The base estimator from which the boosted ensemble is built.
Support for sample weighting is required, as well as proper
``classes_`` and ``n_classes_`` attributes. If ``None``, then
tree,
forest,
X_resampled,
y_resampled,
sample_weight,
tree_idx,
n_trees,
verbose=verbose,
class_weight=class_weight,
n_samples_bootstrap=n_samples_bootstrap,
)
return sampler, tree
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class BalancedRandomForestClassifier(RandomForestClassifier):
"""A balanced random forest classifier.
A balanced random forest randomly under-samples each bootstrap sample to
balance it.
Read more in the :ref:`User Guide <forest>`.
Parameters
----------
n_estimators : int, default=100
The number of trees in the forest.
import warnings
from collections import Counter
import numpy as np
from sklearn.utils import _safe_indexing
from ..base import BaseUnderSampler
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class NearMiss(BaseUnderSampler):
"""Class to perform under-sampling based on NearMiss methods.
Read more in the :ref:`User Guide <controlled_under_sampling>`.
Parameters
----------
{sampling_strategy}
version : int, default=1
Version of the NearMiss to use. Possible values are 1, 2 or 3.
n_neighbors : int or object, default=3
If ``int``, size of the neighbourhood to consider to compute the
from imblearn.under_sampling.base import BaseUnderSampler
from sklearn.utils import check_random_state
from sklearn.utils import safe_indexing
from ..sampling.bootstrapping import get_ind_matrix, seq_bootstrap
from sklearn.utils.multiclass import check_classification_targets
from imblearn.utils import check_sampling_strategy
from sklearn.preprocessing import label_binarize
import numpy as np
class SequentialBootstrappingSampler(BaseUnderSampler):
def __init__(
    self,
    sampling_strategy='auto',
    return_indices=False,
    random_state=None,
    ratio=None,
):
    """Record the sampler configuration on the instance.

    Parameters
    ----------
    sampling_strategy : default='auto'
        Forwarded unchanged to the parent class constructor.
    return_indices : default=False
        Stored as-is on the instance.
    random_state : default=None
        Stored as-is on the instance.
    ratio : default=None
        Forwarded unchanged to the parent class constructor.
    """
    super().__init__(ratio=ratio, sampling_strategy=sampling_strategy)
    # Placeholder value; presumably filled in later by ``fit_resample``
    # (its body is not visible here -- confirm).
    self.sample_indices_ = None
    self.return_indices = return_indices
    self.random_state = random_state
def fit_resample(self, X, y, **kwargs):
check_classification_targets(y)
X, y, binarize_y = self._check_X_y(X, y)
self.sampling_strategy_ = check_sampling_strategy(
from sklearn.base import ClassifierMixin, clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import check_random_state, safe_indexing
from sklearn.model_selection import cross_val_predict
from sklearn.utils.deprecation import deprecated
from .base import BaseEnsembleSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import check_sampling_strategy
from ..utils import Substitution
from ..utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
random_state=_random_state_docstring)
@deprecated('BalanceCascade is deprecated in 0.4 and will be removed in 0.6.')
class BalanceCascade(BaseEnsembleSampler):
"""Create an ensemble of balanced sets by iteratively under-sampling the
imbalanced dataset using an estimator.
This method iteratively selects subsets and makes an ensemble of the
different sets. The selection is performed using a specific classifier.
Parameters
----------
{sampling_strategy}
return_indices : bool, optional (default=True)
Whether or not to return the indices of the samples randomly
selected from the majority class.
# in this test we will force all samplers to not change the class 1
X, y = make_classification(
n_samples=1000,
n_classes=3,
n_informative=4,
weights=[0.2, 0.3, 0.5],
random_state=0,
)
sampler = Sampler()
expected_stat = Counter(y)[1]
if isinstance(sampler, BaseOverSampler):
sampling_strategy = {2: 498, 0: 498}
sampler.set_params(sampling_strategy=sampling_strategy)
X_res, y_res = sampler.fit_resample(X, y)
assert Counter(y_res)[1] == expected_stat
elif isinstance(sampler, BaseUnderSampler):
sampling_strategy = {2: 201, 0: 201}
sampler.set_params(sampling_strategy=sampling_strategy)
X_res, y_res = sampler.fit_resample(X, y)
assert Counter(y_res)[1] == expected_stat
elif isinstance(sampler, BaseCleaningSampler):
sampling_strategy = [2, 0]
sampler.set_params(sampling_strategy=sampling_strategy)
X_res, y_res = sampler.fit_resample(X, y)
assert Counter(y_res)[1] == expected_stat
import numpy as np
from sklearn.utils import _safe_indexing
from ..base import BaseUnderSampler
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class NearMiss(BaseUnderSampler):
"""Class to perform under-sampling based on NearMiss methods.
Read more in the :ref:`User Guide <controlled_under_sampling>`.
Parameters
----------
{sampling_strategy}
version : int, default=1
Version of the NearMiss to use. Possible values are 1, 2 or 3.
n_neighbors : int or object, default=3
If ``int``, size of the neighbourhood to consider to compute the
average distance to the minority point samples. If object, an
estimator that inherits from
:class:`sklearn.neighbors.base.KNeighborsMixin` that will be used to
import numpy as np
from sklearn.base import clone
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from ..pipeline import Pipeline
from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_target_type
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class BalancedBaggingClassifier(BaggingClassifier):
"""A Bagging classifier with additional balancing.
This implementation of Bagging is similar to the scikit-learn
implementation. It includes an additional step to balance the training set
at fit time using a ``RandomUnderSampler``.
Read more in the :ref:`User Guide <bagging>`.
Parameters
----------
base_estimator : object, default=None
The base estimator to fit on random subsets of the dataset.
from sklearn.model_selection import cross_val_predict
from sklearn.utils import check_random_state
from sklearn.utils import _safe_indexing
from ..base import BaseUnderSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class InstanceHardnessThreshold(BaseUnderSampler):
"""Undersample based on the instance hardness threshold.
Read more in the :ref:`User Guide <instance_hardness_threshold>`.
Parameters
----------
estimator : object, default=None
Classifier to be used to estimate instance hardness of the samples. By
default a :class:`sklearn.ensemble.RandomForestClassifier` will be used.
If ``str``, the choices using a string are the following: ``'knn'``,
``'decision-tree'``, ``'random-forest'``, ``'adaboost'``,
``'gradient-boosting'`` and ``'linear-svm'``. If object, an estimator
inherited from :class:`sklearn.base.ClassifierMixin` and having an
attribute :func:`predict_proba`.
{sampling_strategy}
import numpy as np
from sklearn.utils import check_array
from sklearn.utils import check_consistent_length
from sklearn.utils import check_random_state
from sklearn.utils import _safe_indexing
from ..base import BaseUnderSampler
from ...utils import check_target_type
from ...utils import Substitution
from ...utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
random_state=_random_state_docstring,
)
class RandomUnderSampler(BaseUnderSampler):
"""Class to perform random under-sampling.
Under-sample the majority class(es) by randomly picking samples
with or without replacement.
Read more in the :ref:`User Guide <controlled_under_sampling>`.
Parameters
----------
{sampling_strategy}
{random_state}