Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from ..under_sampling import RandomUnderSampler
from ..under_sampling.base import BaseUnderSampler
from ..utils import Substitution, check_target_type
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring
from ..pipeline import Pipeline
# Largest value representable by a signed 32-bit integer (np.int32).
MAX_INT = np.iinfo(np.int32).max
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class EasyEnsembleClassifier(BaggingClassifier):
"""Bag of balanced boosted learners also known as EasyEnsemble.
This algorithm is known as EasyEnsemble [1]_. The classifier is an
ensemble of AdaBoost learners trained on different balanced bootstrap
samples. The balancing is achieved by random under-sampling.
Read more in the :ref:`User Guide `.
Parameters
----------
n_estimators : int, default=10
# Authors: Guillaume Lemaitre
# Fernando Nogueira
# Christos Aridas
# License: MIT
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class TomekLinks(BaseCleaningSampler):
"""Under-sampling by removing Tomek's links.
Read more in the :ref:`User Guide `.
Parameters
----------
{sampling_strategy}
{n_jobs}
Attributes
----------
import numpy as np
from scipy.sparse import issparse
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import check_random_state, _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class CondensedNearestNeighbour(BaseCleaningSampler):
"""Undersample based on the condensed nearest neighbour method.
Read more in the :ref:`User Guide `.
Parameters
----------
{sampling_strategy}
{random_state}
n_neighbors : int or object, default=\
import numpy as np
from scipy.stats import mode
from sklearn.utils import _safe_indexing
from ..base import BaseCleaningSampler
from ._edited_nearest_neighbours import EditedNearestNeighbours
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
SEL_KIND = ("all", "mode")
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class NeighbourhoodCleaningRule(BaseCleaningSampler):
"""Undersample based on the neighbourhood cleaning rule.
This class uses ENN and a k-NN to remove noisy samples from the datasets.
Read more in the :ref:`User Guide `.
Parameters
----------
{sampling_strategy}
n_neighbors : int or object, default=3
If ``int``, size of the neighbourhood to consider to compute the
import numpy as np
from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble._base import _set_random_states
from sklearn.utils import _safe_indexing
from ..under_sampling.base import BaseUnderSampler
from ..under_sampling import RandomUnderSampler
from ..pipeline import make_pipeline
from ..utils import Substitution, check_target_type
from ..utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
random_state=_random_state_docstring,
)
class RUSBoostClassifier(AdaBoostClassifier):
"""Random under-sampling integrated in the learning of AdaBoost.
During learning, the problem of class balancing is alleviated by random
under-sampling the sample at each iteration of the boosting algorithm.
Read more in the :ref:`User Guide `.
Parameters
----------
base_estimator : object, default=None
The base estimator from which the boosted ensemble is built.
Support for sample weighting is required, as well as proper
from scipy import sparse
from sklearn.base import clone
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import _safe_indexing
from ..base import BaseUnderSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring
VOTING_KIND = ("auto", "hard", "soft")
@Substitution(
sampling_strategy=BaseUnderSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class ClusterCentroids(BaseUnderSampler):
"""Undersample by generating centroids based on clustering methods.
Method that under samples the majority class by replacing a
cluster of majority samples by the cluster centroid of a KMeans
algorithm. This algorithm keeps N majority samples by fitting the
KMeans algorithm with N cluster to the majority class and using
the coordinates of the N cluster centroids as the new majority
samples.
Read more in the :ref:`User Guide `.
# License: MIT
import numpy as np
from scipy import sparse
from sklearn.utils import check_random_state
from sklearn.utils import _safe_indexing
from .base import BaseOverSampler
from ..utils import check_neighbors_object
from ..utils import Substitution
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class ADASYN(BaseOverSampler):
"""Oversample using Adaptive Synthetic (ADASYN) algorithm.
This method is similar to SMOTE but it generates different number of
samples depending on an estimate of the local distribution of the class
to be oversampled.
Read more in the :ref:`User Guide `.
Parameters
----------
{sampling_strategy}
# License: MIT
from sklearn.base import clone
from sklearn.utils import check_X_y
from ..base import BaseSampler
from ..over_sampling import SMOTE
from ..over_sampling.base import BaseOverSampler
from ..under_sampling import EditedNearestNeighbours
from ..utils import check_target_type
from ..utils import Substitution
from ..utils._docstring import _n_jobs_docstring
from ..utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class SMOTEENN(BaseSampler):
"""Over-sampling using SMOTE and cleaning using ENN.
Combine over- and under-sampling using SMOTE and Edited Nearest Neighbours.
Read more in the :ref:`User Guide `.
Parameters
----------
{sampling_strategy}
{random_state}
# Deformation
parallel_point_position = np.dot(point, parallel_unit_vector) * parallel_unit_vector
perpendicular_point_position = point - parallel_point_position
point = (
parallel_point_position
+ (1 - deformation_factor) * perpendicular_point_position
)
# Translation
point = center + radius * point
return point
@Substitution(
sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
random_state=_random_state_docstring,
)
class GeometricSMOTE(BaseOverSampler):
"""Class to perform over-sampling using Geometric SMOTE.
This algorithm is an implementation of Geometric SMOTE, a geometrically
enhanced drop-in replacement for SMOTE as presented in [1]_.
Read more in the :ref:`User Guide `.
Parameters
----------
{sampling_strategy}
{random_state}