Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
axis=0,
)
self.sample_indices_ = idx_under
return _safe_indexing(X, idx_under), _safe_indexing(y, idx_under)
# Extra estimator tags for this sampler: sets the "sample_indices" tag to True.
# NOTE(review): presumably signals that fit_resample stores ``sample_indices_``
# (see the assignment just above) -- confirm against the tag consumer.
def _more_tags(self):
return {"sample_indices": True}
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class RepeatedEditedNearestNeighbours(BaseCleaningSampler):
"""Undersample based on the repeated edited nearest neighbour method.
This method repeats the ENN algorithm several times.
Read more in the :ref:`User Guide <edited_nearest_neighbors>`.
Parameters
----------
{sampling_strategy}
n_neighbors : int or object, default=3
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
:class:`sklearn.neighbors.base.KNeighborsMixin` that will be used to
find the nearest-neighbors.
import numpy as np
from scipy.stats import mode
from sklearn.utils import _safe_indexing
from ..base import BaseCleaningSampler
from ._edited_nearest_neighbours import EditedNearestNeighbours
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
SEL_KIND = ("all", "mode")
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class NeighbourhoodCleaningRule(BaseCleaningSampler):
"""Undersample based on the neighbourhood cleaning rule.
This class uses ENN and a k-NN to remove noisy samples from the datasets.
Read more in the :ref:`User Guide <condensed_nearest_neighbors>`.
Parameters
----------
{sampling_strategy}
n_neighbors : int or object, default=3
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
# Authors: Guillaume Lemaitre
# Fernando Nogueira
# Christos Aridas
# License: MIT
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class TomekLinks(BaseCleaningSampler):
"""Under-sampling by removing Tomek's links.
Read more in the :ref:`User Guide <tomek_links>`.
Parameters
----------
{sampling_strategy}
{n_jobs}
Attributes
----------
sample_indices_ : ndarray of shape (n_new_samples)
# License: MIT
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class TomekLinks(BaseCleaningSampler):
"""Under-sampling by removing Tomek's links.
Read more in the :ref:`User Guide <tomek_links>`.
Parameters
----------
{sampling_strategy}
{n_jobs}
Attributes
----------
sample_indices_ : ndarray of shape (n_new_samples)
Indices of the samples selected.
.. versionadded:: 0.4
from sklearn.utils import _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
SEL_KIND = ("all", "mode")
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class EditedNearestNeighbours(BaseCleaningSampler):
"""Undersample based on the edited nearest neighbour method.
This method will clean the database by removing samples close to the
decision boundary.
Read more in the :ref:`User Guide <edited_nearest_neighbors>`.
Parameters
----------
{sampling_strategy}
n_neighbors : int or object, default=3
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
:class:`sklearn.neighbors.base.KNeighborsMixin` that will be used to
find the nearest-neighbors.
import numpy as np
from scipy.sparse import issparse
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import check_random_state, _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class CondensedNearestNeighbour(BaseCleaningSampler):
"""Undersample based on the condensed nearest neighbour method.
Read more in the :ref:`User Guide <condensed_nearest_neighbors>`.
Parameters
----------
{sampling_strategy}
{random_state}
n_neighbors : int or object, default=\
KNeighborsClassifier(n_neighbors=1)
import numpy as np
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import check_random_state, _safe_indexing
from ..base import BaseCleaningSampler
from ._tomek_links import TomekLinks
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class OneSidedSelection(BaseCleaningSampler):
"""Class to perform under-sampling based on one-sided selection method.
Read more in the :ref:`User Guide <condensed_nearest_neighbors>`.
Parameters
----------
{sampling_strategy}
{random_state}
n_neighbors : int or object, default=None
If ``int``, size of the neighbourhood to consider to compute the
weights=[0.2, 0.3, 0.5],
random_state=0,
)
sampler = Sampler()
expected_stat = Counter(y)[1]
if isinstance(sampler, BaseOverSampler):
sampling_strategy = {2: 498, 0: 498}
sampler.set_params(sampling_strategy=sampling_strategy)
X_res, y_res = sampler.fit_resample(X, y)
assert Counter(y_res)[1] == expected_stat
elif isinstance(sampler, BaseUnderSampler):
sampling_strategy = {2: 201, 0: 201}
sampler.set_params(sampling_strategy=sampling_strategy)
X_res, y_res = sampler.fit_resample(X, y)
assert Counter(y_res)[1] == expected_stat
elif isinstance(sampler, BaseCleaningSampler):
sampling_strategy = [2, 0]
sampler.set_params(sampling_strategy=sampling_strategy)
X_res, y_res = sampler.fit_resample(X, y)
assert Counter(y_res)[1] == expected_stat
import numpy as np
from scipy.stats import mode
from sklearn.utils import _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import check_neighbors_object
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
SEL_KIND = ("all", "mode")
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
)
class EditedNearestNeighbours(BaseCleaningSampler):
"""Undersample based on the edited nearest neighbour method.
This method will clean the database by removing samples close to the
decision boundary.
Read more in the :ref:`User Guide <edited_nearest_neighbors>`.
Parameters
----------
{sampling_strategy}
n_neighbors : int or object, default=3
If ``int``, size of the neighbourhood to consider to compute the
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier
from sklearn.utils import check_random_state, _safe_indexing
from ..base import BaseCleaningSampler
from ...utils import Substitution
from ...utils._docstring import _n_jobs_docstring
from ...utils._docstring import _random_state_docstring
@Substitution(
sampling_strategy=BaseCleaningSampler._sampling_strategy_docstring,
n_jobs=_n_jobs_docstring,
random_state=_random_state_docstring,
)
class CondensedNearestNeighbour(BaseCleaningSampler):
"""Undersample based on the condensed nearest neighbour method.
Read more in the :ref:`User Guide <condensed_nearest_neighbors>`.
Parameters
----------
{sampling_strategy}
{random_state}
n_neighbors : int or object, default=\
KNeighborsClassifier(n_neighbors=1)
If ``int``, size of the neighbourhood to consider to compute the
nearest neighbors. If object, an estimator that inherits from
:class:`sklearn.neighbors.base.KNeighborsMixin` that will be used to
find the nearest-neighbors.