How to use the imblearn.over_sampling module in imblearn

To help you get started, we’ve selected a few imblearn examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scikit-learn-contrib / imbalanced-learn / examples / evaluation / plot_metrics.py View on Github external
from imblearn import pipeline as pl
from imblearn.metrics import (geometric_mean_score,
                              make_index_balanced_accuracy)

# Echo the module docstring; it is defined above this excerpt.
print(__doc__)

# One seed reused by the dataset generator, the sampler, the classifier and
# the train/test split below, so that the whole run is reproducible.
RANDOM_STATE = 42

# Generate a dataset
# NOTE(review): n_classes=3 is paired with only two weights. The
# scikit-learn docs say a missing last weight is inferred as
# 1 - sum(weights), which would be 0 here -- confirm that an (almost) empty
# third class is intended.
X, y = datasets.make_classification(n_classes=3, class_sep=2,
                                    weights=[0.1, 0.9], n_informative=10,
                                    n_redundant=1, flip_y=0, n_features=20,
                                    n_clusters_per_class=4, n_samples=5000,
                                    random_state=RANDOM_STATE)

# Chain SMOTE oversampling with a linear SVM in a single estimator.
# NOTE(review): `os` is presumably an alias for imblearn.over_sampling
# imported above this excerpt, not the standard-library `os` module --
# confirm.
pipeline = pl.make_pipeline(os.SMOTE(random_state=RANDOM_STATE),
                            LinearSVC(random_state=RANDOM_STATE))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    random_state=RANDOM_STATE)

# Train the classifier with balancing
# (the split happens before fitting, so only the training part is used to
# fit the pipeline)
pipeline.fit(X_train, y_train)

# Test the classifier and get the prediction
y_pred_bal = pipeline.predict(X_test)

###############################################################################
# The geometric mean corresponds to the square root of the product of the
# sensitivity and specificity. Combining the two metrics should account for
# the balancing of the dataset.
github daniel-muthukrishna / astrodash / astrodash / create_arrays.py View on Github external
def smote_oversample(self):
        """Oversample the shuffled arrays with SMOTE and reshuffle the result.

        Reads ``self.kwargShuf['images']`` and ``self.kwargShuf['labels']``,
        resamples them with ``over_sampling.SMOTE`` and hands the resampled
        arrays to ``self.shuffle_arrays``.

        Returns:
            Whatever ``self.shuffle_arrays`` returns; the same value is also
            stored on ``self.kwargOverSampledShuf``.
        """
        # NOTE(review): newer imbalanced-learn releases deprecate SMOTE's
        # ``n_jobs`` argument -- confirm against the pinned version.
        sm = over_sampling.SMOTE(random_state=42, n_jobs=30)

        # ``fit_sample`` was deprecated in imbalanced-learn 0.4 and later
        # removed; ``fit_resample`` is the supported equivalent.
        images, labels = sm.fit_resample(X=self.kwargShuf['images'],
                                         y=self.kwargShuf['labels'])

        # The memmap name embeds ``self.randnum``.
        self.kwargOverSampledShuf = self.shuffle_arrays(memmapName='oversampled_smote_{}'.format(self.randnum),
                                                        images=images, labels=labels)

        return self.kwargOverSampledShuf
github daniel-muthukrishna / astrodash / dash / create_arrays.py View on Github external
def smote_oversample(self):
        """Oversample the shuffled arrays with SMOTE and reshuffle the result.

        Reads ``self.kwargShuf['images']`` and ``self.kwargShuf['labels']``,
        resamples them with ``over_sampling.SMOTE`` and hands the resampled
        arrays to ``self.shuffle_arrays``.

        Returns:
            Whatever ``self.shuffle_arrays`` returns; the same value is also
            stored on ``self.kwargOverSampledShuf``.
        """
        # NOTE(review): newer imbalanced-learn releases deprecate SMOTE's
        # ``n_jobs`` argument -- confirm against the pinned version.
        sm = over_sampling.SMOTE(random_state=42, n_jobs=30)

        # ``fit_sample`` was deprecated in imbalanced-learn 0.4 and later
        # removed; ``fit_resample`` is the supported equivalent.
        images, labels = sm.fit_resample(X=self.kwargShuf['images'],
                                         y=self.kwargShuf['labels'])

        # The memmap name embeds ``self.randnum``.
        self.kwargOverSampledShuf = self.shuffle_arrays(
            memmapName='oversampled_smote_{}'.format(self.randnum),
            images=images, labels=labels)

        return self.kwargOverSampledShuf
github MStarmans91 / WORC / WORC / classification / ObjectSampler.py View on Github external
def init_RandomOverSampling(self, sampling_strategy):
        """Create a random over sampler object and record its strategy.

        The sampler is stored on ``self.object``; ``sampling_strategy`` is
        forwarded to it and also kept on ``self.sampling_strategy``.
        """
        sampler_options = {
            'sampling_strategy': sampling_strategy,
            'random_state': self.random_state,
        }
        self.object = over_sampling.RandomOverSampler(**sampler_options)
        self.sampling_strategy = sampling_strategy
github daniel-muthukrishna / astrodash / astrodash / create_arrays_with_memory_mapping.py View on Github external
def smote_oversample(self):
        """Oversample the shuffled arrays with SMOTE and reshuffle the result.

        Reads ``self.kwargShuf['images']`` and ``self.kwargShuf['labels']``,
        resamples them with ``over_sampling.SMOTE`` and hands the resampled
        arrays to ``self.shuffle_arrays``.

        Returns:
            Whatever ``self.shuffle_arrays`` returns; the same value is also
            stored on ``self.kwargOverSampledShuf``.
        """
        # NOTE(review): newer imbalanced-learn releases deprecate SMOTE's
        # ``n_jobs`` argument -- confirm against the pinned version.
        sm = over_sampling.SMOTE(random_state=42, n_jobs=30)

        # ``fit_sample`` was deprecated in imbalanced-learn 0.4 and later
        # removed; ``fit_resample`` is the supported equivalent.
        images, labels = sm.fit_resample(X=self.kwargShuf['images'],
                                         y=self.kwargShuf['labels'])

        # The memmap name embeds ``self.randnum``.
        self.kwargOverSampledShuf = self.shuffle_arrays(memmapName='oversampled_smote_{}'.format(self.randnum),
                                                        images=images, labels=labels)

        return self.kwargOverSampledShuf
github MStarmans91 / WORC / WORC / classification / ObjectSampler.py View on Github external
def init_BorderlineSMOTE(self, ratio, k_neighbors, kind, n_jobs):
        """Create a BorderlineSMOTE sampler object.

        The sampler is stored on ``self.object`` and each argument is kept on
        the attribute of the same name.

        Args:
            ratio: Resampling target, forwarded to the sampler as its
                ``sampling_strategy``.
            k_neighbors: Forwarded unchanged to ``BorderlineSMOTE``.
            kind: Forwarded unchanged to ``BorderlineSMOTE``.
            n_jobs: Forwarded unchanged to ``BorderlineSMOTE``.
        """
        # imbalanced-learn renamed the ``ratio`` keyword to
        # ``sampling_strategy`` and later removed the old spelling. This
        # method's own parameter keeps the name ``ratio`` so existing callers
        # are unaffected.
        # NOTE(review): newer imbalanced-learn releases also deprecate
        # ``n_jobs`` on this sampler -- confirm against the pinned version.
        self.object =\
            over_sampling.BorderlineSMOTE(random_state=self.random_state,
                                          sampling_strategy=ratio,
                                          k_neighbors=k_neighbors,
                                          kind=kind,
                                          n_jobs=n_jobs)

        self.ratio = ratio
        self.k_neighbors = k_neighbors
        self.kind = kind
        self.n_jobs = n_jobs
github scikit-learn-contrib / imbalanced-learn / examples / model_selection / plot_validation_curve.py View on Github external
from imblearn import over_sampling as os
from imblearn import pipeline as pl

# Echo the module docstring; it is defined above this excerpt.
print(__doc__)

# One seed reused by the dataset generator, the sampler and the classifier.
RANDOM_STATE = 42

# Score every fit with Cohen's kappa, wrapped as a scorer object.
scorer = metrics.make_scorer(metrics.cohen_kappa_score)

# Generate the dataset
# (two classes, with class weights of 0.1 and 0.9)
X, y = datasets.make_classification(n_classes=2, class_sep=2,
                                    weights=[0.1, 0.9], n_informative=10,
                                    n_redundant=1, flip_y=0, n_features=20,
                                    n_clusters_per_class=4, n_samples=5000,
                                    random_state=RANDOM_STATE)
# SMOTE oversampling followed by a decision tree, chained in one estimator.
smote = os.SMOTE(random_state=RANDOM_STATE)
cart = tree.DecisionTreeClassifier(random_state=RANDOM_STATE)
pipeline = pl.make_pipeline(smote, cart)

# Values 1..10 to try for SMOTE's ``k_neighbors``. "smote__k_neighbors"
# addresses that parameter on the pipeline step that make_pipeline names
# after the lowercased class name.
param_range = range(1, 11)
train_scores, test_scores = ms.validation_curve(
    pipeline, X, y, param_name="smote__k_neighbors", param_range=param_range,
    cv=3, scoring=scorer)
# Per the scikit-learn docs the score arrays are (n_params, n_cv_folds), so
# axis=1 aggregates across the folds for each parameter value.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)

# Single-axes figure for the curve.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Mean test score as a function of k_neighbors.
plt.plot(param_range, test_scores_mean, label='SMOTE')