How to use the pyod.models.feature_bagging.FeatureBagging class in pyod

To help you get started, we’ve selected a few pyod examples based on popular ways FeatureBagging is used in public projects.

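Before the full examples, here is a minimal sketch of the basic pattern. It is not taken from the projects below: the parameter values are illustrative, the constructor arguments follow the __init__ shown further down, and generate_data uses the same return order as the example scripts on this page.

# Minimal sketch (illustrative, not from the projects below): wrap a pyod
# detector in FeatureBagging, fit on training data, then score new data.
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.knn import KNN
from pyod.utils.data import generate_data

X_train, y_train, X_test, y_test = generate_data(
    n_train=200, n_test=100, n_features=5,
    contamination=0.1, random_state=42)

# KNN as the base detector is an illustrative choice; LOF is the default.
clf = FeatureBagging(base_estimator=KNN(), n_estimators=10,
                     contamination=0.1, random_state=42)
clf.fit(X_train)

y_test_pred = clf.predict(X_test)              # binary labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # raw outlier scores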

github yzhao062 / SUOD / examples / module_examples / M2_PSA / demo_pseudo_sup_approximation.py (View on GitHub)
print("\n... Processing", mat_file_name, '...')
mat = sp.io.loadmat(os.path.join('../datasets', mat_file))

X = mat['X']
y = mat['y'].ravel()
outliers_fraction = np.sum(y) / len(y)
X = StandardScaler().fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

classifiers = {
    'Angle-based Outlier Detector (ABOD)': ABOD(n_neighbors=10,
                                                contamination=outliers_fraction),
    'Cluster-based Local Outlier Factor (CBLOF)':
        CBLOF(contamination=outliers_fraction, check_estimator=False),
    'Feature Bagging': FeatureBagging(LOF(), contamination=outliers_fraction),
    'Histogram-based Outlier Detection (HBOS)': HBOS(
        contamination=outliers_fraction),
    'Isolation Forest': IForest(contamination=outliers_fraction),
    'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
    'Average KNN': KNN(method='mean', contamination=outliers_fraction),
    'Local Outlier Factor (LOF)': LOF(contamination=outliers_fraction),
    'Minimum Covariance Determinant (MCD)': MCD(
        contamination=outliers_fraction),
    'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
    'Principal Component Analysis (PCA)': PCA(contamination=outliers_fraction)
}

stat_mat_all = np.zeros([len(classifiers), 10])
report_list = ['train_roc_orig', 'train_p@n_orig', 'train_roc_psa',
               'train_p@n_psa', 
               'test_time_orig', 'test_roc_orig', 'test_p@n_orig',
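
The snippet above is cut off mid-list. As a hedged sketch (not part of the original demo), a classifiers dictionary like the one defined above is usually driven by a loop that fits each detector and reports test performance, for example with pyod's evaluate_print helper:

# Sketch only: fit every detector from the dict above on the training split
# and print ROC and precision @ rank n on the held-out test split.
from pyod.utils.data import evaluate_print

for clf_name, clf in classifiers.items():
    clf.fit(X_train)
    evaluate_print(clf_name, y_test, clf.decision_function(X_test))
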
github yzhao062 / pyod / examples / feature_bagging_example.py (View on GitHub)
# imports required by this example
from pyod.models.feature_bagging import FeatureBagging
from pyod.utils.data import generate_data, evaluate_print

if __name__ == "__main__":
    contamination = 0.1  # percentage of outliers
    n_train = 200  # number of training points
    n_test = 100  # number of testing points

    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train Feature Bagging detector
    clf_name = 'FeatureBagging'
    clf = FeatureBagging()
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)
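
Beyond binary labels and raw scores, a fitted detector can also report outlier probabilities. A short sketch, assuming the fitted clf from the example above (predict_proba comes from pyod's BaseDetector interface, not from this particular file):

# Sketch only: rescale the raw outlier scores into probabilities; with
# method='linear' the scores are min-max scaled against the training scores.
y_test_proba = clf.predict_proba(X_test, method='linear')
print(y_test_proba[:5])
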
github yzhao062 / pyod / notebooks / benchmark.py (View on GitHub)
        # 60% data for training and 40% for testing
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=0.4, random_state=random_state)

        # standardizing data for processing
        X_train_norm, X_test_norm = standardizer(X_train, X_test)

        classifiers = {'Angle-based Outlier Detector (ABOD)': ABOD(
            contamination=outliers_fraction),
            'Cluster-based Local Outlier Factor': CBLOF(
                n_clusters=10,
                contamination=outliers_fraction,
                check_estimator=False,
                random_state=random_state),
            'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                              random_state=random_state),
            'Histogram-based Outlier Detection (HBOS)': HBOS(
                contamination=outliers_fraction),
            'Isolation Forest': IForest(contamination=outliers_fraction,
                                        random_state=random_state),
            'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
            'Local Outlier Factor (LOF)': LOF(
                contamination=outliers_fraction),
            'Minimum Covariance Determinant (MCD)': MCD(
                contamination=outliers_fraction, random_state=random_state),
            'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
            'Principal Component Analysis (PCA)': PCA(
                contamination=outliers_fraction, random_state=random_state),
        }
        classifiers_indices = {
            'Angle-based Outlier Detector (ABOD)': 0,
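
The benchmark script is cut off here. A hedged sketch of the evaluation loop such a setup typically feeds, fitting on the standardized training split and recording ROC-AUC and precision @ rank n (precision_n_scores lives in pyod.utils.utility):

# Sketch only, not the original benchmark code.
from sklearn.metrics import roc_auc_score
from pyod.utils.utility import precision_n_scores

for clf_name, clf in classifiers.items():
    clf.fit(X_train_norm)
    test_scores = clf.decision_function(X_test_norm)
    print(clf_name,
          'ROC:', round(roc_auc_score(y_test, test_scores), 4),
          'precision@n:', round(precision_n_scores(y_test, test_scores), 4))
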
github yzhao062 / pyod / pyod / models / feature_bagging.py (View on GitHub)
def __init__(self, base_estimator=None, n_estimators=10, contamination=0.1,
                 max_features=1.0, bootstrap_features=False,
                 check_detector=True, check_estimator=False, n_jobs=1,
                 random_state=None, combination='average', verbose=0,
                 estimator_params=None):

        super(FeatureBagging, self).__init__(contamination=contamination)
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.bootstrap_features = bootstrap_features
        self.check_detector = check_detector
        self.check_estimator = check_estimator
        self.combination = combination
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose
        if estimator_params is not None:
            self.estimator_params = estimator_params
        else:
            self.estimator_params = {}
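
The constructor above is the full parameter surface: base_estimator (LOF by default), the number of random feature subsets (n_estimators), how the per-subset scores are merged (combination, 'average' or 'max'), and the usual contamination / random_state / n_jobs controls. A hedged sketch on synthetic data, comparing the two combination modes:

# Sketch only (synthetic data, illustrative values): compare the two
# score-combination modes exposed by the constructor above.
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.lof import LOF
from pyod.utils.data import generate_data

X_train, y_train, X_test, y_test = generate_data(
    n_train=200, n_test=100, n_features=10,
    contamination=0.1, random_state=42)

for combo in ('average', 'max'):
    clf = FeatureBagging(base_estimator=LOF(), n_estimators=10,
                         combination=combo, contamination=0.1,
                         random_state=42)
    clf.fit(X_train)
    print(combo, clf.decision_function(X_test)[:3])
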
github yzhao062 / pyod / examples / compare_all_models.py (View on GitHub)
print('Number of outliers: %i' % n_outliers)
print(
    'Ground truth shape is {shape}. Outliers are 1 and inliers are 0.\n'.format(
        shape=ground_truth.shape))
print(ground_truth, '\n')

random_state = np.random.RandomState(42)
# Define nine outlier detection tools to be compared
classifiers = {
    'Angle-based Outlier Detector (ABOD)':
        ABOD(contamination=outliers_fraction),
    'Cluster-based Local Outlier Factor (CBLOF)':
        CBLOF(contamination=outliers_fraction,
              check_estimator=False, random_state=random_state),
    'Feature Bagging':
        FeatureBagging(LOF(n_neighbors=35),
                       contamination=outliers_fraction,
                       random_state=random_state),
    'Histogram-based Outlier Detection (HBOS)': HBOS(
        contamination=outliers_fraction),
    'Isolation Forest': IForest(contamination=outliers_fraction,
                                random_state=random_state),
    'K Nearest Neighbors (KNN)': KNN(
        contamination=outliers_fraction),
    'Average KNN': KNN(method='mean',
                       contamination=outliers_fraction),
    # 'Median KNN': KNN(method='median',
    #                   contamination=outliers_fraction),
    'Local Outlier Factor (LOF)':
        LOF(n_neighbors=35, contamination=outliers_fraction),
    # 'Local Correlation Integral (LOCI)':
    #     LOCI(contamination=outliers_fraction),
github yzhao062 / SUOD / examples / module_examples / M3_BPS / demo_balanced_scheduling.py (View on GitHub)
    n_estimators_total = 500
    
    mat_file = 'cardio.mat'
    mat_file_name = mat_file.replace('.mat', '')
    print("\n... Processing", mat_file_name, '...')
    mat = sp.io.loadmat(os.path.join('../datasets', mat_file))
    
    X = mat['X']
    y = mat['y']
    
    X = StandardScaler().fit_transform(X)
        
    classifiers = {
        1: ABOD(n_neighbors=10),
        2: CBLOF(check_estimator=False),
        3: FeatureBagging(LOF()),
        4: HBOS(),
        5: IForest(),
        6: KNN(),
        7: LOF(),
        8: MCD(),
        9: OCSVM(),
        10: PCA(),
    }
    
    idx_clf_mapping = {
        1: 'ABOD',
        2: 'CBLOF',
        3: 'FeatureBagging',
        4: 'HBOS',
        5: 'IForest',
        6: 'KNN',