How to use the pyod.models.pca.PCA class in pyod

To help you get started, we’ve selected a few pyod examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github yzhao062 / SUOD / examples / demo_base.py View on Github external
# standardize data to be digestible for most algorithms
    X = StandardScaler().fit_transform(X)

    X_train, X_test, y_train, y_test = \
        train_test_split(X, y, test_size=0.4, random_state=42)

    contamination = y.sum() / len(y)

    base_estimators = [
        LOF(n_neighbors=5, contamination=contamination),
        LOF(n_neighbors=15, contamination=contamination),
        LOF(n_neighbors=25, contamination=contamination),
        LOF(n_neighbors=35, contamination=contamination),
        LOF(n_neighbors=45, contamination=contamination),
        HBOS(contamination=contamination),
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
        KNN(n_neighbors=5, contamination=contamination),
        KNN(n_neighbors=15, contamination=contamination),
        KNN(n_neighbors=25, contamination=contamination),
        KNN(n_neighbors=35, contamination=contamination),
        KNN(n_neighbors=45, contamination=contamination),
        IForest(n_estimators=50, contamination=contamination),
        IForest(n_estimators=100, contamination=contamination),
        LOF(n_neighbors=5, contamination=contamination),
        LOF(n_neighbors=15, contamination=contamination),
        LOF(n_neighbors=25, contamination=contamination),
        LOF(n_neighbors=35, contamination=contamination),
        LOF(n_neighbors=45, contamination=contamination),
        HBOS(contamination=contamination),
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
github yzhao062 / pyod / examples / pca_example.py View on Github external
if __name__ == "__main__":
    # Demo script: fit a PCA outlier detector on synthetic data, then print
    # evaluation results for its outlier scores on the train and test splits.
    contamination = 0.1  # expected fraction of outliers (0.1 means 10%)
    n_train = 200  # number of training points
    n_test = 100  # number of testing points

    # Generate sample data
    X_train, y_train, X_test, y_test = \
        generate_data(n_train=n_train,
                      n_test=n_test,
                      n_features=2,
                      contamination=contamination,
                      random_state=42)

    # train PCA detector
    # Pass the same contamination that was used to generate the data, so the
    # detector stays in sync with the data if the value above is changed
    # (previously the detector silently relied on its own default).
    clf_name = 'PCA'
    clf = PCA(contamination=contamination)
    clf.fit(X_train)

    # get the prediction labels and outlier scores of the training data
    # (the label arrays are shown for illustration; only the scores are
    # evaluated below)
    y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
    y_train_scores = clf.decision_scores_  # raw outlier scores

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # evaluate and print the results
    print("\nOn Training Data:")
    evaluate_print(clf_name, y_train, y_train_scores)
    print("\nOn Test Data:")
    evaluate_print(clf_name, y_test, y_test_scores)
github yzhao062 / SUOD / examples / temp_do_not_use_work_w_minist.py View on Github external
HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
        PCA(contamination=contamination),
        OCSVM(contamination=contamination),
        
        KNN(n_neighbors=5, contamination=contamination),
        KNN(n_neighbors=15, contamination=contamination),
        KNN(n_neighbors=25, contamination=contamination),
        KNN(n_neighbors=35, contamination=contamination),
        KNN(n_neighbors=45, contamination=contamination),
        KNN(n_neighbors=5, contamination=contamination),
        KNN(n_neighbors=15, contamination=contamination),
        KNN(n_neighbors=25, contamination=contamination),
        KNN(n_neighbors=35, contamination=contamination),
        KNN(n_neighbors=45, contamination=contamination),
github yzhao062 / SUOD / suod / utils / utility.py View on Github external
HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),
        HBOS(contamination=contamination),

        PCA(contamination=contamination),
        PCA(contamination=contamination),
        PCA(contamination=contamination),
        PCA(contamination=contamination),
        PCA(contamination=contamination),
        PCA(contamination=contamination),
        PCA(contamination=contamination),
        PCA(contamination=contamination),

        KNN(n_neighbors=5, contamination=contamination),
        KNN(n_neighbors=15, contamination=contamination),
        KNN(n_neighbors=25, contamination=contamination),
        KNN(n_neighbors=35, contamination=contamination),
        KNN(n_neighbors=45, contamination=contamination),
        KNN(n_neighbors=50, contamination=contamination),
        KNN(n_neighbors=55, contamination=contamination),
        KNN(n_neighbors=65, contamination=contamination),
        KNN(n_neighbors=75, contamination=contamination),
github yzhao062 / SUOD / examples / module_examples / M2_PSA / demo_pseudo_sup_approximation.py View on Github external
# Detectors under comparison, keyed by their display name. Every detector is
# configured with the same expected outlier fraction.
classifiers = {
    'Angle-based Outlier Detector (ABOD)': ABOD(n_neighbors=10,
                                                contamination=outliers_fraction),
    'Cluster-based Local Outlier Factor (CBLOF)':
        CBLOF(contamination=outliers_fraction, check_estimator=False),
    'Feature Bagging': FeatureBagging(LOF(), contamination=outliers_fraction),
    'Histogram-base Outlier Detection (HBOS)': HBOS(
        contamination=outliers_fraction),
    'Isolation Forest': IForest(contamination=outliers_fraction),
    'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
    'Average KNN': KNN(method='mean', contamination=outliers_fraction),
    'Local Outlier Factor (LOF)': LOF(contamination=outliers_fraction),
    'Minimum Covariance Determinant (MCD)': MCD(
        contamination=outliers_fraction),
    'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
    'Principal Component Analysis (PCA)': PCA(contamination=outliers_fraction)
}

# Names of the reported metrics ("orig" and "psa" variants of each).
report_list = ['train_roc_orig', 'train_p@n_orig', 'train_roc_psa',
               'train_p@n_psa',
               'test_time_orig', 'test_roc_orig', 'test_p@n_orig',
               'test_time_psa', 'test_roc_psa', 'test_p@n_psa']

# Result matrix: one row per classifier, one column per reported metric.
# The width is derived from report_list (10 entries) rather than hard-coded,
# so the two cannot drift apart.
stat_mat_all = np.zeros([len(classifiers), len(report_list)])

# Short labels, listed in the same order as the entries of `classifiers`.
classifier_names = ['ABOD', 'CBLOF', 'FB', 'HBOS', 'IF', 'KNN', 'AKNN', 'LOF',
                    'MCD', 'OCSVM', 'PCA']

for j in range(n_iter):
    stat_mat = np.zeros([len(classifiers), 10])

    for i, (clf_name, clf) in enumerate(classifiers.items()):
        ################## original version
github yzhao062 / SUOD / examples / do_not_use_demo_full.py View on Github external
PCA(contamination=contamination),
    OCSVM(contamination=contamination),
    KNN(n_neighbors=5, contamination=contamination),
    KNN(n_neighbors=15, contamination=contamination),
    KNN(n_neighbors=25, contamination=contamination),
    KNN(n_neighbors=35, contamination=contamination),
    KNN(n_neighbors=45, contamination=contamination),
    IForest(n_estimators=50, contamination=contamination),
    IForest(n_estimators=100, contamination=contamination),
    LOF(n_neighbors=5, contamination=contamination),
    LOF(n_neighbors=15, contamination=contamination),
    LOF(n_neighbors=25, contamination=contamination),
    LOF(n_neighbors=35, contamination=contamination),
    LOF(n_neighbors=45, contamination=contamination),
    HBOS(contamination=contamination),
    PCA(contamination=contamination),
    OCSVM(contamination=contamination),
    KNN(n_neighbors=5, contamination=contamination),
    KNN(n_neighbors=15, contamination=contamination),
    KNN(n_neighbors=25, contamination=contamination),
    KNN(n_neighbors=35, contamination=contamination),
    KNN(n_neighbors=45, contamination=contamination),
    IForest(n_estimators=50, contamination=contamination),
    IForest(n_estimators=100, contamination=contamination),
    LOF(n_neighbors=5, contamination=contamination),
    LOF(n_neighbors=15, contamination=contamination),
    LOF(n_neighbors=25, contamination=contamination),
    LOF(n_neighbors=35, contamination=contamination),
    LOF(n_neighbors=45, contamination=contamination),
    HBOS(contamination=contamination),
    PCA(contamination=contamination),
    OCSVM(contamination=contamination),
github yzhao062 / pyod / notebooks / benchmark.py View on Github external
contamination=outliers_fraction,
                check_estimator=False,
                random_state=random_state),
            'Feature Bagging': FeatureBagging(contamination=outliers_fraction,
                                              random_state=random_state),
            'Histogram-base Outlier Detection (HBOS)': HBOS(
                contamination=outliers_fraction),
            'Isolation Forest': IForest(contamination=outliers_fraction,
                                        random_state=random_state),
            'K Nearest Neighbors (KNN)': KNN(contamination=outliers_fraction),
            'Local Outlier Factor (LOF)': LOF(
                contamination=outliers_fraction),
            'Minimum Covariance Determinant (MCD)': MCD(
                contamination=outliers_fraction, random_state=random_state),
            'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
            'Principal Component Analysis (PCA)': PCA(
                contamination=outliers_fraction, random_state=random_state),
        }
        classifiers_indices = {
            'Angle-based Outlier Detector (ABOD)': 0,
            'Cluster-based Local Outlier Factor': 1,
            'Feature Bagging': 2,
            'Histogram-base Outlier Detection (HBOS)': 3,
            'Isolation Forest': 4,
            'K Nearest Neighbors (KNN)': 5,
            'Local Outlier Factor (LOF)': 6,
            'Minimum Covariance Determinant (MCD)': 7,
            'One-class SVM (OCSVM)': 8,
            'Principal Component Analysis (PCA)': 9,
        }

        for clf_name, clf in classifiers.items():
github yzhao062 / pyod / pyod / models / pca.py View on Github external
def __init__(self, n_components=None, n_selected_components=None,
                 contamination=0.1, copy=True, whiten=False, svd_solver='auto',
                 tol=0.0, iterated_power='auto', random_state=None,
                 weighted=True, standardization=True):
        """Record the configuration of the PCA detector.

        ``contamination`` is forwarded to the parent class initializer; every
        other argument is stored unchanged on the instance under an attribute
        of the same name. No validation or computation happens here.

        Parameters
        ----------
        n_components : default None
        n_selected_components : default None
        contamination : default 0.1
            Passed to the base class constructor.
        copy : default True
        whiten : default False
        svd_solver : default 'auto'
        tol : default 0.0
        iterated_power : default 'auto'
        random_state : default None
        weighted : default True
        standardization : default True

        NOTE(review): the meaning of each option is not visible in this
        method; several names (copy, whiten, svd_solver, tol, iterated_power,
        random_state) look like scikit-learn PCA options -- confirm against
        the pyod API documentation.
        """

        # The base class receives the contamination setting.
        super(PCA, self).__init__(contamination=contamination)
        # Store the remaining arguments verbatim, one attribute per parameter.
        self.n_components = n_components
        self.n_selected_components = n_selected_components
        self.copy = copy
        self.whiten = whiten
        self.svd_solver = svd_solver
        self.tol = tol
        self.iterated_power = iterated_power
        self.random_state = random_state
        self.weighted = weighted
        self.standardization = standardization
github yzhao062 / pyod / examples / compare_all_models.py View on Github external
'Isolation Forest': IForest(contamination=outliers_fraction,
                                random_state=random_state),
    'K Nearest Neighbors (KNN)': KNN(
        contamination=outliers_fraction),
    'Average KNN': KNN(method='mean',
                       contamination=outliers_fraction),
    # 'Median KNN': KNN(method='median',
    #                   contamination=outliers_fraction),
    'Local Outlier Factor (LOF)':
        LOF(n_neighbors=35, contamination=outliers_fraction),
    # 'Local Correlation Integral (LOCI)':
    #     LOCI(contamination=outliers_fraction),
    'Minimum Covariance Determinant (MCD)': MCD(
        contamination=outliers_fraction, random_state=random_state),
    'One-class SVM (OCSVM)': OCSVM(contamination=outliers_fraction),
    'Principal Component Analysis (PCA)': PCA(
        contamination=outliers_fraction, random_state=random_state),
    # 'Stochastic Outlier Selection (SOS)': SOS(
    #     contamination=outliers_fraction),
    'Locally Selective Combination (LSCP)': LSCP(
        detector_list, contamination=outliers_fraction,
        random_state=random_state),
    # 'Connectivity-Based Outlier Factor (COF)':
    #     COF(n_neighbors=35, contamination=outliers_fraction),
    # 'Subspace Outlier Detection (SOD)':
    #     SOD(contamination=outliers_fraction),
}

# Show all detectors, numbered from 1. Iterating a dict yields its keys, so
# `clf` here is the detector's display name (no explicit .keys() call needed).
for i, clf in enumerate(classifiers):
    print('Model', i + 1, clf)