# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fit the ensemble on the training data, then collect all three kinds of
# test-set output: binary labels, raw scores, and probabilities.
model.fit(X_train) # fit all models with X
model.approximate(X_train) # conduct model approximation if it is enabled
predicted_labels = model.predict(X_test) # predict binary outlier labels (0/1)
predicted_scores = model.decision_function(X_test) # predict raw outlier scores
predicted_probs = model.predict_proba(X_test) # predict outlier probabilities
###########################################################################
# compared with other approaches
# Combine the per-detector outputs with three simple rules and report each
# combination's performance (evaluate_print prints ROC / precision@n).
evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
evaluate_print('average', y_test, average(predicted_scores))
evaluate_print('maximization', y_test, maximization(predicted_scores))
# Single-detector baseline: Local Outlier Factor on the same split.
lof_baseline = LOF()
lof_baseline.fit(X_train)
lof_test_scores = lof_baseline.decision_function(X_test)
evaluate_print('LOF', y_test, lof_test_scores)
# Single-detector baseline: Isolation Forest on the same split.
iforest_baseline = IForest()
iforest_baseline.fit(X_train)
evaluate_print('IForest', y_test, iforest_baseline.decision_function(X_test))
# Build a KNN ensemble: one detector per neighbourhood size in k_list.
# (The loop body had lost its indentation and was syntactically invalid.)
# Column i of the train/test score matrices holds detector i's scores.
for i in range(n_clf):
    k = k_list[i]  # neighbourhood size for this detector
    clf = KNN(n_neighbors=k, method='largest')
    clf.fit(X_train_norm)
    train_scores[:, i] = clf.decision_scores_  # raw training scores
    test_scores[:, i] = clf.decision_function(X_test_norm)  # test scores
# Decision scores have to be normalized (zero mean, unit variance per
# detector) before combination, so no single detector dominates.
train_scores_norm, test_scores_norm = standardizer(train_scores,
                                                   test_scores)
# Combination by average
y_by_average = average(test_scores_norm)
evaluate_print('Combination by Average', y_test, y_by_average)
# Combination by max
y_by_maximization = maximization(test_scores_norm)
evaluate_print('Combination by Maximization', y_test, y_by_maximization)
# Combination by median (was mislabelled "max" and stored in
# y_by_maximization, clobbering the true maximization result)
y_by_median = median(test_scores_norm)
evaluate_print('Combination by Median', y_test, y_by_median)
# Combination by aom (average of maximum over score buckets)
y_by_aom = aom(test_scores_norm, n_buckets=5)
evaluate_print('Combination by AOM', y_test, y_by_aom)
# Combination by moa (maximum of average over score buckets)
y_by_moa = moa(test_scores_norm, n_buckets=5)
evaluate_print('Combination by MOA', y_test, y_by_moa)
# Train a LOF detector and report / visualize its performance.
# clf_name was not set here (unlike the COF/HBOS sections below), so the
# printed and plotted results carried a stale detector name.
clf_name = 'LOF'
clf = LOF()
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores
# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)
# visualize the results
visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
          y_test_pred, show_figure=True, save_figure=False)
# Connectivity-based Outlier Factor: fit, score, evaluate, and plot.
clf_name = 'COF'
cof_detector = COF(n_neighbors=30)
cof_detector.fit(X_train)

# Training-split outputs produced during fitting.
cof_train_labels = cof_detector.labels_           # 0 = inlier, 1 = outlier
cof_train_scores = cof_detector.decision_scores_  # raw outlier scores

# Held-out test-split outputs.
cof_test_labels = cof_detector.predict(X_test)            # 0/1 labels
cof_test_scores = cof_detector.decision_function(X_test)  # raw scores

# Summarize detection quality on both splits.
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, cof_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, cof_test_scores)

# Plot ground truth against predictions for both splits.
visualize(clf_name, X_train, y_train, X_test, y_test, cof_train_labels,
          cof_test_labels, show_figure=True, save_figure=False)
# One-Class SVM detector. clf_name must be reset here: it still held 'COF'
# from the previous section, so these results were printed and plotted
# under the wrong detector name.
clf_name = 'OCSVM'
clf = OCSVM()
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores
# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)
# visualize the results
visualize(clf_name, X_train, y_train, X_test, y_test, y_train_pred,
          y_test_pred, show_figure=True, save_figure=False)
# Histogram-based Outlier Score: fit, score, evaluate, and plot.
clf_name = 'HBOS'
hbos_detector = HBOS()
hbos_detector.fit(X_train)

# Training-split outputs produced during fitting.
hbos_train_labels = hbos_detector.labels_           # 0 = inlier, 1 = outlier
hbos_train_scores = hbos_detector.decision_scores_  # raw outlier scores

# Held-out test-split outputs.
hbos_test_labels = hbos_detector.predict(X_test)            # 0/1 labels
hbos_test_scores = hbos_detector.decision_function(X_test)  # raw scores

# Summarize detection quality on both splits.
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, hbos_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, hbos_test_scores)

# Plot ground truth against predictions for both splits.
visualize(clf_name, X_train, y_train, X_test, y_test, hbos_train_labels,
          hbos_test_labels, show_figure=True, save_figure=False)
# Single-Objective GAN for outlier detection. clf_name must be reset here:
# it still held 'HBOS' from the previous section, so these results were
# printed under the wrong detector name.
clf_name = 'SO_GAAL'
clf = SO_GAAL(contamination=contamination)
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
y_test_scores = clf.decision_function(X_test)  # outlier scores
# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print("\nOn Test Data:")
evaluate_print(clf_name, y_test, y_test_scores)
print()
# unfold and generate the label matrix
# Each worker returned a list of per-estimator score vectors; worker i's
# transposed block fills column slice [starts[i], starts[i+1]).
# (The loop body had lost its indentation and was syntactically invalid.)
predicted_scores_orig = np.zeros([X_test.shape[0], n_estimators])
for i in range(n_jobs):
    predicted_scores_orig[:, starts[i]:starts[i + 1]] = np.asarray(
        all_results_scores[i]).T
##########################################################################
# Standardize both score matrices, then compare combination strategies
# between the original and the new implementation.
predicted_scores = standardizer(predicted_scores)
predicted_scores_orig = standardizer(predicted_scores_orig)
evaluate_print('orig', y_test, average(predicted_scores_orig))
evaluate_print('new', y_test, average(predicted_scores))
evaluate_print('orig moa', y_test, moa(predicted_scores_orig))
evaluate_print('new moa', y_test, moa(predicted_scores))
# NOTE(review): the next two lines are the dangling tail of a call — likely
# a joblib Parallel(...)(delayed(...) for i in range(n_jobs)) expression —
# whose opening lines are missing from this chunk; they are not valid alone.
verbose=True)
for i in range(n_jobs))
# Elapsed wall time since the (out-of-view) `start = time.time()`.
print('Orig decision_function time:', time.time() - start)
print()
# unfold and generate the label matrix
# Worker i's transposed score block fills column slice [starts[i], starts[i+1]).
# (The loop body had lost its indentation and was syntactically invalid.)
predicted_scores_orig = np.zeros([X.shape[0], n_estimators])
for i in range(n_jobs):
    predicted_scores_orig[:, starts[i]:starts[i + 1]] = np.asarray(
        all_results_scores[i]).T
##########################################################################
# Standardize, then compare mean-combined scores of both implementations.
predicted_scores = standardizer(predicted_scores)
predicted_scores_orig = standardizer(predicted_scores_orig)
evaluate_print('orig', y_test, np.mean(predicted_scores_orig, axis=1))
evaluate_print('new', y_test, np.mean(predicted_scores, axis=1))
#%%
##########################################################################
# Time sequential (single-process) predict over every trained estimator.
# (The loop body had lost its indentation and was syntactically invalid.)
start = time.time()
for i in range(n_estimators):
    print(i)  # progress indicator
    trained_estimators[i].predict(X)
print('Orig decision_function time:', time.time() - start)
print()
##########################################################################
# NOTE(review): this timing block is garbled — "for i in range(n_jobs))"
# below is the tail of a parallel-dispatch expression whose head is
# missing, and the outer loop over n_estimators is left without a body.
start = time.time()
for i in range(n_estimators):
for i in range(n_jobs))
print('Orig decision_function time:', time.time() - start)
print()
# unfold and generate the label matrix
# Worker i's transposed score block fills column slice [starts[i], starts[i+1]).
# (The loop body had lost its indentation and was syntactically invalid.)
predicted_scores_orig = np.zeros([X.shape[0], n_estimators])
for i in range(n_jobs):
    predicted_scores_orig[:, starts[i]:starts[i + 1]] = np.asarray(
        all_results_scores[i]).T
##########################################################################
# Standardize, then compare mean-combined scores of both implementations.
predicted_scores = standardizer(predicted_scores)
predicted_scores_orig = standardizer(predicted_scores_orig)
evaluate_print('orig', y_test, np.mean(predicted_scores_orig, axis=1))
evaluate_print('new', y_test, np.mean(predicted_scores, axis=1))
#%%
##########################################################################
# Time sequential predict over every trained estimator (repeat run).
# (The loop body had lost its indentation and was syntactically invalid.)
start = time.time()
for i in range(n_estimators):
    print(i)  # progress indicator
    trained_estimators[i].predict(X)
print('Orig decision_function time:', time.time() - start)
print()
##########################################################################
# NOTE(review): this chunk ends mid-block — the timing loop below is
# truncated here; the rest of its body lies beyond this view.
start = time.time()
for i in range(n_estimators):
print(i)