Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Train a SUOD ensemble over the heterogeneous base detectors and compare its
# combined predictions against standalone LOF / IForest baselines.
model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True,
             contamination=contamination, approx_flag_global=True)
model.fit(X_train)  # fit all models with X
model.approximate(X_train)  # conduct model approximation if it is enabled
predicted_labels = model.predict(X_test)  # predict binary outlier labels
predicted_scores = model.decision_function(X_test)  # predict raw outlier scores
predicted_probs = model.predict_proba(X_test)  # predict outlier probabilities
###########################################################################
# compared with other approaches: combine the per-detector outputs with
# three standard score-combination rules and report each one
evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
evaluate_print('average', y_test, average(predicted_scores))
evaluate_print('maximization', y_test, maximization(predicted_scores))
# Baseline 1: a single LOF detector trained on the same data
clf = LOF()
clf.fit(X_train)
evaluate_print('LOF', y_test, clf.decision_function(X_test))
# Baseline 2: a single isolation forest trained on the same data
clf = IForest()
clf.fit(X_train)
evaluate_print('IForest', y_test, clf.decision_function(X_test))
LOF(n_neighbors=15, contamination=contamination),
LOF(n_neighbors=25, contamination=contamination),
LOF(n_neighbors=35, contamination=contamination),
LOF(n_neighbors=45, contamination=contamination),
HBOS(contamination=contamination),
PCA(contamination=contamination),
OCSVM(contamination=contamination),
KNN(n_neighbors=5, contamination=contamination),
KNN(n_neighbors=15, contamination=contamination),
KNN(n_neighbors=25, contamination=contamination),
KNN(n_neighbors=35, contamination=contamination),
KNN(n_neighbors=45, contamination=contamination),
IForest(n_estimators=50, contamination=contamination),
IForest(n_estimators=100, contamination=contamination),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)])
]
# Second experiment: refit the SUOD ensemble on the (re)built estimator list
# and report the combined scores under the same three combination rules.
model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True,
             contamination=contamination, approx_flag_global=True)
model.fit(X_train)  # fit all models with X
model.approximate(X_train)  # conduct model approximation if it is enabled
predicted_labels = model.predict(X_test)  # predict binary outlier labels
predicted_scores = model.decision_function(X_test)  # predict raw outlier scores
predicted_probs = model.predict_proba(X_test)  # predict outlier probabilities
###########################################################################
# compared with other approaches
evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
evaluate_print('average', y_test, average(predicted_scores))
evaluate_print('maximization', y_test, maximization(predicted_scores))
LOF(n_neighbors=75, contamination=contamination),
LOF(n_neighbors=80, contamination=contamination),
LOF(n_neighbors=85, contamination=contamination),
LOF(n_neighbors=90, contamination=contamination),
LOF(n_neighbors=95, contamination=contamination),
LOF(n_neighbors=100, contamination=contamination),
LOF(n_neighbors=5, contamination=contamination),
LOF(n_neighbors=10, contamination=contamination),
LOF(n_neighbors=15, contamination=contamination),
LOF(n_neighbors=25, contamination=contamination),
LOF(n_neighbors=35, contamination=contamination),
LOF(n_neighbors=45, contamination=contamination),
LOF(n_neighbors=50, contamination=contamination),
LOF(n_neighbors=55, contamination=contamination),
LOF(n_neighbors=60, contamination=contamination),
LOF(n_neighbors=65, contamination=contamination),
LOF(n_neighbors=70, contamination=contamination),
LOF(n_neighbors=75, contamination=contamination),
LOF(n_neighbors=80, contamination=contamination),
LOF(n_neighbors=85, contamination=contamination),
LOF(n_neighbors=90, contamination=contamination),
LOF(n_neighbors=95, contamination=contamination),
LOF(n_neighbors=100, contamination=contamination),
HBOS(contamination=contamination),
HBOS(contamination=contamination),
HBOS(contamination=contamination),
HBOS(contamination=contamination),
HBOS(contamination=contamination),
HBOS(contamination=contamination),
HBOS(contamination=contamination),
Fitted estimator.
"""
# NOTE(review): fragment of a fit()-style method — the enclosing `def`, its
# docstring header, and the rest of the body are outside this chunk.
# Indentation of the conditional below has been restored; everything else is
# unchanged.
random_state = check_random_state(self.random_state)
X = check_array(X)  # validate input and coerce to a 2-d numeric array
self.n_samples_, self.n_features_ = X.shape[0], X.shape[1]
self._set_n_classes(y)
# expect at least 2 features, does not make sense if only have
# 1 feature
check_parameter(self.n_features_, low=2, include_left=True,
                param_name='n_features')
# check parameters
self._validate_estimator(default=LOF(n_jobs=self.n_jobs))
# use at least half of the features
self.min_features_ = int(0.5 * self.n_features_)
# Validate max_features: an integer is taken as an absolute feature count,
# a float as a fraction of the available features
if isinstance(self.max_features, (numbers.Integral, np.integer)):
    self.max_features_ = self.max_features
else:  # float
    self.max_features_ = int(self.max_features * self.n_features_)
# min_features and max_features could equal
check_parameter(self.max_features_, low=self.min_features_,
                param_name='max_features', high=self.n_features_,
                include_left=True, include_right=True)
self.estimators_ = []  # fitted sub-estimators are collected here later
The list of bool flag to indicate whether standardization is needed
"""
# NOTE(review): fragment of a detector-pool builder — the enclosing `def` is
# outside this chunk; loop indentation restored, code otherwise unchanged.
# Builds a diverse pool of unfitted detectors plus a parallel list of flags
# recording whether each detector expects standardized input.
estimator_list = []
standardization_flag_list = []
# predefined range of n_neighbors for KNN, AvgKNN, and LOF
k_range = [1, 3, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
# validate the value of k: keep only k strictly below the sample count
k_range = [k for k in k_range if k < X.shape[0]]
for k in k_range:
    estimator_list.append(KNN(n_neighbors=k, method='largest'))
    estimator_list.append(KNN(n_neighbors=k, method='mean'))
    estimator_list.append(LOF(n_neighbors=k))
    # all three neighbor-based detectors get standardized features
    standardization_flag_list.append(True)
    standardization_flag_list.append(True)
    standardization_flag_list.append(True)
# predefined range of n_bins for HBOS
n_bins_range = [3, 5, 7, 9, 12, 15, 20, 25, 30, 50]
for n_bins in n_bins_range:
    estimator_list.append(HBOS(n_bins=n_bins))
    standardization_flag_list.append(False)  # histogram-based: no scaling
# predefined range of nu for one-class svm
nu_range = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.99]
for nu in nu_range:
    estimator_list.append(OCSVM(nu=nu))
    standardization_flag_list.append(True)
# predefined range for number of estimators in isolation forests
def make_mlo(hub, data, train, contamination=0.01):
    """Create the machine-learning object (outlier detector) for this sequence.

    Parameters
    ----------
    hub : object
        Framework hook argument; not used by this factory.
    data : object
        Not used; present to satisfy the expected factory signature.
    train : object
        Not used; present to satisfy the expected factory signature.
    contamination : float, optional (default=0.01)
        Expected proportion of outliers in the data, forwarded to the
        LOF detector. Previously hard-coded; exposed as a keyword with
        the same default so existing callers are unaffected.

    Returns
    -------
    LOF
        An unfitted pyod LOF detector configured with *contamination*.
    """
    return LOF(contamination=contamination)
LOF(n_neighbors=15, contamination=contamination),
LOF(n_neighbors=25, contamination=contamination),
LOF(n_neighbors=35, contamination=contamination),
LOF(n_neighbors=45, contamination=contamination),
HBOS(contamination=contamination),
PCA(contamination=contamination),
OCSVM(contamination=contamination),
KNN(n_neighbors=5, contamination=contamination),
KNN(n_neighbors=15, contamination=contamination),
KNN(n_neighbors=25, contamination=contamination),
KNN(n_neighbors=35, contamination=contamination),
KNN(n_neighbors=45, contamination=contamination),
IForest(n_estimators=50, contamination=contamination),
IForest(n_estimators=100, contamination=contamination),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)])
]
# --- Parallelism and random-projection configuration ----------------------
n_jobs = 6  # number of parallel workers
n_estimators = len(base_estimators)

# Detector families that should use random projection ...
rp_clf_list = ['LOF', 'KNN', 'ABOD']
# ... and those that should NOT be projected.
rp_ng_clf_list = ['IForest', 'PCA', 'HBOS']

rp_flag_global = True   # master switch for random projection
objective_dim = 6       # target dimensionality after projection
rp_method = 'discrete'  # projection scheme

# --- Synthetic data layout ------------------------------------------------
n_samples = 200
outliers_fraction = 0.25
clusters_separation = [0]

# Split the sample budget between inliers and outliers.
n_outliers = int(outliers_fraction * n_samples)
n_inliers = int((1. - outliers_fraction) * n_samples)

# Dense grid over the plane, used for decision-surface evaluation.
xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))

# Labels: inliers are 0; the trailing n_outliers samples are marked 1.
ground_truth = np.zeros(n_samples, dtype=int)
ground_truth[-n_outliers:] = 1

# Pool of LOF detectors with k = 5, 10, ..., 50 for the LSCP ensemble.
detector_list = [LOF(n_neighbors=k) for k in range(5, 51, 5)]

# Show the statics of the data
print('Number of inliers: %i' % n_inliers)
print('Number of outliers: %i' % n_outliers)
print(
    'Ground truth shape is {shape}. Outlier are 1 and inliers are 0.\n'.format(
        shape=ground_truth.shape))
print(ground_truth, '\n')

random_state = np.random.RandomState(42)
# Define nine outlier detection tools to be compared
classifiers = {
'Angle-based Outlier Detector (ABOD)':
ABOD(contamination=outliers_fraction),