# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# --- baseline: fit the detector on the original, un-projected data ---
# NOTE(review): `start` and the *_time/_roc/_prn lists are defined before
# this fragment; this copy has lost its loop indentation — confirm scope.
clf = LOF() # change this to other detection algorithms
clf.fit(X)
y_train_scores = clf.decision_scores_  # outlier scores on the training set
original_time.append(time.time() - start)
original_roc.append(roc_auc_score(y, y_train_scores))
original_prn.append(precision_n_scores(y, y_train_scores))
# --- "basic" Johnson-Lindenstrauss random projection ---
# the projection runs before the timer starts, so only fit time is measured
X_transformed, _ = jl_fit_transform(X, dim_new, "basic")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
basic_time.append(time.time() - start)
basic_roc.append(roc_auc_score(y, y_train_scores))
basic_prn.append(precision_n_scores(y, y_train_scores))
# --- "discrete" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "discrete")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
discrete_time.append(time.time() - start)
discrete_roc.append(roc_auc_score(y, y_train_scores))
discrete_prn.append(precision_n_scores(y, y_train_scores))
# --- "circulant" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
circulant_time.append(time.time() - start)
circulant_roc.append(roc_auc_score(y, y_train_scores))
circulant_prn.append(precision_n_scores(y, y_train_scores))
# --- "toeplitz" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "toeplitz")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
# BUG FIX: results of the "toeplitz" projection were appended to the
# discrete_* lists (copy-paste error — compare the correct toeplitz
# section below); record them under toeplitz_* instead.
toeplitz_time.append(time.time() - start)
toeplitz_roc.append(roc_auc_score(y, y_train_scores))
toeplitz_prn.append(precision_n_scores(y, y_train_scores))
# NOTE(review): this circulant/toeplitz pair appears to duplicate the
# measurements taken just above — possibly an accidental repeated paste;
# confirm whether both copies are intended.
# --- "circulant" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
circulant_time.append(time.time() - start)
circulant_roc.append(roc_auc_score(y, y_train_scores))
circulant_prn.append(precision_n_scores(y, y_train_scores))
# --- "toeplitz" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "toeplitz")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
toeplitz_time.append(time.time() - start)
toeplitz_roc.append(roc_auc_score(y, y_train_scores))
toeplitz_prn.append(precision_n_scores(y, y_train_scores))
# --- PCA baseline: reduce to dim_new with sklearn PCA, then time the fit ---
# (as with the JL sections, the reduction itself is outside the timer)
X_transformed = PCA_sklearn(n_components=dim_new).fit_transform(X)
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
pca_time.append(time.time() - start)
pca_roc.append(roc_auc_score(y, y_train_scores))
pca_prn.append(precision_n_scores(y, y_train_scores))
# NOTE(review): this statement is truncated — the remaining arguments to
# generate_bagging_indices are missing from this fragment; restore them
# from the original script before running.
selected_features = generate_bagging_indices(random_state=j,
# per-iteration result accumulators
pca_time = []
rp_roc = []
rp_prn = []
rp_time = []
# repeat each measurement n_iter times
for j in range(n_iter):
# NOTE(review): the loop body below has lost its indentation in this copy.
# --- baseline on the original data ---
start = time.time()
clf = LOF() # change this to other detection algorithms
clf.fit(X)
y_train_scores = clf.decision_scores_
original_time.append(time.time() - start)
original_roc.append(roc_auc_score(y, y_train_scores))
original_prn.append(precision_n_scores(y, y_train_scores))
# --- "basic" JL projection; only the fit is timed ---
X_transformed, _ = jl_fit_transform(X, dim_new, "basic")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
basic_time.append(time.time() - start)
basic_roc.append(roc_auc_score(y, y_train_scores))
basic_prn.append(precision_n_scores(y, y_train_scores))
# --- "discrete" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "discrete")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
discrete_time.append(time.time() - start)
discrete_roc.append(roc_auc_score(y, y_train_scores))
discrete_prn.append(precision_n_scores(y, y_train_scores))
# --- "circulant" JL projection (its timing continues after the function
# interleaved below — this file is a jumbled paste) ---
X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
def _parallel_fit(n_estimators, clfs, X, total_n_estimators,
                  rp_flags, objective_dim, rp_method, verbose):
    """Fit a batch of detectors in one parallel worker, optionally
    reducing the dimensionality of ``X`` first via a Johnson-Lindenstrauss
    random projection.

    Parameters
    ----------
    n_estimators : int
        Number of estimators handled by this worker.
    clfs : list
        Estimator objects; each is cloned before fitting.
    X : array-like of shape (n_samples, n_features)
        Training data.
    total_n_estimators : int
        Total estimator count across all workers (progress messages only).
    rp_flags : array-like of int
        Per-estimator flag; 1 enables random projection before fitting.
    objective_dim : int
        Target dimensionality for the projection.
    rp_method : str
        Projection scheme passed to ``jl_fit_transform``
        (e.g. "basic", "discrete", "circulant", "toeplitz").
    verbose : int
        Verbosity level; values > 1 print per-estimator progress.

    Returns
    -------
    estimators : list
        The fitted estimators.
    rp_transformers : list
        One projection matrix per estimator; an identity matrix is stored
        when no projection was used, so applying it later is a no-op.
    """
    X = check_array(X)

    estimators = []
    rp_transformers = []

    for i in range(n_estimators):
        estimator = clone(clfs[i])

        if verbose > 1:
            print("Building estimator %d of %d for this parallel run "
                  "(total %d)..." % (i + 1, n_estimators, total_n_estimators))

        if rp_flags[i] == 1:
            # reduce dimensionality, remember the transformer so the same
            # projection can be re-applied at prediction/approximation time
            X_scaled, jlt_transformer = jl_fit_transform(X, objective_dim,
                                                         rp_method)
            rp_transformers.append(jlt_transformer)
            estimator.fit(X_scaled)
        else:
            # BUG FIX: the original stored np.ones([d, d]) as the "identity"
            # placeholder — an all-ones matrix, which corrupts X when the
            # transformer is later applied (e.g. by jl_transform in the
            # approximation stage). np.eye is the actual identity and keeps
            # the data unchanged as the comment intended.
            rp_transformers.append(np.eye(X.shape[1]))
            estimator.fit(X)

        estimators.append(estimator)

    return estimators, rp_transformers
# (continuation of the interrupted benchmark fragment above)
# NOTE(review): the preceding visible transform was "circulant" but the
# results here go into the basic_* lists — either a copy-paste slip or an
# artifact of this jumbled paste; confirm against the original script.
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
basic_time.append(time.time() - start)
basic_roc.append(roc_auc_score(y, y_train_scores))
basic_prn.append(precision_n_scores(y, y_train_scores))
# --- "discrete" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "discrete")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
discrete_time.append(time.time() - start)
discrete_roc.append(roc_auc_score(y, y_train_scores))
discrete_prn.append(precision_n_scores(y, y_train_scores))
# --- "circulant" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
circulant_time.append(time.time() - start)
circulant_roc.append(roc_auc_score(y, y_train_scores))
circulant_prn.append(precision_n_scores(y, y_train_scores))
# --- "toeplitz" JL projection ---
X_transformed, _ = jl_fit_transform(X, dim_new, "toeplitz")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
toeplitz_time.append(time.time() - start)
toeplitz_roc.append(roc_auc_score(y, y_train_scores))
toeplitz_prn.append(precision_n_scores(y, y_train_scores))
# --- PCA baseline reduction ---
X_transformed = PCA_sklearn(n_components=dim_new).fit_transform(X)
y = mat['y']  # ground-truth outlier labels loaded from the .mat dataset
# split dataset into train and test
X_train, X_test, y_train, y_test = \
train_test_split(X, y, test_size=0.4, random_state=42)
# standardize data to be digestible for most algorithms
X_train, X_test = standardizer(X_train, X_test)
contamination = y.sum() / len(y)  # observed fraction of outliers
# get estimators for training and prediction
base_estimators = get_estimators(contamination=contamination)
##########################################################################
# build the SUOD meta-framework: random projection, balanced parallel
# scheduling (bps) and supervised model approximation all enabled.
# NOTE(review): approx_clf and n_jobs are defined outside this fragment.
model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
approx_clf=approx_clf,
n_jobs=n_jobs, bps_flag=True, contamination=contamination,
approx_flag_global=True)
start = time.time()
model.fit(X_train)  # fit all models with X
print('Fit time:', time.time() - start)
print()
start = time.time()
model.approximate(X_train)  # conduct model approximation if it is enabled
print('Approximation time:', time.time() - start)
print()
start = time.time()
predicted_labels = model.predict(X_test)  # predict labels
# NOTE(review): the opening bracket of this estimator list is outside this
# fragment — these entries form a heterogeneous detector pool for SUOD.
LOF(n_neighbors=45, contamination=contamination),
HBOS(contamination=contamination),
PCA(contamination=contamination),
OCSVM(contamination=contamination),
# KNN detectors with increasing neighborhood sizes
KNN(n_neighbors=5, contamination=contamination),
KNN(n_neighbors=15, contamination=contamination),
KNN(n_neighbors=25, contamination=contamination),
KNN(n_neighbors=35, contamination=contamination),
KNN(n_neighbors=45, contamination=contamination),
IForest(n_estimators=50, contamination=contamination),
IForest(n_estimators=100, contamination=contamination),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)])
]
# train SUOD on the pool, then approximate, predict and score
model = SUOD(base_estimators=base_estimators, n_jobs=6, bps_flag=True,
contamination=contamination, approx_flag_global=True)
model.fit(X_train)  # fit all models with X
model.approximate(X_train)  # conduct model approximation if it is enabled
predicted_labels = model.predict(X_test)  # predict labels
predicted_scores = model.decision_function(X_test)  # predict scores
predicted_probs = model.predict_proba(X_test)  # predict scores
###########################################################################
# compared with other approaches
evaluate_print('majority vote', y_test, majority_vote(predicted_labels))
evaluate_print('average', y_test, average(predicted_scores))
evaluate_print('maximization', y_test, maximization(predicted_scores))
# single-LOF baseline for comparison
clf = LOF()
clf.fit(X_train)
# NOTE(review): another estimator-list tail (opener outside this fragment);
# several identical LSCP ensembles, presumably to stress parallel fitting.
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
LSCP(detector_list=[LOF(contamination=contamination),
LOF(contamination=contamination)]),
]
# alternative configuration kept for reference (bps disabled):
# model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
# n_jobs=6, bps_flag=False, contamination=contamination,
# approx_flag_global=True)
model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
n_jobs=6, bps_flag=True, contamination=contamination,
approx_flag_global=True)
# time each stage separately: fit, approximate, predict (all on full X)
start = time.time()
model.fit(X)  # fit all models with X
print('Fit time:', time.time() - start)
print()
start = time.time()
model.approximate(X)  # conduct model approximation if it is enabled
print('Approximation time:', time.time() - start)
print()
start = time.time()
predicted_labels = model.predict(X)  # predict labels
print('Predict time:', time.time() - start)
# --- second demo: load a fresh dataset and run SUOD with a
# RandomForestRegressor as the approximation model ---
X = mat['X']
y = mat['y']
X_train, X_test, y_train, y_test = \
train_test_split(X, y, test_size=0.4, random_state=42)
# standardize data to be digestible for most algorithms
X_train, X_test = standardizer(X_train, X_test)
contamination = y.sum() / len(y)
# deepcopy so the shared estimator templates are not mutated by fitting
base_estimators = deepcopy(get_estimators(contamination=contamination))
##########################################################################
model = SUOD(base_estimators=base_estimators, rp_flag_global=True,
approx_clf=RandomForestRegressor(),
n_jobs=n_jobs, bps_flag=True, contamination=contamination,
approx_flag_global=True)
start = time.time()
model.fit(X_train)  # fit all models with X
print('Fit time:', time.time() - start)
print()
start = time.time()
model.approximate(X_train)  # conduct model approximation if it is enabled
print('Approximation time:', time.time() - start)
print()
start = time.time()
predicted_labels = model.predict(X_test)  # predict labels
approx_flags
approximator
verbose
Returns
-------
"""
X = check_array(X)
# Build estimators
approximators = []
# TODO: approximators can be different
# NOTE(review): this fragment is the tail of an approximation worker whose
# def header (and the loop's else branch) lie outside this view; the loop
# body has also lost its indentation in this copy.
for i in range(n_estimators):
# project X with the transformer fitted alongside estimator i
X_scaled = jl_transform(X, rp_transformers[i])
estimator = clfs[i]
# estimator must already be fitted (exposes decision_scores_)
check_is_fitted(estimator, ['decision_scores_'])
if verbose > 1:
print("Building estimator %d of %d for this parallel run "
"(total %d)..." % (i + 1, n_estimators, total_n_estimators))
if approx_flags[i] == 1:
# operate on the reduce space
# use the detector's training scores as regression targets
pseudo_scores = estimator.decision_scores_
# pseudo_scores = estimator.decision_function(X)
# use the same type of approximator for all models
base_approximater = clone(approximator)
# train the cheap regressor to mimic the detector's scores
base_approximater.fit(X_scaled, pseudo_scores)