Evaluation metric:

- 'roc_auc_score': ROC score
- 'prc_n_score': Precision @ rank n score

Returns
-------
score : float
"""
self.fit(X, y)

if scoring == 'roc_auc_score':
    score = roc_auc_score(y, self.decision_scores_)
elif scoring == 'prc_n_score':
    score = precision_n_scores(y, self.decision_scores_)
else:
    raise NotImplementedError('PyOD built-in scoring only supports '
                              'ROC and Precision @ rank n')

print("{metric}: {score}".format(metric=scoring, score=score))
return score
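# A minimal usage sketch of the two built-in metrics documented above. The
# detector choice and the synthetic data are illustrative only; newer PyOD
# releases deprecate `fit_predict_score`, so the scores are computed directly
# from `decision_scores_` here.
import numpy as np
from sklearn.metrics import roc_auc_score
from pyod.models.knn import KNN
from pyod.utils.utility import precision_n_scores

rng = np.random.RandomState(42)
X = np.r_[rng.randn(95, 2), rng.uniform(4, 6, size=(5, 2))]  # 5 planted outliers
y = np.r_[np.zeros(95), np.ones(5)]                          # 0: inlier, 1: outlier

clf = KNN()
clf.fit(X)
print('roc_auc_score:', roc_auc_score(y, clf.decision_scores_))
print('prc_n_score:', precision_n_scores(y, clf.decision_scores_))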
stat_mat[i, 0] = np.round(roc_auc_score(y_train, pseudo_labels),
                          decimals=4)
stat_mat[i, 1] = np.round(precision_n_scores(y_train, pseudo_labels),
                          decimals=4)

################## xgb train scores
regressor = RandomForestRegressor()
regressor.fit(X_train, pseudo_labels)
pseudo_scores = regressor.predict(X_train)
print('Iter', j + 1, i + 1, 'kd', clf_name, '|', 'train stat',
      np.round(roc_auc_score(y_train, pseudo_scores), decimals=4), '|',
      np.round(precision_n_scores(y_train, pseudo_scores), decimals=4))
stat_mat[i, 2] = np.round(roc_auc_score(y_train, pseudo_scores),
                          decimals=4)
stat_mat[i, 3] = np.round(precision_n_scores(y_train, pseudo_scores),
                          decimals=4)

################## original test time, roc, prn
start = time.time()
y_predict = clf.decision_function(X_test)
end = time.time()
# replace nan by mean
np_mean = np.nanmean(y_predict)
y_predict[np.isnan(y_predict)] = np_mean
print('Iter', j + 1, i + 1, clf_name,
      np.round(end - start, decimals=4), '|',
      np.round(roc_auc_score(y_test, y_predict), decimals=4), '|',
      np.round(precision_n_scores(y_test, y_predict), decimals=4))
Returns
-------
score : float

.. deprecated:: 0.6.9
    `fit_predict_score` will be removed in pyod 0.8.0; it will be
    replaced by calling the `fit` function first and then accessing the
    `labels_` attribute for consistency. Scoring can be done by
    calling an evaluation method, e.g., AUC ROC.
"""
self.fit(X)

if scoring == 'roc_auc_score':
    score = roc_auc_score(y, self.decision_scores_)
elif scoring == 'prc_n_score':
    score = precision_n_scores(y, self.decision_scores_)
else:
    raise NotImplementedError('PyOD built-in scoring only supports '
                              'ROC and Precision @ rank n')

print("{metric}: {score}".format(metric=scoring, score=score))
return score
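# As the deprecation note above recommends, the replacement pattern is to call
# `fit` and then read the fitted attributes directly; a short sketch (the LOF
# detector and the random data are illustrative, not taken from the original file).
import numpy as np
from pyod.models.lof import LOF

X = np.random.RandomState(0).randn(200, 3)
clf = LOF()
clf.fit(X)
labels = clf.labels_            # binary outlier labels (0: inlier, 1: outlier)
scores = clf.decision_scores_   # raw outlier scores for external evaluation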
X_transformed, _ = jl_fit_transform(X, dim_new, "discrete")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
discrete_time.append(time.time() - start)
discrete_roc.append(roc_auc_score(y, y_train_scores))
discrete_prn.append(precision_n_scores(y, y_train_scores))
X_transformed, _ = jl_fit_transform(X, dim_new, "circulant")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
circulant_time.append(time.time() - start)
circulant_roc.append(roc_auc_score(y, y_train_scores))
circulant_prn.append(precision_n_scores(y, y_train_scores))
X_transformed, _ = jl_fit_transform(X, dim_new, "toeplitz")
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
toeplitz_time.append(time.time() - start)
toeplitz_roc.append(roc_auc_score(y, y_train_scores))
toeplitz_prn.append(precision_n_scores(y, y_train_scores))
X_transformed = PCA_sklearn(n_components=dim_new).fit_transform(X)
start = time.time()
clf.fit(X_transformed)
y_train_scores = clf.decision_scores_
pca_time.append(time.time() - start)
pca_roc.append(roc_auc_score(y, y_train_scores))
pca_prn.append(precision_n_scores(y, y_train_scores))
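# The block above times one detector on several low-dimensional projections of
# the same data. A self-contained sketch of that comparison pattern; here
# scikit-learn's GaussianRandomProjection stands in for `jl_fit_transform`,
# which comes from a separate projection utility not shown in this snippet.
import time
import numpy as np
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score
from sklearn.random_projection import GaussianRandomProjection
from pyod.models.lof import LOF
from pyod.utils.utility import precision_n_scores

rng = np.random.RandomState(0)
X = np.r_[rng.randn(190, 20), rng.uniform(3, 5, size=(10, 20))]
y = np.r_[np.zeros(190), np.ones(10)]
dim_new = 5

transformers = [
    ('random projection', GaussianRandomProjection(n_components=dim_new, random_state=0)),
    ('PCA', PCA(n_components=dim_new)),
]
for name, transformer in transformers:
    X_transformed = transformer.fit_transform(X)
    clf = LOF()
    start = time.time()
    clf.fit(X_transformed)
    y_train_scores = clf.decision_scores_
    print(name,
          np.round(time.time() - start, decimals=4), '|',
          np.round(roc_auc_score(y, y_train_scores), decimals=4), '|',
          np.round(precision_n_scores(y, y_train_scores), decimals=4))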
for i, (clf_name, clf) in enumerate(classifiers.items()):

    ################## original version
    clf.fit(X_train)
    pseudo_labels = clf.decision_scores_
    # replace nan by mean
    np_mean = np.nanmean(pseudo_labels)
    pseudo_labels[np.isnan(pseudo_labels)] = np_mean

    print('Iter', j + 1, i + 1, clf_name, '|', 'train stat',
          np.round(roc_auc_score(y_train, pseudo_labels), decimals=4), '|',
          np.round(precision_n_scores(y_train, pseudo_labels), decimals=4))
    stat_mat[i, 0] = np.round(roc_auc_score(y_train, pseudo_labels),
                              decimals=4)
    stat_mat[i, 1] = np.round(precision_n_scores(y_train, pseudo_labels),
                              decimals=4)

    ################## xgb train scores
    regressor = RandomForestRegressor()
    regressor.fit(X_train, pseudo_labels)
    pseudo_scores = regressor.predict(X_train)
    print('Iter', j + 1, i + 1, 'kd', clf_name, '|', 'train stat',
          np.round(roc_auc_score(y_train, pseudo_scores), decimals=4), '|',
          np.round(precision_n_scores(y_train, pseudo_scores), decimals=4))
    stat_mat[i, 2] = np.round(roc_auc_score(y_train, pseudo_scores),
                              decimals=4)
    stat_mat[i, 3] = np.round(precision_n_scores(y_train, pseudo_scores),
                              decimals=4)
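# The loop above distills each unsupervised detector into a supervised
# regressor: the detector's `decision_scores_` act as pseudo-labels for a
# RandomForestRegressor (note the section comment says 'xgb' although the code
# fits a random forest). A minimal, self-contained sketch of that pattern; the
# KNN detector and synthetic data below are illustrative.
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import roc_auc_score
from pyod.models.knn import KNN
from pyod.utils.utility import precision_n_scores

rng = np.random.RandomState(1)
X_train = np.r_[rng.randn(190, 10), rng.uniform(3, 5, size=(10, 10))]
y_train = np.r_[np.zeros(190), np.ones(10)]

detector = KNN()
detector.fit(X_train)
pseudo_labels = detector.decision_scores_

regressor = RandomForestRegressor(n_estimators=100, random_state=1)
regressor.fit(X_train, pseudo_labels)
pseudo_scores = regressor.predict(X_train)

print('detector ', np.round(roc_auc_score(y_train, pseudo_labels), decimals=4),
      np.round(precision_n_scores(y_train, pseudo_labels), decimals=4))
print('surrogate', np.round(roc_auc_score(y_train, pseudo_scores), decimals=4),
      np.round(precision_n_scores(y_train, pseudo_scores), decimals=4))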
    'K Nearest Neighbors (KNN)': 5,
    'Local Outlier Factor (LOF)': 6,
    'Minimum Covariance Determinant (MCD)': 7,
    'One-class SVM (OCSVM)': 8,
    'Principal Component Analysis (PCA)': 9,
}

for clf_name, clf in classifiers.items():
    t0 = time()
    clf.fit(X_train_norm)
    test_scores = clf.decision_function(X_test_norm)
    t1 = time()
    duration = round(t1 - t0, ndigits=4)

    roc = round(roc_auc_score(y_test, test_scores), ndigits=4)
    prn = round(precision_n_scores(y_test, test_scores), ndigits=4)

    print('{clf_name} ROC:{roc}, precision @ rank n:{prn}, '
          'execution time: {duration}s'.format(
              clf_name=clf_name, roc=roc, prn=prn, duration=duration))

    time_mat[i, classifiers_indices[clf_name]] = duration
    roc_mat[i, classifiers_indices[clf_name]] = roc
    prn_mat[i, classifiers_indices[clf_name]] = prn
time_list = time_list + np.mean(time_mat, axis=0).tolist()
temp_df = pd.DataFrame(time_list).transpose()
temp_df.columns = df_columns
time_df = pd.concat([time_df, temp_df], axis=0)
roc_list = roc_list + np.mean(roc_mat, axis=0).tolist()
temp_df = pd.DataFrame(roc_list).transpose()
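# The benchmark loop above reads from `classifiers` and `classifiers_indices`,
# which are defined earlier in the script (only the tail of the index map is
# visible in this snippet). A hedged sketch of how such a setup might look;
# the detector choices and matrix shapes below are illustrative.
import numpy as np
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.pca import PCA

classifiers = {
    'K Nearest Neighbors (KNN)': KNN(),
    'Local Outlier Factor (LOF)': LOF(),
    'Principal Component Analysis (PCA)': PCA(),
}
classifiers_indices = {name: k for k, name in enumerate(classifiers)}

n_datasets = 3  # one row per benchmark dataset
time_mat = np.zeros([n_datasets, len(classifiers)])
roc_mat = np.zeros([n_datasets, len(classifiers)])
prn_mat = np.zeros([n_datasets, len(classifiers)])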
y : list or numpy array of shape (n_samples,)
    The ground truth. Binary (0: inliers, 1: outliers).

y_pred : list or numpy array of shape (n_samples,)
    The raw outlier scores as returned by a fitted model.
"""
y = column_or_1d(y)
y_pred = column_or_1d(y_pred)
check_consistent_length(y, y_pred)

print('{clf_name} ROC:{roc}, precision @ rank n:{prn}'.format(
    clf_name=clf_name,
    roc=np.round(roc_auc_score(y, y_pred), decimals=4),
    prn=np.round(precision_n_scores(y, y_pred), decimals=4)))
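# The function body above corresponds to PyOD's `evaluate_print` helper, which
# wraps exactly the two metrics used throughout these snippets. A short usage
# sketch with illustrative data:
import numpy as np
from pyod.models.knn import KNN
from pyod.utils.data import evaluate_print

rng = np.random.RandomState(3)
X = np.r_[rng.randn(95, 2), rng.uniform(4, 6, size=(5, 2))]
y = np.r_[np.zeros(95), np.ones(5)]

clf = KNN()
clf.fit(X)
evaluate_print('KNN', y, clf.decision_scores_)
# prints e.g. "KNN ROC:..., precision @ rank n:..."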
print('Iter', j + 1, i + 1, clf_name,
      np.round(end - start, decimals=4), '|',
      np.round(roc_auc_score(y_test, y_predict), decimals=4), '|',
      np.round(precision_n_scores(y_test, y_predict), decimals=4))
stat_mat[i, 4] = np.round(end - start, decimals=4)
stat_mat[i, 5] = np.round(roc_auc_score(y_test, y_predict), decimals=4)
stat_mat[i, 6] = np.round(precision_n_scores(y_test, y_predict),
                          decimals=4)

################## kd test time, roc, prn
start = time.time()
y_predict_xgb = regressor.predict(X_test)
end = time.time()
print('Iter', j + 1, i + 1, 'kd', clf_name,
      np.round(end - start, decimals=4), '|',
      np.round(roc_auc_score(y_test, y_predict_xgb), decimals=4), '|',
      np.round(precision_n_scores(y_test, y_predict_xgb), decimals=4))
stat_mat[i, 7] = np.round(end - start, decimals=4)
stat_mat[i, 8] = np.round(roc_auc_score(y_test, y_predict_xgb),
                          decimals=4)
stat_mat[i, 9] = np.round(precision_n_scores(y_test, y_predict_xgb),
                          decimals=4)
print()
stat_mat_all = stat_mat_all + stat_mat
stat_mat_all = stat_mat_all / n_iter
roc_summary = pd.DataFrame(stat_mat_all, columns=report_list)
roc_summary['clf'] = classifier_names
print(roc_summary)