Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
from pyod.models.cof import COF
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 200 # number of training points
n_test = 100 # number of testing points
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
# train COF detector
clf_name = 'COF'
clf = COF(n_neighbors=30)
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
from pyod.models.xgbod import XGBOD
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
if __name__ == "__main__":
# Define data file and read X and y
# Generate some data if the source data is missing
mat_file = 'cardio.mat'
try:
mat = loadmat(os.path.join('data', mat_file))
except TypeError:
print('{data_file} does not exist. Use generated data'.format(
data_file=mat_file))
X, y = generate_data(train_only=True) # load data
except IOError:
print('{data_file} does not exist. Use generated data'.format(
data_file=mat_file))
X, y = generate_data(train_only=True) # load data
else:
X = mat['X']
y = mat['y'].ravel()
X, y = check_X_y(X, y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
random_state=42)
# train XGBOD detector
clf_name = 'XGBOD'
clf = XGBOD(random_state=42)
clf.fit(X_train, y_train)
plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)
if show_figure:
plt.show()
return
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 200 # number of training points
n_test = 100 # number of testing points
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
# train one_class_svm detector
clf_name = 'OneClassSVM'
clf = OCSVM()
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
from pyod.models.so_gaal import SO_GAAL
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 30000 # number of training points
n_test = 3000 # number of testing points
n_features = 300 # number of features
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=n_features,
contamination=contamination,
random_state=42)
# train SO_GAAL detector
clf_name = 'SO_GAAL'
clf = SO_GAAL(contamination=contamination)
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)
if show_figure:
plt.show()
return
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 200 # number of training points
n_test = 100 # number of testing points
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
# train LOF detector
clf_name = 'MCD'
clf = MCD()
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
sys.path.append(
os.path.abspath(os.path.join(os.path.dirname("__file__"), '..')))
from pyod.models.auto_encoder import AutoEncoder
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 20000 # number of training points
n_test = 2000 # number of testing points
n_features = 300 # number of features
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=n_features,
contamination=contamination,
random_state=42)
# train AutoEncoder detector
clf_name = 'AutoEncoder'
clf = AutoEncoder(epochs=30, contamination=contamination)
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)
if show_figure:
plt.show()
return
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 200 # number of training points
n_test = 100 # number of testing points
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
# train CBLOF detector
clf_name = 'CBLOF'
clf = CBLOF()
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)
if show_figure:
plt.show()
return
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 200 # number of training points
n_test = 100 # number of testing points
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
# train LOF detector
clf_name = 'LOF'
clf = LOF()
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.ocsvm import OCSVM
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
from combo.models.detector_lscp import LSCP
contamination = 0.1 # percentage of outliers
n_train = 200 # number of training points
n_test = 100 # number of testing points
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
detectors = [KNN(), LOF(), OCSVM()]
clf = LSCP(base_estimators=detectors)
clf_name = 'LSCP'
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
plt.savefig('{clf_name}.png'.format(clf_name=clf_name), dpi=300)
if show_figure:
plt.show()
return
if __name__ == "__main__":
contamination = 0.1 # percentage of outliers
n_train = 200 # number of training points
n_test = 100 # number of testing points
# Generate sample data
X_train, y_train, X_test, y_test = \
generate_data(n_train=n_train,
n_test=n_test,
n_features=2,
contamination=contamination,
random_state=42)
# train HBOS detector
clf_name = 'HBOS'
clf = HBOS()
clf.fit(X_train)
# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_ # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_ # raw outlier scores
# get the prediction on the test data
y_test_pred = clf.predict(X_test) # outlier labels (0 or 1)