Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# the algorithms that should NOT use random projection
rp_ng_clf_list = ['IForest', 'PCA', 'HBOS']
# global flag for random projection
rp_flag_global = True
objective_dim = 6
rp_method = 'discrete'
# build flags for random projection
rp_flags, base_estimator_names = build_codes(base_estimators, rp_clf_list,
rp_ng_clf_list, rp_flag_global)
# load the pre-trained cost predictor to forecast the train cost
clf_train = joblib.load(
os.path.join('../suod', 'models', 'saved_models', 'bps_train.joblib'))
time_cost_pred = cost_forecast_meta(clf_train, X, base_estimator_names)
# schedule the tasks
n_estimators_list, starts, n_jobs = balanced_scheduling(time_cost_pred,
n_estimators, n_jobs)
print(starts) # split boundaries: job i handles estimators starts[i]:starts[i + 1]
start = time.time()
print('Parallel Training...')
# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806
# max_nbytes can be dropped on other OS
all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
delayed(_parallel_fit)(
n_estimators_list[i],
Returns
-------
anomaly_scores : numpy array of shape (n_samples,)
The anomaly score of the input samples.
"""
X = check_array(X)
n_samples, n_features = X.shape[0], X.shape[1]
# decide whether BPS (balanced scheduling) is needed;
# when self.bps_flag is off, the equal split in the else branch is used
if self.bps_flag:
# load the pre-trained cost predictor to forecast the prediction cost
cost_predictor = joblib.load(self.cost_forecast_loc_pred_)
time_cost_pred = cost_forecast_meta(cost_predictor, X,
self.base_estimator_names)
n_estimators_list, starts, n_jobs = balanced_scheduling(
time_cost_pred, self.n_estimators, self.n_jobs)
else:
# use simple equal split by sklearn
n_estimators_list, starts, n_jobs = _partition_estimators(
self.n_estimators, self.n_jobs)
# predict with the base models
if self.verbose:
print('Parallel score prediction...')
start = time.time()
# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806
-------
outlier_labels : numpy array of shape (n_samples, n_estimators)
For each observation, tells whether or not
it should be considered as an outlier according to the
fitted model. 0 stands for inliers and 1 for outliers.
"""
X = check_array(X)
n_samples, n_features = X.shape[0], X.shape[1]
# decide whether BPS (balanced scheduling) is needed;
# when self.bps_flag is off, the equal split in the else branch is used
if self.bps_flag:
# load the pre-trained cost predictor to forecast the prediction cost
cost_predictor = joblib.load(self.cost_forecast_loc_pred_)
time_cost_pred = cost_forecast_meta(cost_predictor, X,
self.base_estimator_names)
n_estimators_list, starts, n_jobs = balanced_scheduling(
time_cost_pred, self.n_estimators, self.n_jobs)
else:
# use simple equal split by sklearn
n_estimators_list, starts, n_jobs = _partition_estimators(
self.n_estimators, self.n_jobs)
# predict with the base models
if self.verbose:
print('Parallel label prediction...')
start = time.time()
# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806
verbose=True)
for i in range(n_jobs))
print('Balanced Scheduling Total Test Time:', time.time() - start)
approximators = _unfold_parallel(all_approx_results, n_jobs)
# %% Second BPS for prediction
###############################################################################
# still build the rank sum by BPS
# load the pre-trained cost predictor to forecast the prediction cost
clf_prediction = joblib.load(
os.path.join('../suod', 'models', 'saved_models', 'bps_prediction.joblib'))
time_cost_pred = cost_forecast_meta(clf_prediction, X, base_estimator_names)
# TODO: add a second-stage tuner for prediction stage
n_estimators_list, starts, n_jobs = balanced_scheduling(time_cost_pred,
n_estimators, n_jobs)
print('Parallel Label Predicting without Approximators...')
# all_results_pred = Parallel(n_jobs=n_jobs, max_nbytes=None, verbose=True)(
# delayed(_parallel_predict)(
# n_estimators_list[i],
# trained_estimators[starts[i]:starts[i + 1]],
# approximators[starts[i]:starts[i + 1]],
# X,
# n_estimators,
# rp_flags[starts[i]:starts[i + 1]],
self.target_dim_frac_ = self.target_dim_frac
else: # float
self.target_dim_frac_ = int(self.target_dim_frac * n_features)
# build flags for random projection
self.rp_flags_, _ = build_codes(self.base_estimators, self.rp_clf_list,
self.rp_ng_clf_list,
self.rp_flag_global)
# decide whether BPS (balanced scheduling) is needed;
# when self.bps_flag is off, the equal split in the else branch is used
if self.bps_flag:
# load the pre-trained cost predictor to forecast the train cost
cost_predictor = joblib.load(self.cost_forecast_loc_fit_)
time_cost_pred = cost_forecast_meta(cost_predictor, X,
self.base_estimator_names)
# use BPS
n_estimators_list, starts, n_jobs = balanced_scheduling(
time_cost_pred, self.n_estimators, self.n_jobs)
else:
# use the default sklearn equal split
n_estimators_list, starts, n_jobs = _partition_estimators(
self.n_estimators, self.n_jobs)
# fit the base models
print('Parallel Training...')
start = time.time()
# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806
outlier_probability : numpy array of shape (n_samples,)
For each observation, tells whether or not
it should be considered as an outlier according to the
fitted model. Return the outlier probability, ranging
in [0,1].
"""
X = check_array(X)
n_samples, n_features = X.shape[0], X.shape[1]
# decide whether BPS (balanced scheduling) is needed;
# when self.bps_flag is off, the equal split in the else branch is used
if self.bps_flag:
# load the pre-trained cost predictor to forecast the prediction cost
cost_predictor = joblib.load(self.cost_forecast_loc_pred_)
time_cost_pred = cost_forecast_meta(cost_predictor, X,
self.base_estimator_names)
n_estimators_list, starts, n_jobs = balanced_scheduling(
time_cost_pred, self.n_estimators, self.n_jobs)
else:
# use simple equal split by sklearn
n_estimators_list, starts, n_jobs = _partition_estimators(
self.n_estimators, self.n_jobs)
# predict with the base models
if self.verbose:
print('Parallel score prediction...')
start = time.time()
# TODO: code cleanup. There is an existing bug for joblib on Windows:
# https://github.com/joblib/joblib/issues/806