Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _get_competent_detectors(self, scores):
    """Select the detectors whose scores fall into the chosen histogram bins.

    The scores are histogrammed into ``self.n_bins`` equal-width bins,
    ``argmaxn(hist, n=self.n_selected)`` picks the bins to keep (expected to
    be the most populated ones; ``argmaxn`` is defined outside this method),
    and the positions of every score lying in a kept bin are collected.

    :param scores: numpy array of scores; it is flattened to a single
        column, so each entry is treated as one candidate. NaN entries are
        replaced via ``np.nan_to_num`` before binning.
    :return: list of integer positions into the flattened ``scores``,
        grouped bin by bin in the order ``argmaxn`` returned the bins.
        Each position appears at most once.
    """
    scores = scores.reshape(-1, 1)

    # np.histogram cannot derive a bin range from data containing NaN, so
    # neutralise NaNs before binning.
    if np.isnan(scores).any():
        scores = np.nan_to_num(scores)

    hist, bin_edges = np.histogram(scores, bins=self.n_bins)
    max_bins = argmaxn(hist, n=self.n_selected)
    last_bin = len(hist) - 1

    candidates = []
    for max_bin in max_bins:
        lower = bin_edges[max_bin]
        upper = bin_edges[max_bin + 1]
        # Mirror np.histogram's own membership rule: every bin is half-open
        # [lower, upper) except the last one, which also owns its right
        # edge. Testing `<= upper` for every bin would put a score sitting
        # exactly on a shared edge into both neighbouring bins.
        if max_bin == last_bin:
            in_bin = (scores >= lower) & (scores <= upper)
        else:
            in_bin = (scores >= lower) & (scores < upper)
        # scores is a single column, so the row indices identify the entries.
        candidates.extend(np.where(in_bin)[0].tolist())
    return candidates
# NOTE(review): this is an excerpt. The enclosing definition, and whatever
# supplies `X`, `i`, `ind_arr` and `predict_proba`, are not visible here, and
# the original indentation has been lost.
# Row i of X, reshaped to a single-row 2-D array for the estimators below.
test_sample = X[i, :].reshape(1, -1)
# Row i of ind_arr: used below to index the stored training labels and
# predictions (presumably the training neighbours of sample i - confirm).
train_inds = ind_arr[i, :]
# ground truth
y_train_sample = self.y_train_[train_inds]
# One accuracy value per base estimator, filled in by the loop below.
clf_performance = np.zeros([self.n_base_estimators_, ])
# Only the index j is used in this loop: column j of the stored training
# predictions is scored against the labels of the same training rows.
for j, clf in enumerate(self.base_estimators):
y_train_clf = self.y_train_predicted_[train_inds, j]
clf_performance[j] = accuracy_score(y_train_sample,
y_train_clf)
# print(clf_performance)
# get the indices of the best performing clfs
# (argmaxn is defined elsewhere; assumed to return the indices of the
# n largest entries - confirm)
select_clf_inds = argmaxn(clf_performance, n=self.n_selected_clfs)
# Accuracies of the selected estimators as a (1, n_selected) row vector;
# not consumed within the visible lines.
select_clf_weights = clf_performance[select_clf_inds]. \
reshape(1, len(select_clf_inds))
# print(select_clf_inds)
# Output buffers with one slot per selected estimator: predictions in
# all_scores, and a vector of length self._classes per estimator in all_proba.
all_scores = np.zeros([1, len(select_clf_inds)])
all_proba = np.zeros([1, self._classes, len(select_clf_inds)])
for k, clf_ind in enumerate(select_clf_inds):
clf = self.base_estimators[clf_ind]
# make prediction
# Only one of the two buffers is filled, depending on `predict_proba`;
# the other keeps its zeros.
if predict_proba:
all_proba[:, :, k] = clf.predict_proba(test_sample)
else:
all_scores[:, k] = clf.predict(test_sample)
# NOTE(review): this is an excerpt. The enclosing definition (which supplies
# `scores` and `self`) is not visible here, and the original indentation has
# been lost.
# Flatten to a single column so that row indices identify the entries.
scores = scores.reshape(-1, 1)
# TODO: handle when Pearson score is 0
# if scores contain nan, change it to 0
if np.isnan(scores).any():
scores = np.nan_to_num(scores)
# Clamp the bin count to self.n_clf. Note that this overwrites self.n_bins
# on the instance, so the reduced value persists after this call.
if self.n_bins > self.n_clf:
warnings.warn(
"The number of histogram bins is greater than the number of "
"classifiers, reducing n_bins to n_clf.")
self.n_bins = self.n_clf
# hist holds the count per bin; bin_edges has one more entry than hist.
hist, bin_edges = np.histogram(scores, bins=self.n_bins)
# find n_selected largest bins
# (argmaxn is defined elsewhere; assumed to return bin indices - confirm)
max_bins = argmaxn(hist, n=self.n_selected)
candidates = []
# iterate through bins
for max_bin in max_bins:
# determine which detectors are inside this bin
# NOTE(review): both bounds are inclusive, so a score lying exactly on an
# edge shared by two selected bins matches both and is appended twice.
selected = np.where((scores >= bin_edges[max_bin])
& (scores <= bin_edges[max_bin + 1]))
# add to list of candidates
# selected[0] holds the row indices of the matching entries.
candidates = candidates + selected[0].tolist()
return candidates