    # case 1: all predictions are within allowance
    if self.explore_cnt <= self.explore_rounds:
        return np.random.randint(self.nchoices, size = X.shape[0])
    # case 2: some predictions are within allowance, others are not
    else:
        n_explore = self.explore_rounds - self.explore_cnt + X.shape[0]
        pred = np.empty(X.shape[0], dtype = "float64")
        pred[:n_explore] = np.random.randint(self.nchoices, size = n_explore)
        pred[n_explore:] = self._oracles.predict(X[n_explore:])
        return pred
else:
    return self._oracles.predict(X)
class ActiveExplorer(_ActivePolicy):
"""
Active Explorer
Selects a proportion of actions according to an active learning heuristic based on gradient.
Works only for differentiable and preferably smooth functions.
Note
----
Here, for the predictions that are made according to an active learning heuristic
(these are selected at random, just like in Epsilon-Greedy), the guiding heuristic
is the gradient that the observation, having either label (either weighted by the estimated
probability, or taking the maximum or minimum), would produce on each model that
predicts a class, given the current coefficients for that model. This of course requires
being able to calculate gradients - the package comes with pre-defined gradient functions for
logistic regression, and allows passing custom functions for others.
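# The following is a minimal illustrative sketch (not part of this class) of the
# gradient heuristic described above, assuming a fitted scikit-learn binary
# LogisticRegression `clf` and a 2-d batch `X`; the function name is hypothetical.
# For the log-loss, the gradient w.r.t. the coefficients is (p - y) * x, so one norm
# is computed per hypothetical label, and a min/max/weighted criterion can then be
# applied across the two columns.
import numpy as np
from scipy.special import expit

def logreg_grad_norms(clf, X):
    # P(y = 1 | x) under the current coefficients
    p = expit(X @ clf.coef_.ravel() + clf.intercept_[0])
    norms = np.empty((X.shape[0], 2))
    for lab in (0, 1):
        err = (p - lab).reshape((-1, 1))
        # gradient of the log-loss: (p - y) * x for the weights, (p - y) for the intercept
        grad = np.c_[err * X, err]
        norms[:, lab] = np.linalg.norm(grad, axis = 1)
    return norms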
grad_norms = self._rand_grad_norms(X,
    self._oracles.get_n_pos(choice), self._oracles.get_n_neg(choice))
if grad_crit == 'min':
    pred[:, choice] = grad_norms.min(axis = 1)
elif grad_crit == 'max':
    pred[:, choice] = grad_norms.max(axis = 1)
elif grad_crit == 'weighted':
    pred[:, choice] = (pred[:, choice].reshape((-1, 1)) * grad_norms).sum(axis = 1)
else:
    raise ValueError("Something went wrong. Please open an issue in GitHub indicating what you were doing.")
return pred
class AdaptiveGreedy(_ActivePolicy):
"""
Adaptive Greedy
Takes the action with the highest estimated reward, unless that estimation falls below a certain
threshold, in which case it takes an action either at random or according to an active learning
heuristic (in the same way as `ActiveExplorer`).
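# A minimal sketch of the decision rule described above, assuming `score` is an
# (n_obs, nchoices) array of estimated rewards per arm; the function name and the
# random fallback (which could instead be an active-learning criterion) are
# illustrative and not this class's API.
import numpy as np

def adaptive_greedy_choose(score, threshold, nchoices, rng = None):
    rng = np.random.default_rng() if rng is None else rng
    # take the highest-estimated arm per row
    best = score.argmax(axis = 1)
    # fall back to a random arm wherever the best estimate falls below the threshold
    below = score[np.arange(score.shape[0]), best] < threshold
    best[below] = rng.integers(nchoices, size = below.sum())
    return best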
Note
----
The hyperparameters here can make a large impact on the quality of the choices. Be sure
to tune the threshold (or percentile), decay, and prior (or smoothing parameters).
Note
----
The threshold for the reward probabilities can be set to a hard-coded number, or
calculated dynamically by keeping track of the predictions it makes, and taking