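# Off-policy evaluation fragment: builds the reward estimates used for a
# doubly-robust style estimate of a new policy's value. ('np', 'SeparateClassifiers'
# and other helpers used below are assumed to be imported elsewhere in the module.)
# First, validate the optional propensity adjustments: 'c' is a multiplicative
# correction and 'pmin' a lower clip for the logged propensities.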
if c is not None:
assert isinstance(c, float)
if pmin is not None:
assert isinstance(pmin, float)
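# 'reward_estimator' can come in three forms: a precomputed (n_samples, 2) array with
# estimates for the evaluated and the logged action, a fitted object exposing
# 'predict_proba_separate', or a plain classifier with 'predict_proba' that is wrapped
# in SeparateClassifiers and fitted on the logged data here.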
if type(reward_estimator) == np.ndarray:
assert reward_estimator.shape[1] == 2
assert reward_estimator.shape[0] == X.shape[0]
rhat_new = reward_estimator[:, 0]
rhat_old = reward_estimator[:, 1]
elif 'predict_proba_separate' in dir(reward_estimator):
rhat = reward_estimator.predict_proba_separate(X)
rhat_new = rhat[np.arange(rhat.shape[0]), pred]
rhat_old = rhat[np.arange(rhat.shape[0]), a]
elif 'predict_proba' in dir(reward_estimator):
reward_estimator = SeparateClassifiers(reward_estimator, nchoices)
reward_estimator.fit(X, a, r)
rhat = reward_estimator.predict_proba_separate(X)
rhat_new = rhat[np.arange(rhat.shape[0]), pred]
rhat_old = rhat[np.arange(rhat.shape[0]), a]
else:
error_msg = "'reward_estimator' must be either an array, a classifier with "
error_msg += "'predict_proba', or a 'SeparateClassifiers' object."
raise ValueError(error_msg)
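# Estimates of exactly 0 or 1 are replaced with random draws near the boundary
# (Beta(3, 1) for ones, Beta(1, 3) for zeros).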
if handle_invalid:
rhat_new[rhat_new == 1] = np.random.beta(3, 1, size=rhat_new.shape)[rhat_new == 1]
rhat_new[rhat_new == 0] = np.random.beta(1, 3, size=rhat_new.shape)[rhat_new == 0]
rhat_old[rhat_old == 1] = np.random.beta(3, 1, size=rhat_old.shape)[rhat_old == 1]
rhat_old[rhat_old == 0] = np.random.beta(1, 3, size=rhat_old.shape)[rhat_old == 0]
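# Adjust the logged propensities: rescale by 'c' and clip from below at 'pmin'.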
if c is not None:
    p = c * p
if pmin is not None:
    p = np.clip(p, a_min=pmin, a_max=None)
p : array (n_samples,)
    Reward estimates for the actions that were chosen by the policy.
"""
try:
from costsensitive import RegressionOneVsRest, WeightedAllPairs
except ImportError:
raise ValueError("This functionality requires the 'costsensitive' package.\nIt can be installed with 'pip install costsensitive'.")
p = _check_1d_inp(p)
assert p.shape[0] == X.shape[0]
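# Cost-sensitive learners minimize cost, so losses are the negated observed rewards.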
l = -r
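# As above, 'reward_estimator' can be a precomputed array (used directly as the cost
# matrix C), a fitted object with 'predict_proba_separate', or a base classifier that
# is wrapped in SeparateClassifiers; predicted probabilities are negated into costs.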
if type(self.reward_estimator) == np.ndarray:
C = self.reward_estimator
elif 'predict_proba_separate' in dir(self.reward_estimator):
C = -self.reward_estimator.predict_proba_separate(X)
elif 'predict_proba' in dir(self.reward_estimator):
reward_estimator = SeparateClassifiers(self.reward_estimator, self.nchoices, beta_prior = self.beta_prior, smoothing = self.smoothing)
reward_estimator.fit(X, a, r)
C = -reward_estimator.predict_proba_separate(X)
else:
raise ValueError("Error: couldn't obtain reward estimates. Are you passing the right input to 'reward_estimator'?")
if self.handle_invalid:
C[C == 1] = np.random.beta(3, 1, size = C.shape)[C == 1]
C[C == 0] = np.random.beta(1, 3, size = C.shape)[C == 0]
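# Optional propensity adjustments, as in the evaluation fragment above.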
if self.c is not None:
p = self.c * p
if self.pmin is not None:
p = np.clip(p, a_min = self.pmin, a_max = None)
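# Doubly-robust correction: for the action that was actually taken, shift its estimated
# cost towards the observed loss by the residual divided by the (adjusted) propensity.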
C[np.arange(C.shape[0]), a] += (l - C[np.arange(C.shape[0]), a]) / p.reshape(-1)
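# Choice of cost-sensitive oracle: 'rovr' corresponds to RegressionOneVsRest (imported above).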
if self.method == 'rovr':