Parameters
----------
X : array (n_samples, n_features)
    Matrix of covariates for the available data.
a : array (n_samples), int type
    Arms or actions that were chosen for each observation.
r : array (n_samples), {0,1}
    Rewards that were observed for the chosen actions. Must be binary rewards 0/1.
p : array (n_samples)
    Reward estimates for the actions that were chosen by the policy.
"""
try:
    from costsensitive import RegressionOneVsRest, WeightedAllPairs
except ImportError:
    raise ValueError("This functionality requires package 'costsensitive'.\n"
                     "It can be installed with 'pip install costsensitive'.")
p = _check_1d_inp(p)
assert p.shape[0] == X.shape[0]
l = -r  # losses are the negative of the observed rewards

# Obtain the matrix 'C' of per-arm estimates: either a precomputed array,
# or (negated) probability estimates from a classifier fit separately per arm
if type(self.reward_estimator) == np.ndarray:
    C = self.reward_estimator
elif 'predict_proba_separate' in dir(self.reward_estimator):
    C = -self.reward_estimator.predict_proba_separate(X)
elif 'predict_proba' in dir(self.reward_estimator):
    reward_estimator = SeparateClassifiers(self.reward_estimator, self.nchoices,
                                           beta_prior=self.beta_prior, smoothing=self.smoothing)
    reward_estimator.fit(X, a, r)
    C = -reward_estimator.predict_proba_separate(X)
else:
    raise ValueError("Error: couldn't obtain reward estimates. Are you passing the right input to 'reward_estimator'?")

if self.handle_invalid:
    # entries exactly equal to 1 are replaced with Beta(3, 1) draws
    C[C == 1] = np.random.beta(3, 1, size=C.shape)[C == 1]
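For context, the sketch below (not part of the library) illustrates what a per-arm reward-estimate matrix of the kind negated into 'C' above could contain: one column of estimated P(r = 1 | x, arm) per arm, obtained by fitting a separate classifier on the rows where that arm was chosen. The helper name 'make_reward_matrix' and the simulated data are purely illustrative.

import numpy as np
from sklearn.linear_model import LogisticRegression

def make_reward_matrix(X, a, r, nchoices):
    # Illustrative only: one classifier per arm, fit on the observations
    # where that arm was played, then evaluated on every row of X
    est = np.zeros((X.shape[0], nchoices))
    for arm in range(nchoices):
        mask = (a == arm)
        if (not mask.any()) or (np.unique(r[mask]).shape[0] < 2):
            # degenerate arm: fall back to a constant estimate
            est[:, arm] = r[mask].mean() if mask.any() else 0.5
            continue
        clf = LogisticRegression().fit(X[mask], r[mask])
        est[:, arm] = clf.predict_proba(X)[:, 1]
    return est

rng = np.random.default_rng(0)
X_sim = rng.normal(size=(500, 5))
a_sim = rng.integers(0, 3, size=500)
r_sim = (rng.random(500) < 0.3 + 0.1 * a_sim).astype(int)
C_sim = -make_reward_matrix(X_sim, a_sim, r_sim, nchoices=3)  # negated, as in the code above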
    Only used when passing a classifier object to 'reward_estimator'.
handle_invalid : bool
    Whether to replace 0/1 estimated rewards with randomly-generated numbers (see Note).
c : None or float
    Constant by which to multiply all scores from the exploration policy.
pmin : None or float
    Scores (from the exploration policy) will be clipped from below at 'pmin',
    i.e. replaced by the maximum between 'pmin' and the original estimate.

References
----------
.. [1] Dudík, Miroslav, John Langford, and Lihong Li. "Doubly robust policy evaluation and learning."
       arXiv preprint arXiv:1103.4601 (2011).
"""
X, a, r = _check_fit_input(X, a, r)
p = _check_1d_inp(p)
pred = _check_1d_inp(pred)
assert p.shape[0] == X.shape[0]
assert pred.shape[0] == X.shape[0]
if c is not None:
    assert isinstance(c, float)
if pmin is not None:
    assert isinstance(pmin, float)

# Reward estimates for the arms the evaluated policy would pick ('rhat_new')
# and for the arms that were actually chosen in the logged data ('rhat_old')
if type(reward_estimator) == np.ndarray:
    assert reward_estimator.shape[1] == 2
    assert reward_estimator.shape[0] == X.shape[0]
    rhat_new = reward_estimator[:, 0]
    rhat_old = reward_estimator[:, 1]
elif 'predict_proba_separate' in dir(reward_estimator):
    rhat = reward_estimator.predict_proba_separate(X)
    rhat_new = rhat[np.arange(rhat.shape[0]), pred]
    rhat_old = rhat[np.arange(rhat.shape[0]), a]
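The rest of the function is not shown here. Purely as a reference point, the snippet below sketches the standard doubly-robust value estimate from Dudík et al. [1] using the quantities defined above, assuming 'p' holds the logging policy's score/probability for the action actually taken. It is not the library function's exact remainder (the 'c' and 'pmin' adjustments, for instance, are omitted).

import numpy as np

def doubly_robust_value(pred, a, r, p, rhat_new, rhat_old):
    # model-based term plus an importance-weighted correction on the samples
    # where the evaluated policy agrees with the logged action
    match = (pred == a).astype(float)
    return np.mean(rhat_new + match * (r - rhat_old) / p)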
def _check_inputs(self, X, y, sample_weight):
    X = _check_X_input(X)
    y = _check_1d_inp(y)
    assert X.shape[0] == y.shape[0]
    if sample_weight is None:
        sample_weight = np.ones(X.shape[0])
    assert sample_weight.shape[0] == X.shape[0]
    sample_weight /= sample_weight.sum()  # normalize weights so they sum to one
    return X, y, sample_weight
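A small illustrative call (hypothetical data; assuming the '_check_X_input' and '_check_1d_inp' helpers pass well-formed NumPy arrays through unchanged), showing that the weights come back rescaled to sum to one:

import numpy as np

X_demo = np.random.normal(size=(4, 2))
y_demo = np.array([0., 1., 1., 0.])
w_demo = np.array([1., 1., 2., 4.])
# _check_inputs would return these weights normalized to sum to 1:
w_expected = w_demo / w_demo.sum()   # [0.125, 0.125, 0.25, 0.5]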
"""
Fits the Offset Tree estimator to partially-labeled data collected from a different policy.
Parameters
----------
X : array (n_samples, n_features)
Matrix of covariates for the available data.
a : array (n_samples), int type
Arms or actions that were chosen for each observations.
r : array (n_samples), {0,1}
Rewards that were observed for the chosen actions. Must be binary rewards 0/1.
p : array (n_samples)
Reward estimates for the actions that were chosen by the policy.
"""
X, a, r = _check_fit_input(X, a, r)
p = _check_1d_inp(p)
assert p.shape[0] == X.shape[0]

if self.c is not None:
    p = self.c * p
if self.pmin is not None:
    p = np.clip(p, a_min=self.pmin, a_max=None)

# One binary oracle per internal node of the offset tree (nchoices - 1 in total),
# fit in parallel over the logged data
self._oracles = [deepcopy(self.base_algorithm) for c in range(self.nchoices - 1)]
Parallel(n_jobs=self.njobs, verbose=0, require="sharedmem")(
    delayed(self._fit)(classif, X, a, r, p) for classif in range(len(self._oracles)))
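A hedged usage sketch for the fit call above, assuming the class is 'contextualbandits.offpolicy.OffsetTree' with 'base_algorithm' and 'nchoices' constructor arguments and a 'predict' method (check the package documentation if the signature differs); the logged data is simulated here with a uniformly random logging policy:

import numpy as np
from sklearn.linear_model import LogisticRegression
from contextualbandits.offpolicy import OffsetTree  # assumed import path

rng = np.random.default_rng(1)
nchoices = 4
X_log = rng.normal(size=(1000, 6))
a_log = rng.integers(0, nchoices, size=1000)                        # actions taken by the logging policy
r_log = (rng.random(1000) < 0.2 + 0.15 * (a_log == 2)).astype(int)  # observed binary rewards
p_log = np.full(1000, 1.0 / nchoices)                               # uniform logging propensities

offset_tree = OffsetTree(base_algorithm=LogisticRegression(), nchoices=nchoices)
offset_tree.fit(X_log, a_log, r_log, p_log)
chosen_arms = offset_tree.predict(X_log[:10])                       # arms the fitted policy would pick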