def fit(self, X, a, r, p):
    """
    Fits the Offset Tree estimator to partially-labeled data collected from a different policy.

    Parameters
    ----------
    X : array (n_samples, n_features)
        Matrix of covariates for the available data.
    a : array (n_samples), int type
        Arms or actions that were chosen for each observation.
    r : array (n_samples), {0,1}
        Rewards that were observed for the chosen actions. Must be binary rewards 0/1.
    p : array (n_samples)
        Scores or reward estimates from the policy that generated the data,
        for the actions that it chose.
    """
    X, a, r = _check_fit_input(X, a, r)
    p = _check_1d_inp(p)
    assert p.shape[0] == X.shape[0]

    if self.c is not None:
        p = self.c * p
    if self.pmin is not None:
        p = np.clip(p, a_min=self.pmin, a_max=None)

    # One binary classification oracle per internal node of the offset tree (nchoices - 1 in total).
    self._oracles = [deepcopy(self.base_algorithm) for _ in range(self.nchoices - 1)]
    Parallel(n_jobs=self.njobs, verbose=0, require="sharedmem")(
        delayed(self._fit)(classif, X, a, r, p) for classif in range(len(self._oracles)))
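# Usage sketch for the fit(X, a, r, p) call above, on synthetic logged data.
# The OffsetTree(base_algorithm, nchoices) constructor and import path are assumptions
# inferred from the attributes used in the method, not confirmed by this snippet.
import numpy as np
from sklearn.linear_model import LogisticRegression
from contextualbandits.offpolicy import OffsetTree  # assumed import path

rng = np.random.default_rng(0)
n_samples, n_features, nchoices = 1000, 10, 4
X_log = rng.normal(size=(n_samples, n_features))     # covariates
a_log = rng.integers(0, nchoices, size=n_samples)    # actions chosen by the logging policy
r_log = rng.integers(0, 2, size=n_samples)           # observed binary rewards
p_log = rng.uniform(0.1, 1.0, size=n_samples)        # logging-policy scores for the chosen actions

offset_tree = OffsetTree(base_algorithm=LogisticRegression(), nchoices=nchoices)
offset_tree.fit(X_log, a_log, r_log, p_log)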
    Parameters
    ----------
    X : array (n_samples, n_features)
        Matrix of covariates for the available data.
    a : array (n_samples), int type
        Arms or actions that were chosen for each observation.
    r : array (n_samples), {0,1}
        Rewards that were observed for the chosen actions. Must be binary rewards 0/1.

    Returns
    -------
    self : obj
        This object
    """
    X, a, r = _check_fit_input(X, a, r, self.choice_names)
    for n in range(self.nchoices):
        # Update the binary classifier for arm 'n' on the rows where that arm was chosen.
        this_action = a == n
        self._oracles[n].partial_fit(X[this_action, :], r[this_action].astype('float64'))
    self.is_fitted = True
    return self
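# The loop above is a one-vs-rest incremental update: each arm keeps its own binary
# classifier, trained only on the rows where that arm was chosen. A standalone sketch
# of the same pattern with scikit-learn's SGDClassifier (the oracle class here is
# illustrative, not necessarily what the wrapper uses):
import numpy as np
from sklearn.linear_model import SGDClassifier

nchoices = 4
oracles = [SGDClassifier() for _ in range(nchoices)]   # one binary oracle per arm

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 8))
a = rng.integers(0, nchoices, size=500)
r = rng.integers(0, 2, size=500)

for n in range(nchoices):
    this_action = a == n
    if this_action.any():   # partial_fit would raise on an empty batch
        oracles[n].partial_fit(X[this_action, :],
                               r[this_action].astype('float64'),
                               classes=np.array([0.0, 1.0]))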
    Parameters
    ----------
    X : array (n_samples, n_features)
        Matrix of covariates for the available data.
    a : array (n_samples), int type
        Arms or actions that were chosen for each observation.
    r : array (n_samples), {0,1}
        Rewards that were observed for the chosen actions. Must be binary rewards 0/1.

    Returns
    -------
    self : obj
        This object
    """
    X, a, r = _check_fit_input(X, a, r, self.choice_names)
    self._oracles = _OneVsRest(self.base_algorithm,
                               X, a, r,
                               self.nchoices,
                               self.beta_prior[1], self.beta_prior[0][0], self.beta_prior[0][1],
                               self.smoothing,
                               self.assume_unique_reward,
                               self.batch_train,
                               njobs=self.njobs)
    self.is_fitted = True
    return self
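# The beta_prior argument is unpacked above as a threshold (beta_prior[1]) and a pair of
# Beta parameters (beta_prior[0]). One common reading of that convention, shown here as
# an illustrative sketch rather than the wrapper's exact logic, is to score an arm from
# the Beta prior until it has accumulated enough observations of its own:
import numpy as np

beta_prior = ((3.0, 7.0), 2)   # ((alpha, beta), min. observations before trusting the data)

def arm_score(successes, trials, rng):
    (alpha, beta), threshold = beta_prior
    if trials < threshold:
        return rng.beta(alpha, beta)      # cold arm: draw a score from the prior
    return successes / trials             # warm arm: use the empirical reward rate

rng = np.random.default_rng(0)
print(arm_score(0, 0, rng))               # prior-driven score for an unseen arm
print(arm_score(30, 100, rng))            # 0.3 for a well-observed arm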
    Parameters
    ----------
    X : array (n_samples, n_features)
        Matrix of covariates for the available data.
    a : array (n_samples), int type
        Arms or actions that were chosen for each observation.
    r : array (n_samples), {0,1}
        Rewards that were observed for the chosen actions. Must be binary rewards 0/1.

    Returns
    -------
    self : obj
        This object
    """
    X, a, r = _check_fit_input(X, a, r, self.choice_names)
    self._oracles = _OneVsRest(self.base_algorithm,
                               X, a, r,
                               self.nchoices,
                               self.beta_prior[1], self.beta_prior[0][0], self.beta_prior[0][1],
                               self.smoothing,
                               self.assume_unique_reward,
                               self.batch_train,
                               self._force_fit,
                               force_counters=self.active_choice is not None,
                               njobs=self.njobs)
    self.is_fitted = True
    return self
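# When an active choice criterion is set ('force_counters' above), the wrapper keeps
# per-arm counters. A minimal sketch of that kind of bookkeeping (hypothetical layout,
# not the wrapper's internal representation): count observations and positive rewards
# per arm so an active-learning heuristic can prefer under-explored arms.
import numpy as np

nchoices = 4
counters = np.zeros((2, nchoices))   # row 0: observations per arm, row 1: positives per arm

a = np.array([0, 1, 1, 3, 2, 1])     # logged actions
r = np.array([1, 0, 1, 1, 0, 0])     # logged binary rewards
for arm, reward in zip(a, r):
    counters[0, arm] += 1
    counters[1, arm] += reward

least_explored = int(np.argmin(counters[0]))   # candidate arm to query next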
        Only used when passing online=True.
    batch_size : int
        After how many rounds to refit the policy being evaluated.
        Only used when passing online=True.

    Returns
    -------
    result : tuple (float, int)
        Estimated mean reward and number of observations taken.

    References
    ----------
    .. [1] Li, Lihong, et al. "A contextual-bandit approach to personalized news article recommendation."
           Proceedings of the 19th International Conference on World Wide Web. ACM, 2010.
    """
    X, a, r = _check_fit_input(X, a, r)
    if start_point_online == 'random':
        start_point_online = np.random.randint(X.shape[0])
    elif not isinstance(start_point_online, (int, float)):
        raise ValueError("'start_point_online' must be one of 'random', float [0,1] or int [0, sample_size]")

    if not online:
        # Rejection sampling: keep only the rows where the evaluated policy's
        # prediction matches the logged action, and average their rewards.
        pred = policy.predict(X)
        match = pred == a
        return (np.mean(r[match]), match.sum())
    else:
        cum_r = 0
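# Standalone sketch of the offline ('online=False') branch above: keep only the rows
# where the evaluated policy would have chosen the same arm as the logging policy, and
# average the observed rewards on those rows (rejection sampling, Li et al. 2010).
# The lambda policy below is a made-up stand-in for a fitted policy object.
import numpy as np

def rejection_sampling_value(predict, X, a, r):
    pred = predict(X)
    match = pred == a
    if not match.any():
        raise ValueError("No matching observations; the estimate is undefined.")
    return float(np.mean(r[match])), int(match.sum())

rng = np.random.default_rng(1)
X = rng.normal(size=(1000, 5))
a = rng.integers(0, 2, size=1000)
r = rng.integers(0, 2, size=1000)

est_reward, n_used = rejection_sampling_value(lambda X: (X[:, 0] > 0).astype(int), X, a, r)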
        Number of arms/labels to choose from.
        Only used when passing a classifier object to 'reward_estimator'.
    handle_invalid : bool
        Whether to replace 0/1 estimated rewards with randomly-generated numbers (see Note).
    c : None or float
        Constant by which to multiply all scores from the exploration policy.
    pmin : None or float
        Scores (from the exploration policy) will be floored at this value, i.e. replaced
        with the maximum between pmin and the original estimate.

    References
    ----------
    .. [1] Dudík, Miroslav, John Langford, and Lihong Li. "Doubly robust policy evaluation and learning."
           arXiv preprint arXiv:1103.4601 (2011).
    """
    X, a, r = _check_fit_input(X, a, r)
    p = _check_1d_inp(p)
    pred = _check_1d_inp(pred)
    assert p.shape[0] == X.shape[0]
    assert pred.shape[0] == X.shape[0]
    if c is not None:
        assert isinstance(c, float)
    if pmin is not None:
        assert isinstance(pmin, float)

    if type(reward_estimator) == np.ndarray:
        # Pre-computed reward estimates: column 0 is for the actions the evaluated
        # policy would choose ('pred'), column 1 is for the logged actions ('a').
        assert reward_estimator.shape[1] == 2
        assert reward_estimator.shape[0] == X.shape[0]
        rhat_new = reward_estimator[:, 0]
        rhat_old = reward_estimator[:, 1]
    elif 'predict_proba_separate' in dir(reward_estimator):
        rhat = reward_estimator.predict_proba_separate(X)
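# For reference, the doubly robust estimate assembled from these pieces combines a
# model-based term for the evaluated policy's action with an importance-weighted
# correction on the rows where it agrees with the logged action (Dudík et al., 2011).
# A standalone sketch of that final combination, with all inputs synthetic:
import numpy as np

def doubly_robust_value(pred, a, r, p, rhat_new, rhat_old):
    match = (pred == a).astype('float64')
    return float(np.mean(rhat_new + match * (r - rhat_old) / p))

rng = np.random.default_rng(2)
n = 1000
a = rng.integers(0, 3, size=n)          # logged actions
pred = rng.integers(0, 3, size=n)       # actions the evaluated policy would take
r = rng.integers(0, 2, size=n)          # observed binary rewards
p = rng.uniform(0.2, 1.0, size=n)       # logging-policy scores for the logged actions
rhat_new = rng.uniform(size=n)          # estimated reward at (x, pred)
rhat_old = rng.uniform(size=n)          # estimated reward at (x, a)
print(doubly_robust_value(pred, a, r, p, rhat_new, rhat_old))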