How to use the contextualbandits.utils._check_1d_inp function in contextualbandits

To help you get started, we've selected a few contextualbandits examples that show how this function is used in public projects.
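All of the snippets below follow the same pattern: user-supplied vectors such as rewards, actions, or propensity scores are passed through _check_1d_inp, and the result's length is then checked against the number of rows in X. Since _check_1d_inp is a private helper in contextualbandits.utils, the sketch below is only an illustrative reimplementation of the behavior these callers rely on (coercing the input to a one-dimensional NumPy array), not the library's actual code:

import numpy as np

def check_1d_inp_sketch(y):
    # Illustrative stand-in for contextualbandits.utils._check_1d_inp:
    # coerce lists, pandas objects, or (n, 1) column vectors to a 1-d array.
    y = np.asarray(y)
    if (y.ndim == 2) and (y.shape[1] == 1):
        y = y.reshape(-1)
    if y.ndim != 1:
        raise ValueError("Input must be a 1-d vector.")
    return y

check_1d_inp_sketch([[1], [0], [1]])   # -> array([1, 0, 1])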


Example from david-cortes/contextualbandits (contextualbandits/offpolicy.py):
Parameters
        ----------
        X : array (n_samples, n_features)
            Matrix of covariates for the available data.
        a : array (n_samples), int type
            Arms or actions that were chosen for each observation.
        r : array (n_samples), {0,1}
            Rewards that were observed for the chosen actions. Must be binary rewards 0/1.
        p : array (n_samples)
            Reward estimates for the actions that were chosen by the policy.
        """
        try:
            from costsensitive import RegressionOneVsRest, WeightedAllPairs
        except ImportError:
            raise ImportError("This functionality requires the package 'costsensitive'.\nIt can be installed with 'pip install costsensitive'.")
        p = _check_1d_inp(p)
        assert p.shape[0] == X.shape[0]
        l = -r    # the cost-sensitive oracles take losses, i.e. negative rewards
        
        if isinstance(self.reward_estimator, np.ndarray):
            C = self.reward_estimator
        elif hasattr(self.reward_estimator, 'predict_proba_separate'):
            C = -self.reward_estimator.predict_proba_separate(X)
        elif hasattr(self.reward_estimator, 'predict_proba'):
            reward_estimator = SeparateClassifiers(self.reward_estimator, self.nchoices,
                                                   beta_prior = self.beta_prior, smoothing = self.smoothing)
            reward_estimator.fit(X, a, r)
            C = -reward_estimator.predict_proba_separate(X)
        else:
            raise ValueError("Couldn't obtain reward estimates. Are you passing the right input to 'reward_estimator'?")
        
        if self.handle_invalid:
            # replace degenerate estimates that are exactly 1 with random Beta(3,1) draws
            C[C == 1] = np.random.beta(3, 1, size = C.shape)[C == 1]
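The snippet above appears to come from the doubly-robust off-policy learner's fit method. As a hedged usage sketch: the class name DoublyRobustEstimator and the constructor arguments below reflect the library's public API as I understand it, but treat the exact signature as an assumption and check the documentation:

import numpy as np
from sklearn.linear_model import LogisticRegression
from contextualbandits.offpolicy import DoublyRobustEstimator  # import path assumed

nchoices, n = 5, 1000
X = np.random.normal(size = (n, 10))           # covariates
a = np.random.randint(nchoices, size = n)      # logged arms
r = (np.random.random(size = n) >= 0.5).astype('uint8')  # binary rewards
p = np.random.uniform(0.05, 1.0, size = n)     # scores for the chosen arms

# Passing a classifier with predict_proba triggers the SeparateClassifiers branch above
dr = DoublyRobustEstimator(base_algorithm = LogisticRegression(),
                           reward_estimator = LogisticRegression(),
                           nchoices = nchoices)
dr.fit(X, a, r, p)   # 'p' goes through _check_1d_inp internally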
Example from david-cortes/contextualbandits (contextualbandits/logistic.py):
def _check_inputs(self, X, y, sample_weight):
		X = _check_X_input(X)
		y = _check_1d_inp(y)
		assert X.shape[0] == y.shape[0]
		if sample_weight is None:
			sample_weight = np.ones(X.shape[0])
		else:
			sample_weight = np.asarray(sample_weight, dtype = np.float64).reshape(-1)
		assert sample_weight.shape[0] == X.shape[0]
		sample_weight /= sample_weight.sum()
		return X, y, sample_weight
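The helper above validates X and y with the library's checking functions and then normalizes the sample weights so they sum to one. A quick standalone illustration of that normalization step, in plain NumPy and independent of the library:

import numpy as np

w = np.array([1.0, 2.0, 2.0, 5.0])
w /= w.sum()       # same in-place normalization as _check_inputs
print(w)           # [0.1 0.2 0.2 0.5], now sums to 1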
Example from david-cortes/contextualbandits (contextualbandits/offpolicy.py):
"""
        Fits the Offset Tree estimator to partially-labeled data collected from a different policy.
        
        Parameters
        ----------
        X : array (n_samples, n_features)
            Matrix of covariates for the available data.
        a : array (n_samples), int type
            Arms or actions that were chosen for each observation.
        r : array (n_samples), {0,1}
            Rewards that were observed for the chosen actions. Must be binary rewards 0/1.
        p : array (n_samples)
            Reward estimates for the actions that were chosen by the policy.
        """
        X, a, r = _check_fit_input(X, a, r)
        p = _check_1d_inp(p)
        assert p.shape[0] == X.shape[0]
        
        if self.c is not None:
            p = self.c * p                                   # rescale the scores
        if self.pmin is not None:
            p = np.clip(p, a_min = self.pmin, a_max = None)  # floor the scores at pmin
        
        self._oracles = [deepcopy(self.base_algorithm) for _ in range(self.nchoices - 1)]
        Parallel(n_jobs=self.njobs, verbose=0, require="sharedmem")(
            delayed(self._fit)(classif, X, a, r, p) for classif in range(len(self._oracles)))
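The docstring identifies this as the Offset Tree estimator's fit method. A minimal usage sketch follows; the class is assumed to live at contextualbandits.offpolicy.OffsetTree and to take a binary classifier plus the number of arms, so treat the parameter and method names as assumptions rather than the definitive API:

import numpy as np
from sklearn.linear_model import LogisticRegression
from contextualbandits.offpolicy import OffsetTree  # import path assumed

nchoices, n = 4, 500
X = np.random.normal(size = (n, 8))
a = np.random.randint(nchoices, size = n)
r = (np.random.random(size = n) >= 0.5).astype('uint8')
p = np.random.uniform(0.1, 1.0, size = n)

ot = OffsetTree(base_algorithm = LogisticRegression(), nchoices = nchoices)
ot.fit(X, a, r, p)    # 'p' is validated with _check_1d_inp, then optionally
                      # rescaled by 'c' and floored at 'pmin' as shown above
pred = ot.predict(X)  # recommended arm per row (method name assumed)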
Example from david-cortes/contextualbandits (contextualbandits/evaluation.py):
handle_invalid : bool
        Whether to replace 0/1 estimated rewards with randomly-generated numbers (see Note)
    c : None or float
        Constant by which to multiply all scores from the exploration policy.
    pmin : None or float
        Scores (from the exploration policy) will be clipped from below so that
        they are never smaller than pmin.
    
    References
    ----------
    .. [1] Dudík, Miroslav, John Langford, and Lihong Li. "Doubly robust policy evaluation and learning."
           arXiv preprint arXiv:1103.4601 (2011).
    """
    X, a, r = _check_fit_input(X, a, r)
    p = _check_1d_inp(p)
    pred = _check_1d_inp(pred)
    assert p.shape[0] == X.shape[0]
    assert pred.shape[0] == X.shape[0]
    if c is not None:
        assert isinstance(c, float)
    if pmin is not None:
        assert isinstance(pmin, float)
    
    if isinstance(reward_estimator, np.ndarray):
        assert reward_estimator.shape[1] == 2
        assert reward_estimator.shape[0] == X.shape[0]
        rhat_new = reward_estimator[:, 0]
        rhat_old = reward_estimator[:, 1]
    elif hasattr(reward_estimator, 'predict_proba_separate'):
        rhat = reward_estimator.predict_proba_separate(X)
        rhat_new = rhat[np.arange(rhat.shape[0]), pred]   # estimates for the evaluated policy's arms
        rhat_old = rhat[np.arange(rhat.shape[0]), a]      # estimates for the logged arms
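The last two lines rely on NumPy integer-array indexing to pick one estimated reward per row: the estimate for the arm the evaluated policy would choose (pred) and for the arm that was actually logged (a). A self-contained illustration of that indexing pattern:

import numpy as np

rhat = np.array([[0.2, 0.8, 0.5],
                 [0.9, 0.1, 0.4]])   # shape (n_samples, n_arms)
pred = np.array([1, 2])              # arms chosen by the evaluated policy
a    = np.array([0, 0])              # arms that were actually played

rows = np.arange(rhat.shape[0])
print(rhat[rows, pred])   # [0.8 0.4] -> rhat_new
print(rhat[rows, a])      # [0.2 0.9] -> rhat_old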