Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
stratify_by (pd.Series|None): Categorical variable to stratify according to (num_subjects,).
Namely, aggregate within subgroups sharing the same values.
If not provided, the aggregation is on the entire population.
treatment_values (Any): Subset of values to stratify on from `stratify_by`.
If not supplied, all available stratification values are used.
Returns:
pd.Series[Any, float]: Series whose index holds the treatment values and whose values are numbers - the
aggregated outcome for the stratum of people whose assigned treatment is the key.
"""
if sample_weight is None:
sample_weight = pd.Series(data=1.0, index=y.index)
if treatment_values is None and stratify_by is None:
stratify_by = pd.Series(data=0, index=y.index)
treatment_values = get_iterable_treatment_values(treatment_values, stratify_by)
res = {}
for treatment_value in treatment_values:
subgroup_mask = stratify_by == treatment_value
aggregated_value = np.average(y[subgroup_mask], weights=sample_weight[subgroup_mask])
res[treatment_value] = aggregated_value
res = pd.Series(res)
return res
def estimate_individual_outcome(self, X, a, treatment_values=None, predict_proba=None):
    """Predict the outcome under each treatment value and combine the results.

    Args:
        X: Covariates, forwarded unchanged to `self._predict`.
        a: Treatment assignment. Passed to
            `g_tools.get_iterable_treatment_values` to resolve which
            treatment values to iterate over; its `name` labels the
            treatment level of the combined result.
        treatment_values: Requested treatment values, forwarded to
            `g_tools.get_iterable_treatment_values` together with `a`.
        predict_proba: Forwarded unchanged to `self._predict`.

    Returns:
        The per-treatment predictions concatenated along the columns with
        `pd.concat`, keyed by treatment value.
    """
    outcome_by_treatment = {
        value: self._predict(X=X, treatment_value=value, predict_proba=predict_proba)
        for value in g_tools.get_iterable_treatment_values(treatment_values, a)
    }
    # TODO: should combine the results by the observed treatment into additional vector?
    # `or` falls back to "a" for any falsy name (e.g. an unnamed series).
    level_name = a.name or "a"
    return pd.concat(outcome_by_treatment, axis="columns", names=[level_name])
specific treatment and yields the relevant dataset.
Args:
X (pd.DataFrame): Covariate matrix of size (num_subjects, num_features).
a (pd.Series): Treatment assignment of size (num_subjects,).
y (pd.Series | None): Observed outcome of size (num_subjects,).
w (pd.Series | None): sample_weights
Yields:
(pd.DataFrame, pd.Series | None, pd.Series | None, Any): A four-tuple containing:
* the covariates for individuals under a specific treatment,
* the observed outcomes for these individuals (None if y was not passed),
* the sample weights for these individuals (None if w was not passed),
* the current treatment value.
"""
treatment_values = g_tools.get_iterable_treatment_values(None, a)
for treatment_value in treatment_values:
treated = a == treatment_value
cur_X = X.loc[treated, :]
cur_y = y[treated] if y is not None else None
cur_w = w[treated] if w is not None else None
yield cur_X, cur_y, cur_w, treatment_value