Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
(pd.Series, pd.DataFrame, pd.Series): 3-element tuple containing:
- **treatment** (*pd.Series*): Treatment assignment to each sample.
- **propensity** (*pd.DataFrame*): The marginal conditional probability of treatment given covariates.
A DataFrame shaped (num_samples x num_of_possible_treatment_categories).
- **beta** (*pd.Series*): The coefficients used to generate the current variable from its predecessors.
Raises:
ValueError: if prob_category is None (treatment must be categorical)
ValueError: If prob_category is not a legitimate probability vector (non negative, sums to 1)
"""
# Check input validity:
if prob_category is None:
raise ValueError("Treatment variable must be categorical, therefore it must have a legitimate distribution "
"over its possible values. Got None instead.")
CausalSimulator3._check_for_legitimate_probabilities(prob_category)
# generate only the continuous signal since it is later processed (therefore prob_category = None)
x_continuous, beta = self.generate_covariate_col(X_parents=X_parents, link_type=link_type, snr=snr,
prob_category=None, num_samples=X_parents.index.size,
var_name=var_name)
generation_method = self.TREATMENT_METHODS.get(method)
if generation_method is None:
raise KeyError("The given method {method} is not supported, "
"only {valid_methods}.".format(valid_methods=list(self.TREATMENT_METHODS.keys()),
method=method))
else:
params = self.params.get(var_name, {})
propensity, treatment = generation_method(x_continuous, prob_category, snr=snr, params=params)
return treatment.astype(int), propensity.astype(float), beta
discretized values.
Returns:
res (pd.Series): A continuous covariate column if prob_category is None, else a discrete column according
to the given probabilities.
bins (pd.Series): the bins
Raises:
ValueError: If prob_category is not a legitimate probability vector (non negative, sums to 1)
"""
if prob_category is None or x_col.nunique() <= prob_category.size:
res = x_col
bins = None
else: # should perform discretization
if bins is None: # should create new bins
CausalSimulator3._check_for_legitimate_probabilities(prob_category)
                # make k-1 thresholds (based on quantiles of the cdf), then count how many thresholds each sample crosses:
# see: https://en.wikipedia.org/wiki/Quantile_function#Definition
if method == "gaussian": # discretize according to percentiles drawn from normal distribution
bins = stats.norm(loc=0, scale=1).ppf(np.cumsum(prob_category)[:-1])
cutoffs = pd.DataFrame([x_col > thresh for thresh in bins]).T
res = cutoffs.sum(axis="columns")
elif method == "empiric": # discretize according to percentiles from the empirical data itself
try:
cumulative_ps = pd.Series(0, index=["null"]).append(prob_category).cumsum()
res, bins = pd.qcut(x=x_col, q=cumulative_ps,
labels=prob_category.index, retbins=True)
bins = pd.Series(data=bins, index=cumulative_ps.index)
# TODO: maybe noise this a little?
except ValueError as _:
warnings.warn("Error occurred while discretizing column using pd.qcut. "
"Probably the columns' values where already discrete (probably because it's "