Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def predict_percentile(self, df, *, p=0.5, conditional_after=None) -> pd.DataFrame:
    """
    Return the time at which each subject's predicted survival curve crosses ``p``.

    Parameters
    ----------
    df : DataFrame
        covariates of the subjects to predict for.
    p : float, optional (default=0.5)
        the survival probability level; ``p=0.5`` gives the median lifetime.
    conditional_after : iterable, optional
        passed through to ``predict_survival_function``; durations already
        survived by each subject.

    Returns
    -------
    DataFrame
        one row per subject with the qth survival time.
    """
    # Predict the full survival curves first, then align their columns to the
    # subjects' index order before extracting the qth crossing times.
    survival_curves = self.predict_survival_function(df, conditional_after=conditional_after)
    ordered_curves = survival_curves[utils._get_index(df)]
    return utils.qth_survival_times(p, ordered_curves).T
the following on the original dataset, df: `df.groupby(%s).size()`. Expected is that %s is not present in the output.
"""
% (stratum, self.strata, stratum)
)
col = _get_index(stratified_X)
v = self.predict_partial_hazard(stratified_X)
cumulative_hazard_ = cumulative_hazard_.merge(
pd.DataFrame(np.dot(c_0, v.T), index=c_0.index, columns=col),
how="outer",
right_index=True,
left_index=True,
)
else:
c_0 = self.baseline_cumulative_hazard_
v = self.predict_partial_hazard(X)
col = _get_index(v)
cumulative_hazard_ = pd.DataFrame(np.dot(c_0, v.T), columns=col, index=c_0.index)
if times is not None:
# non-linear interpolations can push the survival curves above 1 and below 0.
return cumulative_hazard_.reindex(cumulative_hazard_.index.union(times)).interpolate("index").loc[times]
return cumulative_hazard_
the cumulative hazard of individuals over the timeline
"""
if self.strata:
cumulative_hazard_ = pd.DataFrame()
for stratum, stratified_X in X.groupby(self.strata):
try:
c_0 = self.baseline_cumulative_hazard_[[stratum]]
except KeyError:
raise StatError(
"""The stratum %s was not found in the original training data. For example, try
the following on the original dataset, df: `df.groupby(%s).size()`. Expected is that %s is not present in the output.
"""
% (stratum, self.strata, stratum)
)
col = _get_index(stratified_X)
v = self.predict_partial_hazard(stratified_X)
cumulative_hazard_ = cumulative_hazard_.merge(
pd.DataFrame(np.dot(c_0, v.T), index=c_0.index, columns=col),
how="outer",
right_index=True,
left_index=True,
)
else:
c_0 = self.baseline_cumulative_hazard_
v = self.predict_partial_hazard(X)
col = _get_index(v)
cumulative_hazard_ = pd.DataFrame(np.dot(c_0, v.T), columns=col, index=c_0.index)
if times is not None:
# non-linear interpolations can push the survival curves above 1 and below 0.
return cumulative_hazard_.reindex(cumulative_hazard_.index.union(times)).interpolate("index").loc[times]
return self.predict_cumulative_hazard(df.to_frame().T)
if conditional_after is not None:
raise NotImplementedError()
times = np.atleast_1d(coalesce(times, self.timeline, np.unique(self.durations))).astype(float)
n = times.shape[0]
times = times.reshape((n, 1))
lambdas_ = self._prep_inputs_for_prediction_and_return_parameters(df)
bp = self.breakpoints
M = np.minimum(np.tile(bp, (n, 1)), times)
M = np.hstack([M[:, tuple([0])], np.diff(M, axis=1)])
return pd.DataFrame(np.dot(M, (1 / lambdas_)), columns=_get_index(df), index=times[:, 0])
-------
expectations : DataFrame
Notes
-----
If X is a DataFrame, the order of the columns do not matter. But
if X is an array, then the column ordering is assumed to be the
same as the training dataset.
See Also
--------
predict_median
predict_percentile
"""
warnings.warn("""Approximating the expected value using trapezoid rule.\n""", utils.ApproximationWarning)
subjects = utils._get_index(X)
v = self.predict_survival_function(X)[subjects]
return pd.DataFrame(trapz(v.values.T, v.index), index=subjects)
a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
can be in any order. If a numpy array, columns must be in the
same order as the training data.
p: float, optional (default=0.5)
the percentile, must be between 0 and 1.
Returns
-------
percentiles: DataFrame
See Also
--------
predict_median
"""
subjects = _get_index(X)
return qth_survival_times(p, self.predict_survival_function(X)[subjects]).T
can be in any order. If a numpy array, columns must be in the
same order as the training data.
Returns
-------
percentiles: DataFrame
the median lifetimes for the individuals. If the survival curve of an
individual does not cross 0.5, then the result is infinity.
See Also
--------
predict_median
"""
lambda_, rho_ = self._prep_inputs_for_prediction_and_return_scores(df, ancillary_df)
return pd.DataFrame((lambda_ * gamma(1 + 1 / rho_)), index=_get_index(df))