Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Regression check for CoxPHFitter._compute_delta_beta on a small stratified dataset.
# The frame has five rows: one covariate ("var1"), a column "T", and a "strata" label
# that puts the first three rows in stratum 1 and the last two in stratum 2.
df = pd.DataFrame(
{
"var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
"T": [5.269797, 6.601666, 7.335846, 11.684092, 12.678458],
"strata": [1, 1, 1, 2, 2],
}
)
# Every row gets E = True and a weight of 1.
df["E"] = True
df["weights"] = 1
# The literal "T" values above are already ascending, so this sort keeps the row order.
df = df.sort_values(by="T")
cph = CoxPHFitter()
# "T" and "E" are passed positionally -- presumably the duration and event columns; confirm
# against CoxPHFitter.fit. Weights come from "weights" and the fit is stratified on "strata".
cph.fit(df, "T", "E", show_progress=True, weights_col="weights", strata=["strata"])
# Index by stratum, then drop everything except the covariate column(s) before scaling.
df = df.set_index("strata")
# 0 is passed in the mean position, and the fitted model's _norm_std in the std position.
X = normalize(df.drop(["T", "E", "weights"], axis=1), 0, cph._norm_std)
# The 1x5 literal is transposed into a 5x1 column: one reference value per row.
expected = np.array([[-0.6960789, 1.6729761, 0.3094744, -0.2895864, -0.9967852]]).T
actual = cph._compute_delta_beta(X, df["T"], df["E"], df["weights"])
# Values must agree to a relative tolerance of 1e-3.
npt.assert_allclose(expected, actual, rtol=0.001)
def test_normalize():
    """Check the column statistics of ``utils.normalize`` on the larynx dataset.

    With no explicit mean/std supplied, every column of the normalized frame
    is expected to have mean ~0 and standard deviation ~1.
    """
    larynx = load_larynx()
    _, n_columns = larynx.shape

    # Each statistic is taken from a freshly normalized frame.
    column_means = utils.normalize(larynx).mean(0).values
    npt.assert_almost_equal(column_means, np.zeros(n_columns))

    column_stds = utils.normalize(larynx).std(0).values
    npt.assert_almost_equal(column_stds, np.ones(n_columns))
def test_unnormalize():
    """Check that ``utils.unnormalize`` undoes ``utils.normalize``.

    The larynx dataset is normalized, then restored using its own column
    means and standard deviations; the result must match the original values.
    """
    larynx = load_larynx()
    column_means, column_stds = larynx.mean(0), larynx.std(0)

    round_tripped = utils.unnormalize(utils.normalize(larynx), column_means, column_stds)
    npt.assert_almost_equal(larynx.values, round_tripped.values)
# Known AFT model
self._norm_mean_ = df[self.regressors[self._primary_parameter_name]].mean(0)
self._norm_mean_ancillary = df[self.regressors[self._ancillary_parameter_name]].mean(0)
_norm_std = df.std(0)
self._constant_cols = pd.Series(
[(_norm_std.loc[variable_name] < 1e-8) for (_, variable_name) in _index], index=_index
)
self._norm_std = pd.Series([_norm_std.loc[variable_name] for (_, variable_name) in _index], index=_index)
self._norm_std[self._constant_cols] = 1.0
_norm_std[_norm_std < 1e-8] = 1.0
_params, self.log_likelihood_, self._hessian_ = self._fit_model(
log_likelihood_function,
Ts,
self._create_Xs_dict(utils.normalize(df, 0, _norm_std)),
E.values,
weights.values,
entries.values,
show_progress=show_progress,
initial_point=initial_point,
)
self.params_ = _params / self._norm_std
self.variance_matrix_ = self._compute_variance_matrix()
self.standard_errors_ = self._compute_standard_errors(
Ts, E.values, weights.values, entries.values, self._create_Xs_dict(df)
)
self.confidence_intervals_ = self._compute_confidence_intervals()
if self._KNOWN_MODEL:
# too slow for non-KNOWN models
X, T, E, weights, original_index, self._clusters = self._preprocess_dataframe(df)
self.durations = T.copy()
self.event_observed = E.copy()
self.weights = weights.copy()
if self.strata is not None:
self.durations.index = original_index
self.event_observed.index = original_index
self.weights.index = original_index
self._norm_mean = X.mean(0)
self._norm_std = X.std(0)
hazards_ = self._newton_rhaphson(
normalize(X, self._norm_mean, self._norm_std),
T,
E,
weights=weights,
initial_beta=initial_beta,
show_progress=show_progress,
step_size=step_size,
)
self.hazards_ = pd.DataFrame(hazards_.T, columns=X.columns, index=["coef"]) / self._norm_std
self.variance_matrix_ = -inv(self._hessian_) / np.outer(self._norm_std, self._norm_std)
self.standard_errors_ = self._compute_standard_errors(
normalize(X, self._norm_mean, self._norm_std), T, E, weights
)
self.confidence_intervals_ = self._compute_confidence_intervals()
hazards_ = self._newton_rhaphson(
normalize(X, self._norm_mean, self._norm_std),
T,
E,
weights=weights,
initial_beta=initial_beta,
show_progress=show_progress,
step_size=step_size,
)
self.hazards_ = pd.DataFrame(hazards_.T, columns=X.columns, index=["coef"]) / self._norm_std
self.variance_matrix_ = -inv(self._hessian_) / np.outer(self._norm_std, self._norm_std)
self.standard_errors_ = self._compute_standard_errors(
normalize(X, self._norm_mean, self._norm_std), T, E, weights
)
self.confidence_intervals_ = self._compute_confidence_intervals()
self.baseline_hazard_ = self._compute_baseline_hazards(X, T, E, weights)
self.baseline_cumulative_hazard_ = self._compute_baseline_cumulative_hazard()
self.baseline_survival_ = self._compute_baseline_survival()
self._predicted_partial_hazards_ = self.predict_partial_hazard(X).values
return self
X = X[order]
pass_for_numeric_dtypes_or_raise(X)
elif isinstance(X, pd.Series) and ((X.shape[0] == len(hazard_names) + 2) or (X.shape[0] == len(hazard_names))):
X = X.to_frame().T
order = hazard_names
X = X[order]
pass_for_numeric_dtypes_or_raise(X)
elif isinstance(X, pd.Series):
assert len(hazard_names) == 1, "Series not the correct arugment"
X = pd.DataFrame(X)
pass_for_numeric_dtypes_or_raise(X)
X = X.astype(float)
index = _get_index(X)
X = normalize(X, self._norm_mean.values, 1)
return pd.DataFrame(np.dot(X, self.hazards_.T), index=index)
DataFrame
Note
-----
If X is a DataFrame, the order of the columns do not matter. But
if X is an array, then the column ordering is assumed to be the
same as the training dataset.
"""
if isinstance(X, pd.DataFrame):
order = self.params_.index
X = X[order]
check_for_numeric_dtypes_or_raise(X)
X = X.astype(float)
index = _get_index(X)
X = normalize(X, self._norm_mean.values, 1)
return pd.DataFrame(np.dot(X, self.params_), index=index)