def test_print_summary_with_decimals(self, rossi, cph):
    import sys

    # Redirect stdout so the printed summaries can be captured and compared.
    saved_stdout = sys.stdout
    try:
        out = StringIO()
        sys.stdout = out

        cph = CoxPHFitter()
        cph.fit(rossi, duration_col="week", event_col="arrest", batch_mode=True)
        cph._time_fit_was_called = "2018-10-23 02:40:45 UTC"
        cph.print_summary(decimals=1)
        output_dec_1 = out.getvalue().strip().split()

        cph.print_summary(decimals=3)
        output_dec_3 = out.getvalue().strip().split()

        assert output_dec_1 != output_dec_3
    finally:
        sys.stdout = saved_stdout
        cph.fit(rossi, duration_col="week", event_col="arrest", batch_mode=False)
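# Illustrative aside: the same output capture can be written with
# contextlib.redirect_stdout from the standard library; a minimal sketch,
# assuming a fitted `cph` as above.
import io
from contextlib import redirect_stdout

buf = io.StringIO()
with redirect_stdout(buf):
    cph.print_summary(decimals=2)
captured_summary = buf.getvalue()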
def test_proportional_hazard_test_with_log_transform():
    cph = CoxPHFitter()
    df = load_regression_dataset()
    cph.fit(df, "T", "E")

    results = stats.proportional_hazard_test(cph, df, time_transform="log")
    npt.assert_allclose(results.summary.loc["var1"]["test_statistic"], 2.227627, rtol=1e-3)
    npt.assert_allclose(results.summary.loc["var2"]["test_statistic"], 0.714427, rtol=1e-3)
    npt.assert_allclose(results.summary.loc["var3"]["test_statistic"], 1.466321, rtol=1e-3)
    npt.assert_allclose(results.summary.loc["var3"]["p"], 0.225927, rtol=1e-3)
    r$var
    r$naive.var
    residuals(r, type='dfbeta')
    """
    df = pd.DataFrame(
        {
            "var1": [0.209325, 0.693919, 0.443804, 0.065636, 0.386294],
            "var2": [0.184677, 0.071893, 1.364646, 0.098375, 1.663092],
            "T": [1, 2, 3, 4, 5],
            "var3": [2, 2, 2, 1, 2],
        }
    )
    df["E"] = 1

    cph = CoxPHFitter()
    cph.fit(df, "T", "E", robust=True, weights_col="var3", show_progress=True)

    expected = pd.Series({"var1": 1.431, "var2": -1.277})
    assert_series_equal(cph.hazards_.T["coef"], expected, check_less_precise=2, check_names=False)

    expected_cov = np.array([[3.5439245, -0.3549099], [-0.3549099, 0.4499553]])
    npt.assert_array_almost_equal(
        cph.variance_matrix_, expected_cov, decimal=1
    )  # not as precise because matrix inversion will accumulate estimation errors.

    expected = pd.Series({"var1": 2.094, "var2": 0.452})
    assert_series_equal(cph.summary["se(coef)"], expected, check_less_precise=2, check_names=False)
def test_output_with_strata_against_R(self, rossi):
    """
    rossi <- read.csv('.../lifelines/datasets/rossi.csv')
    r = coxph(formula = Surv(week, arrest) ~ fin + age + strata(race,
              paro, mar, wexp) + prio, data = rossi)
    """
    expected = np.array([[-0.3355, -0.0590, 0.1002]])
    cf = CoxPHFitter()
    cf.fit(
        rossi, duration_col="week", event_col="arrest", strata=["race", "paro", "mar", "wexp"], show_progress=True
    )
    npt.assert_array_almost_equal(cf.hazards_.values, expected, decimal=4)
def test_p_value_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
    # see table 8.1 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
    df = load_larynx()
    cf = CoxPHFitter()
    cf.fit(df, duration_col="time", event_col="death")

    # p-values
    actual_p = cf._compute_p_values()
    expected_p = np.array([0.1847, 0.7644, 0.0730, 0.00])
    npt.assert_array_almost_equal(actual_p, expected_p, decimal=2)
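# Illustrative aside: _compute_p_values is private; the same Wald p-values
# can be reproduced from the public summary columns. A minimal sketch,
# assuming the usual two-sided Wald test on z = coef / se(coef).
from scipy.stats import norm

z = cf.summary["coef"] / cf.summary["se(coef)"]
wald_p = 2 * norm.sf(np.abs(z))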
def test_crossval_for_cox_ph_with_normalizing_times(self, data_pred2, data_pred1):
    cf = CoxPHFitter()

    for data_pred in [data_pred1, data_pred2]:
        data_norm = data_pred.copy()

        # Normalize the durations to mean = 0 and standard deviation = 1.
        # This is a monotone transform, so rank-based (concordance) scores
        # should be unaffected.
        times = data_norm["t"]
        times -= np.mean(times)
        times /= np.std(times)
        data_norm["t"] = times

        scores = k_fold_cross_validation(
            cf, data_norm, duration_col="t", event_col="E", k=3, predictor="predict_partial_hazard"
        )
        mean_score = 1 - np.mean(scores)
# Plot predicted survival curves (cumulative product of the per-interval
# conditional survival probabilities in y_pred, blue/red) against
# Kaplan-Meier estimates (black).
plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[0, :]))), 'bo-')
plt.plot(kmf.survival_function_.index.values, kmf.survival_function_.KM_estimate, color='k')
kmf.fit(t[int(n_samples / 2) + 1:], event_observed=f[int(n_samples / 2) + 1:])
plt.plot(breaks, np.concatenate(([1], np.cumprod(y_pred[-1, :]))), 'ro-')
plt.plot(kmf.survival_function_.index.values, kmf.survival_function_.KM_estimate, color='k')
plt.xticks(np.arange(0, 2000.0001, 200))
plt.yticks(np.arange(0, 1.0001, 0.125))
plt.xlim([0, 2000])
plt.ylim([0, 1])
plt.xlabel('Follow-up time (days)')
plt.ylabel('Proportion surviving')
plt.title('One covariate. Actual=black, predicted=blue/red.')
plt.show()
# Fit a Cox model on the same data and compare its coefficient and hazard
# ratio with the corresponding first-layer weight of the neural network.
myData = pd.DataFrame({'x_train': x_train, 't': t, 'f': f})
cf = CoxPHFitter()
cf.fit(myData, 't', event_col='f')
cox_coef = cf.hazards_.x_train.values[0]
nn_coef = model.get_weights()[0][0][0]
print('Cox model coefficient:')
print(cox_coef)
print('Cox model hazard ratio:')
print(np.exp(cox_coef))
print('Neural network coefficient:')
print(nn_coef)
print('Neural network hazard ratio:')
print(np.exp(nn_coef))
##########################################################
# Proportional hazards model with flexible baseline hazard:
# Multiple variables, mix of discrete and continuous

# log(T) is linear in the covariates plus log-Weibull noise, i.e. an
# accelerated failure time model with Weibull baseline, which (with a fixed
# shape) also satisfies proportional hazards.
W = weibull_min.rvs(1, scale=1, loc=0, size=N)
Y = bX * X + bZ * Z + np.log(W)
T = np.exp(Y)

#######################################
df = pd.DataFrame({"T": T, "x": X, "x_": X_})

wf = WeibullAFTFitter().fit(df, "T")
wf.print_summary(4)

cph = CoxPHFitter().fit(df, "T", show_progress=True, step_size=1.0)
cph.print_summary(4)
def _run(self):
    self._cf = lifelines.CoxPHFitter()
    self._cf.fit(self.df, self.survival_col, event_col=self.cens_col, include_likelihood=True)
            SpearmanP.append(SpearmanP_temp)

            # TODO: override with new survival code
            if survival:
                # Extract time to event and event from label data
                E_truth = np.asarray([labels[1][k][0] for k in test_indices])
                T_truth = np.asarray([labels[2][k][0] for k in test_indices])

                # Concordance index
                cindex.append(1 - ll.utils.concordance_index(T_truth, y_prediction, E_truth))

                # Fit a Cox model using the SVR output, time to event and event
                data = {'predict': y_prediction, 'E': E_truth, 'T': T_truth}
                data = pd.DataFrame(data=data, index=test_patient_IDs)

                cph = ll.CoxPHFitter()
                cph.fit(data, duration_col='T', event_col='E')

                coxcoef.append(cph.summary['coef']['predict'])
                coxp.append(cph.summary['p']['predict'])

    if output in ['scores', 'decision']:
        # Return the scores and true values of all patients
        return y_truths, y_scores, y_predictions, pids
    elif output == 'stats':
        # Compute statistics
        # Extract sample size
        N_1 = float(len(train_patient_IDs))
        N_2 = float(len(test_patient_IDs))

        # Compute alpha confidence intervals (CIs)
        stats = dict()