Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
dfs[label[1]]['naive_coverage'],
dfs[label[1]]['naive_pval'],
dfs[label[1]]['naive_length'])
# Print summary statistics for the selective and the naive results, then draw
# the empirical CDFs of the probit, logit and naive p-values into cv_pivots.pdf.
# NOTE(review): P, L, coverage and the naive_* names are presumably unpacked
# from the dfs[label[1]] expression that is cut off above -- confirm.
# NOTE(review): np, plt, sm, dfs and U are defined outside this excerpt.
print("selective:", np.mean(P), np.std(P), np.mean(L), np.mean(coverage))
print("naive:", np.mean(naive_P), np.std(naive_P), np.mean(naive_L), np.mean(naive_coverage))
# Element-wise ratio L / naive_L, averaged over all entries.
print("len ratio selective divided by naive:", np.mean(np.array(L) / np.array(naive_L)))
# naive_P is rebound here to the 'naive_pval' column of the 'probit' entry.
probit_P, naive_P = dfs['probit']['pval'], dfs['probit']['naive_pval']
logit_P = dfs['logit']['pval']
# Start from an empty figure, then evaluate each ECDF on the grid U.
plt.clf()
plt.plot(U, sm.distributions.ECDF(probit_P)(U), 'c', linewidth=3, label = "fit probit")
plt.plot(U, sm.distributions.ECDF(logit_P)(U), 'b', linewidth=3, label="fit logit")
plt.plot(U, sm.distributions.ECDF(naive_P)(U), 'y', linewidth=3, label="naive")
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0, 1], [0, 1], 'k--', linewidth=2)
plt.xlabel("Observed pivot", fontsize=18)
plt.ylabel("Proportion (empirical CDF)", fontsize=18)
plt.title("Pivots", fontsize=20)
plt.legend(fontsize=18, loc="lower right")
plt.savefig('cv_pivots.pdf')
'Naive': 'r',
'Bonferroni': 'gray',
'Lee':'gray',
'Strawman':'gray'},
figsize=(8, 8), straw=False):
f = plt.figure(figsize=figsize)
new_df = pd.DataFrame({'Learned': df['pivot'],
'Naive': df['naive_pivot']})
if straw:
new_df = pd.DataFrame({'Learned': new_df['Learned'],
'Strawman': new_df['Naive']})
U = np.linspace(0, 1, 101)
ax = f.gca()
for k in new_df.keys():
plt.plot(U, sm.distributions.ECDF(new_df[k])(U), color=palette[k], label=k, linewidth=5)
plt.plot([0,1], [0,1], 'k--', linewidth=3)
ax.set_xlabel('pivot', fontsize=20)
ax.set_ylabel('ECDF(pivot)', fontsize=20)
ax.legend(fontsize=15)
pngfile = outbase + '_pivot.png'
plt.savefig(pngfile, dpi=300)
return ax, f, pngfile, df, new_df
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Call simulate(n=n) 300 times, pool the returned values into P, L and
# coverage, print summary statistics and show the ECDF of the pooled P values.
# NOTE(review): indentation was lost in this excerpt; the four lines after the
# `for` are clearly the loop body, but whether the print that follows also
# runs on every iteration cannot be told from here -- confirm.
# NOTE(review): np and simulate are defined outside this excerpt.
n = 100
# Grid of 101 evenly spaced points on [0, 1] at which the ECDF is evaluated.
U = np.linspace(0, 1, 101)
P, L, coverage = [], [], []
plt.clf()
for i in range(300):
# simulate returns three iterables here (each is passed to list.extend).
p, cover, l = simulate(n=n)
coverage.extend(cover)
P.extend(p)
L.extend(l)
# Third value: mean of L divided by 2 * 1.65 / sqrt(n); presumably a reference
# interval length used for normalisation -- TODO confirm the intent of 1.65.
print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), np.mean(coverage))
plt.clf()
plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0,1], [0,1], 'k--', linewidth=2)
plt.show()
# Plot the observed series and every modeled column on `ax`, then configure
# the date ticks of the x-axis for the non-CDF plot types.
# NOTE(review): indentation was lost in this excerpt, so the extent of each
# if/for body below is inferred from context only -- confirm against the
# original file. obs, data, columns, x, ax, args, plottype, the colours and the
# PLOT_TYPE_* / OBS_HEADER_STREAMFLOW constants are defined outside this excerpt.
# Standard or log plot
obs_y = obs[OBS_HEADER_STREAMFLOW]
# For CDF plots the observed values are replaced by their ECDF evaluated at x.
if plottype == PLOT_TYPE_CDF:
obs_ecdf = sm.distributions.ECDF(obs_y)
obs_y = obs_ecdf(x)
# obs_plt stays None when the observed line is suppressed via args.supressObs.
obs_plt = None
if not args.supressObs:
(obs_plt,) = ax.plot(x, obs_y, obs_color, linewidth=2)
# Plot modeled values
# data_plt collects the line handle returned by ax.plot for each column.
data_plt = []
for c in columns:
# Standard or log plot
mod_y = data[c]
if plottype == PLOT_TYPE_CDF:
mod_ecdf = sm.distributions.ECDF(data[c])
mod_y = mod_ecdf(x)
(mod_plt,) = ax.plot(x, mod_y, color=mod_color, linewidth=1)
data_plt.append(mod_plt)
# X-axis
if plottype == PLOT_TYPE_STD or \
plottype == PLOT_TYPE_LOGY:
# len(x) / 365 -- presumably x holds one entry per day; TODO confirm.
num_years = len(x) / 365
# Most natural reading of the nesting: more than 5 -> yearly ticks,
# more than 2 up to 5 -> a tick every 3 months, otherwise monthly ticks.
if num_years > 2:
if num_years > 5:
ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
else:
ax.xaxis.set_major_locator(matplotlib.dates.MonthLocator(interval=3))
else:
ax.xaxis.set_major_locator(matplotlib.dates.MonthLocator())
# Tick labels use the '%b-%Y' date format.
# NOTE(review): unclear whether this line is inside the plottype branch.
ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b-%Y'))
# Merge the current results frame with what is already stored in HIV_CV.csv,
# write the result back, and save a pivot plot with an extra 'Liu CV' curve.
# NOTE(review): indentation was lost in this excerpt; df, i, U and pivot_plot
# come from outside it, and the extent of each if/try body must be confirmed.
csvfile = 'HIV_CV.csv'
# Strip the 4-character '.csv' extension to get the base name for outputs.
outbase = csvfile[:-4]
if df is not None and i > 0:
try:
# Existing rows from the CSV are appended after the new rows in df.
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
# No CSV on disk yet: keep df as it is.
pass
df.to_csv(csvfile, index=False)
if len(df['pivot']) > 0:
# NOTE(review): two values are unpacked from pivot_plot here -- confirm this
# matches the return value of its definition.
pivot_ax, lengths_ax = pivot_plot(df, outbase)
liu_pivot = df['liu_pivot']
# Drop NaN entries before building the ECDF.
liu_pivot = liu_pivot[~np.isnan(liu_pivot)]
pivot_ax.plot(U, sm.distributions.ECDF(liu_pivot)(U), 'gray', label='Liu CV',
linewidth=3)
pivot_ax.legend()
fig = pivot_ax.figure
fig.savefig(csvfile[:-4] + '.pdf')
# Run one simulation with B=10000, merge its frame with keras_targets.csv,
# plot the ECDF of the naive pivots and of the pivots per batch size, and
# write the merged frame back to the CSV.
# NOTE(review): indentation was lost in this excerpt, so the extent of the
# if/for bodies (and whether to_csv is guarded by the `if`) must be confirmed.
df = simulate(B=10000)
csvfile = 'keras_targets.csv'
try:
# Existing rows from the CSV are appended after the new rows in df.
# NOTE(review): if simulate returned None this concat would receive None.
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
# No CSV on disk yet: keep df as it is.
pass
if df is not None and len(df['pivot']) > 0:
print(df['pivot'], 'pivot')
plt.clf()
# Grid of 101 evenly spaced points on [0, 1] at which each ECDF is evaluated.
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
# One curve per distinct batch_size value, using only the rows with that value.
for b in np.unique(df['batch_size']):
plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)
plt.legend()
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0,1], [0,1], 'k--', linewidth=2)
# Output name: the CSV name with its 4-character extension replaced by '.pdf'.
plt.savefig(csvfile[:-4] + '.pdf')
df.to_csv(csvfile, index=False)
# The first three lines look like the tail of an earlier plotting/simulation
# step; the remainder reruns simulate() 100 times and plots the ECDF of P.
# NOTE(review): indentation was lost in this excerpt; P, U, n, i and simulate
# are defined outside it, and the loop body extent must be confirmed.
# coverage / (i+1): accumulated coverage divided by the iteration count so far.
print(np.mean(P), np.std(P), coverage / (i+1))
plt.gca().set_ylim([-5,0])
plt.show()
# Only coverage and L are reset here; P is not reinitialised in the visible
# lines, so values appended below add to whatever P already holds.
coverage = 0
L = []
for i in range(100):
# simulate returns three values; cover is added to a numeric counter and
# p / l are appended as single items.
p, cover, l = simulate()
L.append(l)
coverage += cover
P.append(p)
# Third value: mean of L divided by 2 * 1.65 / sqrt(n); presumably a reference
# interval length used for normalisation -- TODO confirm the intent of 1.65.
print(np.mean(P), np.std(P), np.mean(L) / (2 * 1.65 / np.sqrt(n)), coverage / (i+1))
plt.clf()
plt.plot(U, sm.distributions.ECDF(P)(U), 'r', linewidth=3)
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0,1], [0,1], 'k--', linewidth=2)
plt.show()
# On odd iteration indices, merge df with gbm_targets.csv; then plot the ECDF
# of the naive pivots and of the pivots per batch size, and write df to the CSV.
# NOTE(review): indentation was lost in this excerpt; df and i come from
# outside it, and which statements fall under the `i % 2 == 1` guard cannot be
# told from here -- confirm against the original file.
csvfile = 'gbm_targets.csv'
# `i > 0` is already implied by `i % 2 == 1` for non-negative i.
if i % 2 == 1 and i > 0:
try:
# Existing rows from the CSV are appended after the new rows in df.
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
# No CSV on disk yet: keep df as it is.
pass
if len(df['pivot']) > 0:
plt.clf()
# Grid of 101 evenly spaced points on [0, 1] at which each ECDF is evaluated.
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
# One curve per distinct batch_size value, using only the rows with that value.
for b in np.unique(df['batch_size']):
plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)
plt.legend()
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0,1], [0,1], 'k--', linewidth=2)
# Output name: the CSV name with its 4-character extension replaced by '.pdf'.
plt.savefig(csvfile[:-4] + '.pdf')
df.to_csv(csvfile, index=False)
# Up to 500 rounds: run a simulation with B=20000, merge its frame with
# keras_targets_BH_strong.csv, plot the ECDF of the naive pivots and of the
# pivots per batch size, and write the merged frame back to the CSV.
# NOTE(review): indentation was lost in this excerpt, so how many of the
# following statements belong to the loop body must be confirmed.
for i in range(500):
df = simulate(B=20000)
csvfile = 'keras_targets_BH_strong.csv'
try:
# Existing rows from the CSV are appended after the new rows in df.
# NOTE(review): if simulate returned None this concat would receive None.
df = pd.concat([df, pd.read_csv(csvfile)])
except FileNotFoundError:
# No CSV on disk yet: keep df as it is.
pass
if df is not None and len(df['pivot']) > 0:
print(df['pivot'], 'pivot')
plt.clf()
# Grid of 101 evenly spaced points on [0, 1] at which each ECDF is evaluated.
U = np.linspace(0, 1, 101)
plt.plot(U, sm.distributions.ECDF(df['naive_pivot'])(U), 'b', label='Naive', linewidth=3)
# One curve per distinct batch_size value, using only the rows with that value.
for b in np.unique(df['batch_size']):
plt.plot(U, sm.distributions.ECDF(np.array(df['pivot'])[np.array(df['batch_size']) == b])(U), label='B=%d' % b, linewidth=3)
plt.legend()
# Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.
plt.plot([0,1], [0,1], 'k--', linewidth=2)
# Output name: the CSV name with its 4-character extension replaced by '.pdf'.
plt.savefig(csvfile[:-4] + '.pdf')
df.to_csv(csvfile, index=False)