# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_df_tcol_old_vs_new():
    """Check that the DataFrame-based triple collocation error agrees
    with the legacy three-array implementation on synthetic data."""
    n = 3
    df = create_testdata(n, np.arange(n))
    # New code path: takes the whole DataFrame at once.
    new_err = df_metrics.tcol_error(df)
    # Legacy code path: restrict to the first n columns first.
    df = df.iloc[:, :n]
    old_err_x, old_err_y, old_err_z = df_metrics.old_tcol_error(df)
    print(new_err)
    print(old_err_x, old_err_y, old_err_z)
    # Per-dataset errors must be identical between both implementations.
    assert old_err_x == new_err[0].ds0
    assert old_err_y == new_err[0].ds1
    assert old_err_z == new_err[0].ds2
def df_snr(realdata=False, n=3):
    """Print SNR, error standard deviation and scaling factor from the
    DataFrame-based triple collocation; for ``n == 3`` also print the
    values from the legacy array-based implementation for comparison.

    Parameters
    ----------
    realdata : bool, optional
        If True read real data, otherwise generate synthetic test data.
    n : int, optional
        Number of datasets / columns to use.
    """
    if realdata:
        df = read_realdata(n=n).dropna()
    else:
        df = create_testdata(n, np.array(list(range(n))))
    snr, err, beta = df_metrics.tcol_snr(df, ref_ind=0)
    # Emit each result under a short label.
    for label, values in (('snr', snr), ('err', err), ('beta', beta)):
        print(label)
        pprint(values)
    print('------------------')
    if n == 3:
        # Legacy API takes the three columns as separate arrays.
        cols = [df.iloc[:, i].values for i in range(3)]
        old_snr, old_err, old_beta = metrics.tcol_snr(*cols)
        print('old_snr')
        pprint(old_snr)
        print('old_err')
def test_df_snr_err():
    """Validate ``tcol_snr`` on a 4-column DataFrame: number of
    column triples, named-tuple field names, and spot-check values."""
    n = 4
    df = create_testdata(n, np.array(list(range(n))))
    snr, err, beta = df_metrics.tcol_snr(df, ref_ind=0)
    # One result per column triple; with 4 columns there are C(4,3) = 4.
    assert len(snr) == len(err) == len(beta) == n
    # Field names identify which datasets form each triple.
    expected_fields = [
        ('ds0', 'ds1', 'ds2'),
        ('ds0', 'ds1', 'ds3'),
        ('ds0', 'ds2', 'ds3'),
        ('ds1', 'ds2', 'ds3'),
    ]
    for i, fields in enumerate(expected_fields):
        assert snr[i]._fields == err[i]._fields == beta[i]._fields == fields
    # Spot-check a few numeric results.
    np.testing.assert_almost_equal(snr[0].ds0, -7.9553239335)
    np.testing.assert_almost_equal(err[1].ds0, 0.2511626266)
    # beta must be 1 as the synthetic data carries no bias
    np.testing.assert_almost_equal(beta[2].ds0, 1.)
    np.testing.assert_almost_equal(snr[3].ds3, np.nan)
# NOTE(review): interior fragment of a larger method — the enclosing
# `def`, and the bindings for `data`, `self`, `subset`, `x`, `y`,
# `dataset`, `season`, `pearson_*`, `spea_*`, `bias_dict`, `mse_*_dict`
# and `rmsd_dict` are defined outside this view; confirm against the
# full file before editing.
# calculate residual sum of squares over the column combinations
rss = df_metrics.RSS(data)
rss_dict = rss._asdict()
# calculate Kendall's tau (optional, controlled by self.calc_tau)
if self.calc_tau:
tau, p_tau = df_metrics.kendalltau(data)
tau_dict, p_tau_dict = tau._asdict(), p_tau._asdict()
else:
tau = p_tau = p_tau_dict = tau_dict = None
# No extra scaling is performed here.
# always scale for ubRMSD with mean std
# calculate ubRMSD
data_scaled = scale(data, method='mean_std')
ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
ubRMSD_dict = ubRMSD_nT._asdict()
# collect the per-pair metric values for each dataset pair name
for tds_name in self.tds_names:
R, p_R = pearson_R[tds_name], pearson_p[tds_name]
rho, p_rho = spea_rho[tds_name], spea_p[tds_name]
bias = bias_dict[tds_name]
mse = mse_dict[tds_name]
mse_corr = mse_corr_dict[tds_name]
mse_bias = mse_bias_dict[tds_name]
mse_var = mse_var_dict[tds_name]
rmsd = rmsd_dict[tds_name]
ubRMSD = ubRMSD_dict[tds_name]
rss = rss_dict[tds_name]
# tau values only exist when self.calc_tau was set above
if tau_dict and p_tau_dict:
tau = tau_dict[tds_name]
# triple collocation: SNR, error variance and scaling factor
z = data[self.df_columns[2]].values[subset]
snr, err, beta = metrics.tcol_snr(x, y, z)
# write one value per dataset into the output record for this season
for i, name in enumerate(self.ds_names):
dataset['{:}_{:}_snr'.format(name, season)][0] = snr[i]
dataset['{:}_{:}_err_var'.format(name, season)][0] = err[i]
dataset['{:}_{:}_beta'.format(name, season)][0] = beta[i]
# NOTE(review): interior fragment of another metrics routine — `data`,
# `self`, `n_obs` and `dataset` come from the surrounding (unseen) code.
# calculate Pearson correlation
pearson_R, pearson_p = df_metrics.pearsonr(data)
pearson_R = pearson_R._asdict()
pearson_p = pearson_p._asdict()
# calculate Spearman correlation
spea_rho, spea_p = df_metrics.spearmanr(data)
spea_rho = spea_rho._asdict()
spea_p = spea_p._asdict()
# scale data to reference in order to calculate absolute metrics
data_scaled = scale(data, method='min_max')
# calculate bias on the scaled data
bias_nT = df_metrics.bias(data_scaled)
bias_dict = bias_nT._asdict()
# calculate ubRMSD on the scaled data
ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
ubRMSD_dict = ubRMSD_nT._asdict()
# collect per-pair correlation values
for tds_name in self.tds_names:
R = pearson_R[tds_name]
dataset['n_obs'][0] = n_obs
# NOTE(review): interior fragment of a third metrics routine — `data`
# and `self` are bound outside this view; the trailing `else:` has its
# body outside this fragment as well.
# calculate Pearson correlation
pearson_R, pearson_p = df_metrics.pearsonr(data)
pearson_R, pearson_p = pearson_R._asdict(), pearson_p._asdict()
# calculate Spearman correlation
spea_rho, spea_p = df_metrics.spearmanr(data)
spea_rho, spea_p = spea_rho._asdict(), spea_p._asdict()
# calculate bias (on the unscaled data here)
bias_nT = df_metrics.bias(data)
bias_dict = bias_nT._asdict()
# calculate RMSD
rmsd = df_metrics.rmsd(data)
rmsd_dict = rmsd._asdict()
# calculate MSE and its correlation / bias / variance components
mse, mse_corr, mse_bias, mse_var = df_metrics.mse(data)
mse_dict, mse_corr_dict, mse_bias_dict, mse_var_dict = \
mse._asdict(), mse_corr._asdict(), mse_bias._asdict(), mse_var._asdict()
# calculate residual sum of squares
rss = df_metrics.RSS(data)
rss_dict = rss._asdict()
# calculate Kendall's tau (optional, controlled by self.calc_tau)
if self.calc_tau:
tau, p_tau = df_metrics.kendalltau(data)
tau_dict, p_tau_dict = tau._asdict(), p_tau._asdict()
else:
# NOTE(review): tail of an example/demo script — `scaled_data`,
# `label_ascat`, `label_insitu`, `ISMN_time_series`, `ascat_time_series`
# and the loop counter `i` are defined above, outside this view.
plt.xlabel(label_ascat)
plt.ylabel(label_insitu)
plt.show()
# calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
x, y = scaled_data[label_ascat].values, scaled_data[label_insitu].values
print("ISMN time series:", ISMN_time_series)
print("compared to")
print(ascat_time_series)
print("Results:")
# df_metrics takes a DataFrame as input and automatically
# calculates the metric on all combinations of columns
# returns a named tuple for easy printing
print(df_metrics.pearsonr(scaled_data))
print("Spearman's (rho,p_value)", metrics.spearmanr(x, y))
print("Kendalls's (tau,p_value)", metrics.kendalltau(x, y))
print(df_metrics.kendalltau(scaled_data))
print(df_metrics.rmsd(scaled_data))
print("Bias", metrics.bias(x, y))
print("Nash Sutcliffe", metrics.nash_sutcliffe(x, y))
i += 1
# only show the first 2 stations, otherwise this program would run a long time
# and produce a lot of plots
if i >= 2:
break