Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_scale(method):
    """Scaling the second column to the first must make them (almost) equal.

    Builds a two-column frame where ``y`` is exactly ``0.5 * x`` and checks
    that ``scaling.scale`` with ``reference_index=0`` maps ``y`` onto ``x``.
    """
    size = 1000
    reference = np.arange(size)
    candidate = np.arange(size) * 0.5
    frame = pd.DataFrame({'x': reference, 'y': candidate},
                         columns=['x', 'y'])
    frame_scaled = scaling.scale(frame,
                                 method=method,
                                 reference_index=0)
    nptest.assert_almost_equal(frame_scaled['x'].values,
                               frame_scaled['y'].values)
def test_scale_error(method):
    """An unsupported scaling method must raise ``KeyError``.

    Bug fix: in the original, two ``nptest.assert_almost_equal`` lines were
    placed after the raising ``scaling.scale`` call inside the
    ``pytest.raises`` block; since the call raises, they were unreachable
    dead code and have been removed.
    """
    n = 1000
    x = np.arange(n)
    y = np.arange(n) * 0.5
    df = pd.DataFrame({'x': x, 'y': y}, columns=['x', 'y'])
    with pytest.raises(KeyError):
        # the call itself is expected to raise; no result to assert on
        scaling.scale(df,
                      method=method,
                      reference_index=0)
# NOTE(review): fragment of a larger method (the enclosing ``def`` and the
# body of the trailing ``if`` are not visible here); code left byte-identical.
# calculate the residual sum of squares between the paired columns
rss = df_metrics.RSS(data)
rss_dict = rss._asdict()
# calculate Kendall's tau (optional — it is comparatively expensive)
if self.calc_tau:
tau, p_tau = df_metrics.kendalltau(data)
tau_dict, p_tau_dict = tau._asdict(), p_tau._asdict()
else:
tau = p_tau = p_tau_dict = tau_dict = None
# ubRMSD is always computed on mean/std-scaled data so that the result
# is independent of the datasets' original units and offsets
data_scaled = scale(data, method='mean_std')
ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
ubRMSD_dict = ubRMSD_nT._asdict()
# collect the per-pair metrics for each dataset combination
for tds_name in self.tds_names:
R, p_R = pearson_R[tds_name], pearson_p[tds_name]
rho, p_rho = spea_rho[tds_name], spea_p[tds_name]
bias = bias_dict[tds_name]
mse = mse_dict[tds_name]
mse_corr = mse_corr_dict[tds_name]
mse_bias = mse_bias_dict[tds_name]
mse_var = mse_var_dict[tds_name]
rmsd = rmsd_dict[tds_name]
ubRMSD = ubRMSD_dict[tds_name]
rss = rss_dict[tds_name]
# tau results are only available when self.calc_tau was True
# (body of this branch is truncated in this view)
if tau_dict and p_tau_dict:
# Temporally collocate the two series: for each ASCAT timestamp take the
# nearest ISMN observation within the given window (1/24 day = 1 hour).
matched_data = temp_match.matching(ascat_time_series.data, ISMN_time_series.data,
window=1 / 24.)
# matched ISMN data is now a dataframe with the same datetime index
# as ascat_time_series.data and the nearest insitu observation
# continue only with the two columns that are compared below
matched_data = matched_data[[label_ascat, label_insitu]]
# the plot shows that ISMN and ASCAT are observed in different units
matched_data.plot(figsize=(15, 5), secondary_y=[label_ascat],
title='temporally merged data')
plt.show()
# this takes the matched_data DataFrame and scales all columns to the
# column with the given reference_index, in this case in situ (index 1)
scaled_data = scaling.scale(matched_data, method='lin_cdf_match',
reference_index=1)
# now the scaled ascat data and insitu_sm are in the same space
scaled_data.plot(figsize=(15, 5), title='scaled data')
plt.show()
# scatter plot of the scaled pairs to inspect the agreement visually
plt.scatter(scaled_data[label_ascat].values, scaled_data[label_insitu].values)
plt.xlabel(label_ascat)
plt.ylabel(label_insitu)
plt.show()
# calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
x, y = scaled_data[label_ascat].values, scaled_data[label_insitu].values
print("ISMN time series:", ISMN_time_series)
print("compared to")
# Temporally collocate the two series: for each ASCAT timestamp take the
# nearest ISMN observation within the given window (1/24 day = 1 hour).
matched_data = temp_match.matching(ascat_time_series.data,
                                   ISMN_time_series.data,
                                   window=1 / 24.)
# matched ISMN data is now a dataframe with the same datetime index
# as ascat_time_series.data and the nearest insitu observation;
# continue only with the two columns that are compared below
matched_data = matched_data[[label_ascat, label_insitu]]
# the plot shows that ISMN and ASCAT are observed in different units
matched_data.plot(figsize=(15, 5), secondary_y=[label_ascat],
                  title='temporally merged data')
plt.show()
# this takes the matched_data DataFrame and scales all columns to the
# column with the given reference_index, in this case in situ (index 1)
scaled_data = scaling.scale(matched_data, method='lin_cdf_match',
                            reference_index=1)
# now the scaled ascat data and insitu_sm are in the same space
scaled_data.plot(figsize=(15, 5), title='scaled data')
plt.show()
# scatter plot of the scaled pairs to inspect the agreement visually
plt.scatter(scaled_data[label_ascat].values,
            scaled_data[label_insitu].values)
plt.xlabel(label_ascat)
plt.ylabel(label_insitu)
plt.show()
# calculate correlation coefficients, RMSD, bias, Nash Sutcliffe
x, y = scaled_data[label_ascat].values, scaled_data[label_insitu].values
# Bug fix: the original used Python 2 ``print`` statements here, which are
# a SyntaxError under Python 3 and inconsistent with the print() calls
# used in the sibling snippet above.
print("ISMN time series:", ISMN_time_series)
print("compared to")
data: pandas.DataFrame
temporally matched dataset
reference_index: int
Which column of the data contains the
scaling reference.
gpi_info: tuple
tuple of at least, (gpi, lon, lat)
Where gpi has to be the grid point indices
of the grid of this scaler.
Raises
------
ValueError
if scaling is not successful
"""
# NOTE(review): the ``def`` line of this method is outside the visible
# fragment; it delegates to the scaling module with the method name
# configured on the instance (self.method).
return scaling.scale(data,
method=self.method,
reference_index=reference_index)
# Store the per-dataset, per-season results into the output record:
# snr, error variance and beta at position i. Presumably i is the index
# of `name` within the enclosing loop (not visible here) — TODO confirm.
dataset['{:}_{:}_snr'.format(name, season)][0] = snr[i]
dataset['{:}_{:}_err_var'.format(name, season)][0] = err[i]
dataset['{:}_{:}_beta'.format(name, season)][0] = beta[i]
# NOTE(review): fragment of a larger method (enclosing ``def`` not visible;
# the for-loop body continues past this view); code left byte-identical.
# calculate Pearson correlation and its p-value for every column pair
pearson_R, pearson_p = df_metrics.pearsonr(data)
pearson_R = pearson_R._asdict()
pearson_p = pearson_p._asdict()
# calculate Spearman rank correlation and its p-value
spea_rho, spea_p = df_metrics.spearmanr(data)
spea_rho = spea_rho._asdict()
spea_p = spea_p._asdict()
# scale data to reference in order to calculate absolute metrics
data_scaled = scale(data, method='min_max')
# calculate bias on the scaled data
bias_nT = df_metrics.bias(data_scaled)
bias_dict = bias_nT._asdict()
# calculate unbiased RMSD on the scaled data
ubRMSD_nT = df_metrics.ubrmsd(data_scaled)
ubRMSD_dict = ubRMSD_nT._asdict()
# collect the per-pair metrics for each dataset combination
for tds_name in self.tds_names:
R = pearson_R[tds_name]
p_R = pearson_p[tds_name]
rho = spea_rho[tds_name]
p_rho = spea_p[tds_name]
bias = bias_dict[tds_name]
ubRMSD = ubRMSD_dict[tds_name]