# Fragment of a NumPy structured-dtype field list for lookup-table event
# variables. The list name below is added for illustration; the source
# fragment is unnamed and truncated.
lutevent_fields = [
    ('lutevent_y', 'f8'),
    ('lutevent_energy', 'f8'),
    ('lutevent_count', 'f4'),
    ('lutevent_flash_count', 'f4'),
    ('lutevent_group_count', 'f4'),
    ('lutevent_total_flash_area', 'f8'),
    ('lutevent_total_group_area', 'f8'),
    ('lutevent_time_offset', 'f8'),  # dtype truncated in source; 'f8' assumed
]
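# A minimal sketch (not from the original module) of how such a field list is
# typically consumed: building a NumPy structured dtype and an empty,
# correctly-typed event array. `lutevent_fields` is the list defined above.
import numpy as np

lutevent_dtype = np.dtype(lutevent_fields)
events = np.zeros((0,), dtype=lutevent_dtype)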
def gcnet2nc(args, input_file, output_file, stations):
    """Main function to convert GCNet ascii file to netCDF"""
    df = init_dataframe(args, input_file)
    station_number = df['station_number'][0]
    df.drop('station_number', axis=1, inplace=True)

    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    surface_temp = extrapolate_temp(df)

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    month, day, hour, minutes, time, time_bounds, sza, az = get_time_and_sza(
        args, df, longitude, latitude)

    common.log(args, 4, 'Calculating quality control variables')
    fill_dataset_quality_control(df, ds, input_file)

    if args.no_drv_tm:
        pass
def ds(self):
    """The dataset of the :attr:`data` DataFrame"""
    import xarray as xr
    ds = xr.Dataset.from_dataframe(self.data.set_index('wetf'))
    ds.wetf.attrs['long_name'] = 'Fraction of wet days'
    ds.p11.attrs['long_name'] = 'Prob. Wet then Wet'
    ds.p101.attrs['long_name'] = 'Prob. Wet then Dry then Wet'
    ds.p001.attrs['long_name'] = 'Prob. Dry then Dry then Wet'
    ds.p11.attrs['symbol'] = 'p_{11}'
    ds.p101.attrs['symbol'] = 'p_{101}'
    ds.p001.attrs['symbol'] = 'p_{001}'
    return ds
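# A self-contained sketch (synthetic data, not from the original package) of
# the pattern used in the property above: index the DataFrame, convert it with
# xr.Dataset.from_dataframe, then attach metadata through .attrs.
import pandas as pd
import xarray as xr

data = pd.DataFrame({'wetf': [0.1, 0.2, 0.3], 'p11': [0.5, 0.6, 0.7]})
ds = xr.Dataset.from_dataframe(data.set_index('wetf'))
ds.p11.attrs['long_name'] = 'Prob. Wet then Wet'
print(ds.p11.dims)  # ('wetf',): the index became the coordinate dimension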
        # (tail of the preceding branch)
        obs.columns = [col[1] for col in obs.columns.values]
        return obs
    elif data_structure == 'array':
        obs = obs.unstack(level=1)
        obs.columns = [col[1] for col in obs.columns.values]
        obs = xr.Dataset.from_dataframe(obs.swaplevel(0, 1))
        if stn_ids is None:
            stns = self.stns
        else:
            stns = self.stns.loc[stn_ids]
        # include station metadata; merge returns a new Dataset (the inplace
        # keyword was removed in newer xarray releases), so reassign the result
        obs = obs.merge(xr.Dataset.from_dataframe(stns))
        return obs
    else:
        raise ValueError("Unrecognized data format. Expected one of: "
                         "'stacked', 'tidy', 'array'")
def aaws2nc(args, input_file, output_file, stations):
    """Main function to convert AAWS ascii file to netCDF"""
    df = init_dataframe(args, input_file)
    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    time, time_bounds, sza = get_time_and_sza(
        args, input_file, latitude, longitude, df)[:3]

    if args.no_drv_tm:
        pass
    else:
        common.log(args, 5, 'Calculating month and day')
        # positions [3:] of the return value hold the derived time fields
        year, month, day, hour, day_of_year = get_time_and_sza(
            args, input_file, latitude, longitude, df)[3:]
        ds['year'] = 'time', year
def get_metadata(self, items=None):
    """Return an xr.Dataset of metadata from the input image list

    Args:
        items (iterable): Subset of metadata column names (`self.extra_md`)
            to return

    Returns:
        xarray.Dataset: A Dataset containing the time series metadata
        with coordinate dimensions (time)
    """
    if not items:
        items = self.extra_md
    return xr.Dataset.from_dataframe(self.df[items])
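# Assumed-data sketch of what get_metadata returns: selecting a column subset
# before from_dataframe keeps the DataFrame's (time) index as the coordinate.
import pandas as pd
import xarray as xr

md = pd.DataFrame(
    {'sensor': ['LT5', 'LE7'], 'cloud_cover': [10.0, 5.0]},
    index=pd.Index(pd.to_datetime(['2000-01-01', '2000-02-01']), name='time'))
meta = xr.Dataset.from_dataframe(md[['sensor']])
print(meta)  # one data variable ('sensor') on the 'time' dimension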
def scar2nc(args, input_file, output_file):
    """Main function to convert SCAR txt file to netCDF"""
    (df, temperature_vars, pressure_vars, station_name, latitude, longitude,
     height, country, institution) = init_dataframe(args, input_file)

    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')

    common.log(args, 2, 'Calculating time and sza')
    time, time_bounds, sza, day_of_year = get_time_and_sza(
        args, df, latitude, longitude)

    ds['day_of_year'] = 'time', day_of_year
    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude
    ds['height'] = tuple(), height
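# Quick demonstration (assumed values) of the assignment pattern above: a
# (dims, data) tuple with empty dims creates a dimensionless scalar variable,
# while ('time', values) places a variable on the existing 'time' dimension.
import xarray as xr

ds = xr.Dataset(coords={'time': [0, 1, 2]})
ds['latitude'] = tuple(), -89.99
ds['sza'] = 'time', [70.1, 71.5, 72.9]
print(ds['latitude'].dims, ds['sza'].dims)  # () ('time',)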
        self.logger.debug('Calculating %s bias correction for experiment %s',
                          vname, self.experiment)
        postproc_dir = self.exp_config.setdefault(
            'postprocdir', osp.join(self.exp_config['expdir'], 'postproc'))
        if ds is None:
            df = pd.DataFrame(info[vname]).T
            try:
                # drop the 'All' aggregate row, keeping only the percentiles
                df.drop('All', inplace=True)
            except (ValueError, KeyError):
                pass
            df.index.name = 'pctl'
            df.reset_index(inplace=True)
            # map each percentile to its standard-normal quantile
            df['unorm'] = stats.norm.ppf(
                df['pctl'].astype(float) / 100., 0, 1.0)
            ds = xr.Dataset.from_dataframe(df)
        # --- plots
        d = self.exp_config.setdefault('postproc', OrderedDict()).setdefault(
            'bias', OrderedDict()).setdefault(vname, OrderedDict())
        plot_output = plot_output or d.get('plot_output')
        if plot_output is None:
            plot_output = osp.join(
                postproc_dir, vname + '_bias_correction.pdf')
        project_output = osp.splitext(plot_output)[0] + '.pkl'
        nc_output = osp.splitext(plot_output)[0] + '.nc'
        d['plot_file'] = plot_output
        d['project_file'] = project_output
        d['nc_file'] = nc_output
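# Sketch of the percentile-to-quantile step above, runnable on its own:
# stats.norm.ppf maps percentiles (0-100) onto standard-normal quantiles,
# which is what the 'unorm' column holds before from_dataframe is called.
import pandas as pd
import xarray as xr
from scipy import stats

df = pd.DataFrame({'pctl': [5, 25, 50, 75, 95]})
df['unorm'] = stats.norm.ppf(df['pctl'].astype(float) / 100., 0, 1.0)
ds = xr.Dataset.from_dataframe(df)
print(ds['unorm'].values)  # symmetric around 0; ppf(0.5) == 0.0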
def gcnet2nc(args, input_file, output_file, stations):
    """Main function to convert GCNet ascii file to netCDF"""
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)
    station_number = df['station_number'][0]
    df.drop('station_number', axis=1, inplace=True)

    ds = xr.Dataset.from_dataframe(df)
    ds = ds.drop('time')
    # surface_temp = extrapolate_temp(df)

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    (month, day, hour, minutes, time, time_bounds, sza, az, first_date,
     last_date) = get_time_and_sza(args, df, longitude, latitude)

    common.log(args, 4, 'Calculating quality control variables')
    fill_dataset_quality_control(df, ds, input_file)

    if args.flx:
        common.log(args, 5, 'Calculating Sensible and Latent Heat Fluxes')
# (tail of the data-download call; the call itself is truncated in the source)
                     days_per_request=days_per_request)

# hier = polaris.set_index(['Date', 'Station Number', 'Depth'])
# There were 10 rows on 2017-04-04, stations 35 and 36, with duplicate
# entries, as if the same location was measured on the same day, 1 hour apart.
hier = polaris.groupby(['Date', 'Station Number', 'Depth']).first()
if len(hier) != len(polaris):
    logging.warning("After grouping by date, station and depth, "
                    "there were some duplicates.")

import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    ds = xr.Dataset.from_dataframe(hier)
ds = ds.rename({'Station Number': 'station', 'Depth': 'depth', 'Date': 'cruise'})
ds['date'] = ds['cruise']
ds = ds.set_coords(['Julian Date', 'Days since 1/1/1990', 'Decimal Date',
                    'time', 'Distance from 36', 'longitude', 'latitude'])
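# Toy example (synthetic rows) of the dedup step above: grouping on the
# would-be index levels and taking .first() keeps one row per
# (Date, Station Number, Depth) combination.
import pandas as pd

polaris = pd.DataFrame({'Date': ['2017-04-04'] * 2,
                        'Station Number': [35, 35],
                        'Depth': [5.0, 5.0],
                        'temp': [1.2, 1.3]})
hier = polaris.groupby(['Date', 'Station Number', 'Depth']).first()
assert len(hier) == 1  # the duplicate hourly entry collapses to one row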
def agg_field(ds, fld, agg):
    with warnings.catch_warnings():
        # ignore RuntimeWarning due to all-nan slices
        # and FutureWarning for potential NaT != NaT comparison
        warnings.simplefilter('ignore')
        vmin = ds[fld].min(dim=agg)
        vmax = ds[fld].max(dim=agg)
        # (vmin == vmin) is False for NaN/NaT, so the test is true only where
        # the field is non-missing and actually varies along the aggregated dim
        if np.any((vmin == vmin) & (vmin != vmax)):
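# Why the comparison trick above works: NaN (and NaT) never compares equal to
# itself, so (vmin == vmin) filters out missing slices before the
# vmin != vmax variation test. The values below are made up.
import numpy as np

vmin = np.array([np.nan, 1.0, 2.0])
vmax = np.array([np.nan, 1.0, 3.0])
print(np.any((vmin == vmin) & (vmin != vmax)))  # True: only 2.0 vs 3.0 counts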