validation_set = list(pd.date_range(datetime(2003, 1, 1, 0), datetime(2006, 12, 31, 18), freq='6H'))
train_set = list(pd.date_range(datetime(1979, 1, 1, 6), datetime(2002, 12, 31, 18), freq='6H'))
#%% Open data. If temporary file is specified, copy it there.
if args.temp_dir != 'None':
    new_predictor_file = os.path.join(args.temp_dir, args.predictor_file)
    print('Copying predictor file to %s...' % new_predictor_file)
    if os.path.isfile(new_predictor_file):
        print('File already exists!')
    else:
        shutil.copy(predictor_file, new_predictor_file, follow_symlinks=True)
    data = xr.open_dataset(new_predictor_file, chunks={'sample': batch_size})
else:
    data = xr.open_dataset(predictor_file, chunks={'sample': batch_size})
if 'time_step' in data.dims:
    time_dim = data.dims['time_step']
else:
    time_dim = 1
n_sample = data.dims['sample']
if crop_north_pole:
    data = data.isel(lat=(data.lat < 90.0))
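# Illustrative sketch (not part of the original script): assuming the dataset's
# 'sample' coordinate holds the same datetimes as the lists above, the
# train/validation split could be selected lazily from the chunked dataset:
train_data = data.sel(sample=train_set)
validation_data = data.sel(sample=validation_set)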
#%% Create a model and the data generators
dlwp = DLWPFunctional(is_convolutional=model_is_convolutional, is_recurrent=model_is_recurrent, time_dim=io_time_steps)
# Find the validation set
def _read_dataset(input_file):
    input_file_name = os.path.basename(input_file)
    if os.path.isdir(input_file):
        if input_file_name.endswith('.zarr'):
            ds = xr.open_zarr(input_file)
        else:
            ds = xr.open_mfdataset(glob.glob(os.path.join(input_file, '**', '*.nc'), recursive=True))
    else:
        if input_file_name.endswith('.zarr.zip'):
            ds = xr.open_zarr(input_file)
        else:
            ds = xr.open_dataset(input_file)
    return ds
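# Hypothetical usage (the path is illustrative only). Each branch above maps a
# different on-disk layout (Zarr directory, NetCDF tree, zipped Zarr, or single
# NetCDF file) to the same xarray.Dataset interface:
ds = _read_dataset('/data/run.zarr')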
# Benchmark-style method (note the 'time_' prefix): measures opening a chunked
# netCDF4 dataset followed by vectorized (.isel) indexing and an eager load.
def time_load_dataset_netcdf4_with_block_chunks_vindexing(self):
    ds = xr.open_dataset(self.filepath, engine="netcdf4", chunks=self.block_chunks)
    ds = ds.isel(**self.vinds).load()
gridder(glm_filenames, start_time, end_time, **grid_kwargs)
# print("Output file sizes")
for entry in os.scandir(os.path.join(tmpdirname, *resulting_date_path)):
    # File size should be close to what we expect, with some platform
    # differences due to OS, compression, etc.
    target = output_sizes[entry.name]
    actual = entry.stat().st_size
    percent = 1  # allow a 1% tolerance in file size
    assert np.abs(target - actual) < int(target * percent / 100)
    # Now compare the contents directly
    valid_file = os.path.join(sample_path, dirname,
                              *resulting_date_path, entry.name)
    valid = xr.open_dataset(valid_file)
    check = xr.open_dataset(entry.path)
    xr.testing.assert_allclose(valid, check)
@classmethod
def from_netcdf(cls, fname):
    # Open the NetCDF file and build an instance via the class's from_dataset.
    return cls.from_dataset(xr.open_dataset(fname))
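# Hypothetical usage (class and file names are illustrative only):
# obj = SomeWrapper.from_netcdf('results.nc')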
def compute_write_pet(kwrgs):
    # open the temperature NetCDF as an xarray Dataset object
    dataset = xr.open_dataset(kwrgs["netcdf_temp"])
    # trim out all data variables from the dataset except the temperature
    for var in dataset.data_vars:
        if var != kwrgs["var_name_temp"]:
            dataset = dataset.drop_vars(var)
    # get the initial year of the data
    data_start_year = int(str(dataset["time"].values[0])[0:4])
    _logger.info("Computing PET")
    # get the temperature and latitude arrays, over which we'll compute the PET
    da_temp = dataset[kwrgs["var_name_temp"]]
    # create a DataArray with the same shape as the temperature array, filling
    # every lon/time position with the latitude value for that lat index
    da_lat_orig = dataset["lat"]
if check_cancel_task(self, task):
    return
# Sort chunks by time id: earlier chunks are processed first since ids
# increment (e.g. 0, 1, 2, ...).
chunks = chunks if isinstance(chunks, list) else [chunks]
chunks = [chunk for chunk in chunks if chunk is not None]
if len(chunks) == 0:
    return None
total_chunks = sorted(chunks, key=lambda x: x[0])
geo_chunk_id = total_chunks[0][2]['geo_chunk_id']
time_chunk_id = total_chunks[0][2]['time_chunk_id']
metadata = {}
combined_data = None
for index, chunk in enumerate(total_chunks):
    metadata.update(chunk[1])
    data = xr.open_dataset(chunk[0])
    if combined_data is None:
        combined_data = data
        continue
    # Give time an index so mosaicking doesn't break.
    data = xr.concat([data], 'time')
    data['time'] = [0]
    clear_mask = task.satellite.get_clean_mask_func()(data)
    combined_data = task.get_processing_method()(data,
                                                 clean_mask=clear_mask,
                                                 intermediate_product=combined_data,
                                                 no_data=task.satellite.no_data_value,
                                                 reverse_time=task.get_reverse_time())
if check_cancel_task(self, task):
    return
if combined_data is None:
    return None
from metpy.io import GiniFile
from metpy.units import units
from netCDF4 import num2date
import scipy.ndimage as ndimage
from siphon.catalog import TDSCatalog
import xarray as xr
##############################################
# Get satellite data and set projection based on that data.
# Scan the catalog and download the data
satcat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/satellite/'
                    'WV/WEST-CONUS_4km/current/catalog.xml')
dataset = satcat.datasets[0]
f = GiniFile(dataset.remote_open())
gini_ds = xr.open_dataset(f)
# Pull parts out of the data file
dat = gini_ds.metpy.parse_cf('WV')
data_var = gini_ds.variables['WV']
x = gini_ds.variables['x'][:]
y = gini_ds.variables['y'][:]
timestamp = f.prod_desc.datetime
##############################################
# Use Siphon to obtain data that is close to the time of the satellite file
gfscat = TDSCatalog('http://thredds.ucar.edu/thredds/catalog/grib/'
                    'NCEP/GFS/Global_0p5deg/catalog.xml')
dataset = gfscat.datasets['Best GFS Half Degree Forecast Time Series']
ncss = dataset.subset()
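# Illustrative follow-up (not from the original example): a typical Siphon NCSS
# request against the subset service obtained above. The variable name and the
# reuse of the satellite timestamp are assumptions.
query = ncss.query()
query.variables('Geopotential_height_isobaric').time(timestamp)
gfs_data = ncss.get_data(query)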
dsRestart = xr.open_dataset(restartFileName)
dsRestart = dsRestart.isel(Time=0)
areaCell = dsRestart.areaCell
nVertLevels = dsRestart.sizes['nVertLevels']
vertIndex = \
    xr.DataArray.from_dict({'dims': ('nVertLevels',),
                            'data': np.arange(nVertLevels)})
vertMask = vertIndex < dsRestart.maxLevelCell
# get region masks
regionMaskFileName = self.parentTask.masksSubtask.maskFileName
dsRegionMask = xr.open_dataset(regionMaskFileName)
# figure out the indices of the regions to plot
regionNames = decode_strings(dsRegionMask.regionNames)
regionIndices = []
for regionToPlot in self.parentTask.regionNames:
    for index, regionName in enumerate(regionNames):
        if regionToPlot == regionName:
            regionIndices.append(index)
            break
# select only those regions we want to plot
dsRegionMask = dsRegionMask.isel(nRegions=regionIndices)
cellMasks = dsRegionMask.regionCellMasks
regionNamesVar = dsRegionMask.regionNames
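# Hedged sketch (not in the original): combining one region's cell mask with
# the cell areas loaded above to get a region's total area. Treating 'nCells'
# as the shared dimension is an assumption about the mesh layout.
regionMask = cellMasks.isel(nRegions=0) == 1
regionArea = areaCell.where(regionMask).sum('nCells')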
# NOTE: the opening of this function was cut off in the snippet; the header
# below is reconstructed from the parameters the body uses, and the function
# name is assumed.
def rinexobs(fn: Union[str, Path],
             use: Sequence[str] = None,
             group: str = 'OBS',
             tlim=None,
             useindicators: bool = False,
             meas: Sequence[str] = None,
             verbose: bool = False,
             *,
             overwrite: bool = False,
             fast: bool = True,
             interval: Union[float, int, timedelta] = None) -> xarray.Dataset:
    """
    Read RINEX 2.x and 3.x OBS files in ASCII or GZIP (or Hatanaka)
    """
    if isinstance(fn, (str, Path)):
        fn = Path(fn).expanduser()
    # %% NetCDF4
    if fn.suffix == '.nc':
        try:
            return xarray.open_dataset(fn, group=group)
        except OSError as e:
            raise LookupError(f'Group {group} not found in {fn}') from e
    tlim = _tlim(tlim)
    # %% version selection
    info = rinexinfo(fn)
    if int(info['version']) in (1, 2):
        obs = rinexobs2(fn, use, tlim=tlim,
                        useindicators=useindicators, meas=meas,
                        verbose=verbose,
                        fast=fast, interval=interval)
    elif int(info['version']) == 3:
        obs = rinexobs3(fn, use, tlim=tlim,
                        useindicators=useindicators, meas=meas,
                        verbose=verbose,
                        interval=interval)  # trailing kwargs were truncated in the snippet; reconstructed to close the call
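# Illustrative usage (the filename is hypothetical); georinex's public loader
# dispatches to OBS readers like the one above:
import georinex as gr
obs = gr.load('site0010.21o', use='G')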