def test_no_mftime(self):
# rasterio can accept "filename" arguments that are actually urls,
# including paths to remote files.
# In issue #1816, we found that these caused dask to break, because
# the modification time was used to determine the dask token. This
# test ensures we can still chunk such files when reading with
# rasterio.
with create_tmp_geotiff(
8, 10, 3, transform_args=[1, 2, 0.5, 2.0], crs="+proj=latlong"
) as (tmp_file, expected):
with mock.patch("os.path.getmtime", side_effect=OSError):
with xr.open_rasterio(tmp_file, chunks=(1, 2, 2)) as actual:
assert isinstance(actual.data, da.Array)
assert_allclose(actual, expected)
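# A minimal usage sketch of the behaviour the test above exercises; the URL is
# hypothetical and this assumes an xarray version that still ships
# xr.open_rasterio (it has been removed from recent releases).
import xarray as xr
import dask.array as da

url = "https://example.com/cog.tif"  # hypothetical remote GeoTIFF
with xr.open_rasterio(url, chunks=(1, 256, 256)) as arr:
    assert isinstance(arr.data, da.Array)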
y = da.random.random(m, partition_size)
regressor = xgboost.dask.DaskXGBRegressor(verbosity=1, n_estimators=2)
regressor.set_params(tree_method='hist')
# assigning client here is optional
regressor.client = client
regressor.fit(X, y, eval_set=[(X, y)])
prediction = regressor.predict(X)
bst = regressor.get_booster()
history = regressor.evals_result()
print('Evaluation history:', history)
# returned prediction is always a dask array.
assert isinstance(prediction, da.Array)
return bst # returning the trained model
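# The snippet above uses m, partition_size, X and client without defining them;
# this is one plausible setup (sizes are illustrative), mirroring xgboost's
# dask demos, that would make it runnable.
import dask.array as da
import xgboost
from dask.distributed import Client, LocalCluster

if __name__ == "__main__":
    m, n, partition_size = 100_000, 100, 1_000
    with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
        X = da.random.random((m, n), chunks=(partition_size, n))
        # ...the regressor code shown above then defines y, fits and predicts...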
def _get_array(storage_units, var_name, dimensions, dim_props):
"""
Create an xarray.DataArray
:return xarray.DataArray
"""
dsk_id = str(uuid.uuid1()) # unique name for the requested dask graph
dsk = _get_dask_for_storage_units(storage_units, var_name, dimensions, dim_props['dim_vals'], dsk_id)
_fill_in_dask_blanks(dsk, storage_units, var_name, dimensions, dim_props, dsk_id)
dtype = storage_units[0].variables[var_name].dtype
chunks = tuple(tuple(dim_props['sus_size'][dim]) for dim in dimensions)
dask_array = da.Array(dsk, dsk_id, chunks, dtype=dtype)
coords = [(dim, dim_props['coord_labels'][dim]) for dim in dimensions]
xarray_data_array = xarray.DataArray(dask_array, coords=coords)
return xarray_data_array
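# Illustrative sketch of the da.Array(dsk, name, chunks, dtype=...) construction
# used by _get_array above, with a tiny hand-written graph; the keys and chunk
# sizes are made up for the example.
import numpy as np
import dask.array as da

name = "example-blocks"
dsk = {
    (name, 0): (np.arange, 0, 4),  # first chunk -> [0, 1, 2, 3]
    (name, 1): (np.arange, 4, 6),  # second chunk -> [4, 5]
}
arr = da.Array(dsk, name, chunks=((4, 2),), dtype=np.int64)
# arr.compute() -> array([0, 1, 2, 3, 4, 5])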
def _slicer(self, slices, isNavigation=None, out=None):
array_slices = self._get_array_slices(slices, isNavigation)
new_data = self.data[array_slices]
if new_data.size == 1 and new_data.dtype is np.dtype('O'):
if isinstance(new_data[0], (np.ndarray, dArray)):
return self.__class__(new_data[0]).transpose(navigation_axes=0)
else:
return new_data[0]
if out is None:
_obj = self._deepcopy_with_new_data(new_data,
copy_variance=True)
_to_remove = []
for slice_, axis in zip(array_slices, _obj.axes_manager._axes):
if (isinstance(slice_, slice) or
len(self.axes_manager._axes) < 2):
axis._slice_me(slice_)
else:
_to_remove.append(axis.index_in_axes_manager)
for _ind in reversed(sorted(_to_remove)):
_obj._remove_axis(_ind)
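# Illustrative aside: the _to_remove bookkeeping above mirrors how numpy/dask
# indexing drops axes indexed by a plain integer but keeps axes indexed by a
# slice; the shapes below are made up for the example.
import dask.array as da

x = da.ones((4, 5, 6), chunks=2)
# an integer index removes the first axis
assert x[1, :, ::2].shape == (5, 3)
# an equivalent length-1 slice keeps it
assert x[0:1, :, ::2].shape == (1, 5, 3)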
with File(filename, mode='r') as f:
rec_names = sorted([name for name in f['/recordings']])
shapes = [f['/recordings/{}/data'.format(name)].shape
for name in rec_names]
# Create the dask graph for all recordings from the .kwd file.
dask = {('data', idx, 0): (_read_recording, filename, rec_name)
for (idx, rec_name) in enumerate(rec_names)}
# Make sure all recordings have the same number of channels.
n_cols = shapes[0][1]
assert all(shape[1] == n_cols for shape in shapes)
# Create the dask Array.
chunks = (tuple(shape[0] for shape in shapes), (n_cols,))
return Array(dask, 'data', chunks)
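# _read_recording is not shown in the snippet above; this is a hypothetical
# implementation of the helper the graph refers to, loading one recording's
# data block from the .kwd (HDF5) file.
from h5py import File

def _read_recording(filename, rec_name):
    # Read the full data block for a single recording into memory.
    with File(filename, mode='r') as f:
        return f['/recordings/{}/data'.format(rec_name)][...]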
def substract_from_offset(value, signals):
if isinstance(value, da.Array):
value = value.compute()
for signal in signals:
signal.axes_manager[-1].offset -= value
try:
a[a == no_data] = a.dtype.type(nan)
except ValueError:
pass
return a
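# Concrete illustration of the fill-value masking above: for a float array the
# sentinel becomes NaN, while for an integer array a.dtype.type(nan) raises
# ValueError and the except clause leaves the data untouched.
import numpy as np
from numpy import nan

a = np.array([1.0, -9999.0, 2.0])
no_data = -9999.0
try:
    a[a == no_data] = a.dtype.type(nan)
except ValueError:
    pass
# a is now array([ 1., nan,  2.])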
ds = self.ds
dims = ['lat', 'lon']
chunks = ((ds.RasterYSize,), (ds.RasterXSize,))
shape = (ds.RasterYSize, ds.RasterXSize)
variables = OrderedDict()
for iband in range(1, ds.RasterCount+1):
band = ds.GetRasterBand(iband)
dt = dtype(gdal_array.codes[band.DataType])
if with_dask:
dsk = {('x', 0, 0): (load, iband)}
arr = Array(dsk, 'x', chunks, shape=shape, dtype=dt)
else:
arr = load(iband)
attrs = band.GetMetadata_Dict()
try:
dt.type(nan)
attrs['_FillValue'] = nan
except ValueError:
no_data = band.GetNoDataValue()
attrs.update({'_FillValue': no_data} if no_data else {})
variables['Band%i' % iband] = Variable(dims, arr, attrs)
variables['lat'], variables['lon'] = self._load_GeoTransform()
return FrozenOrderedDict(variables)
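# Aside: the same lazy, single-chunk wrap of a GDAL band can also be expressed
# with dask.delayed and da.from_delayed instead of a hand-built graph; the file
# path here is hypothetical.
import numpy as np
import dask
import dask.array as da
from osgeo import gdal, gdal_array

ds = gdal.Open("example.tif")  # hypothetical raster
band = ds.GetRasterBand(1)
dt = np.dtype(gdal_array.codes[band.DataType])
shape = (ds.RasterYSize, ds.RasterXSize)
lazy_band = da.from_delayed(dask.delayed(band.ReadAsArray)(), shape=shape, dtype=dt)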
if np.sum(time_chunks[uts]) != rs.stop - rs.start:
sum_chunks = np.sum(time_chunks[uts])
raise ValueError("Sum of time_chunks[%d:%d] '%d' "
"does not match the number of rows '%d' "
"in the row[%d:%d]" %
(uts.start, uts.stop, sum_chunks,
rs.stop-rs.start,
rs.start, rs.stop))
# Chunks for the 'utime', 'antenna' and '(u,v,w)' dimensions
chunks = (tuple(utime_groups),
(xds.dims["antenna"],),
(xds.dims["(u,v,w)"],))
# Create dask array and assign it to the dataset
dask_array = da.Array(dsk, name, chunks, xds.uvw.dtype)
dims = ("utime", "antenna", "(u,v,w)")
return xds.assign(antenna_uvw=xr.DataArray(dask_array, dims=dims))
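# Minimal sketch of the assign pattern above: attaching a dask-backed DataArray
# with a new "(u,v,w)" dimension to an existing xarray Dataset; the dataset and
# chunk sizes are made up for the example.
import dask.array as da
import xarray as xr

ds = xr.Dataset({"vis": (("utime", "antenna"), da.zeros((4, 3), chunks=(2, 3)))})
uvw = da.zeros((4, 3, 3), chunks=(2, 3, 3))
ds = ds.assign(antenna_uvw=xr.DataArray(uvw, dims=("utime", "antenna", "(u,v,w)")))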
lazy, the result is computed in memory because it depends on the
current state of the axes that could change later on in the workflow.
Raises
------
SignalDimensionError
If the signal dimension is not 1.
"""
if show_progressbar is None:
show_progressbar = preferences.General.show_progressbar
self._check_signal_dimension_equals_one()
ip = number_of_interpolation_points + 1
axis = self.axes_manager.signal_axes[0]
self._check_navigation_mask(mask)
# compute dask inputs eagerly: concrete start/end values are needed below
if isinstance(start, da.Array):
start = start.compute()
if isinstance(end, da.Array):
end = end.compute()
i1, i2 = axis._get_index(start), axis._get_index(end)
if reference_indices is None:
reference_indices = self.axes_manager.indices
ref = self.inav[reference_indices].data[i1:i2]
if interpolate is True:
ref = interpolate1D(ip, ref)
iterating_kwargs = ()
if mask is not None:
iterating_kwargs += (('mask', mask),)
shift_signal = self._map_iterate(
_estimate_shift1D,
iterating_kwargs=iterating_kwargs,