"""
# Determine dimensions of raw frame data
a_type = a.dtype
a_shape = a.shape
len_a_shape = len(a_shape)
img_axes = len_a_shape - 2, len_a_shape - 1
a_half = a_shape[img_axes[0]] // 2, a_shape[img_axes[1]] // 2
# Define a 3-pixel-wide cross of zeros to pad the raw data
z_array = da.zeros((a_shape[0], a_shape[1], 3), dtype=a_type)
z_array2 = da.zeros((a_shape[0], 3, a_shape[img_axes[1]] + 3), dtype=a_type)
# Insert the blank cross into the raw data: the vertical band first, then the
# horizontal band (3 columns wider, to span the band just inserted)
b = da.concatenate((a[:, :, :a_half[1]], z_array, a[:, :, a_half[1]:]), axis=-1)
b = da.concatenate((b[:, :a_half[0], :], z_array2, b[:, a_half[0]:, :]), axis=-2)
return b
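
# A minimal, self-contained sketch of the zero-cross padding above. The array
# name `frames` and all sizes are invented for illustration; only the
# da.zeros / da.concatenate pattern comes from the snippet.
import dask.array as da

frames = da.ones((10, 64, 64), chunks=(5, 64, 64))
half = frames.shape[-1] // 2
gap = da.zeros(frames.shape[:-1] + (3,), dtype=frames.dtype)
padded = da.concatenate((frames[..., :half], gap, frames[..., half:]), axis=-1)
assert padded.shape == (10, 64, 67)
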
return dd.from_pandas(data, chunksize=chunksize, **kwargs)
elif source.container == 'list':
return db.from_sequence(source.read(), **kwargs)
else:
raise ValueError('Unknown container type: %s' % source.container)
else:
    # Strategy depends on source properties
    if source.get_chunks_supported:
        chunks = source.get_chunks(chunksize)
    else:
        # the whole source is read in one go; treat it as a single chunk
        # so that `chunks` is always defined for the zip() below
        chunks = [source]
    futures = client.map(read_func, chunks)
if source.container == 'ndarray':
array_parts = [da.from_delayed(f, shape=c.shape, dtype=c.dtype) for f, c in zip(futures, chunks)]
return da.concatenate(array_parts, axis=0)
elif source.container == 'dataframe':
return dd.from_delayed(futures)
elif source.container == 'list':
return db.from_delayed(futures)
else:
raise ValueError('Unknown container type: %s' % source.container)
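
# A compact illustration of the delayed-chunks pattern used for the 'ndarray'
# branch above, without a distributed client. `read_chunk` and its fixed
# (4, 3) shape are stand-ins invented for this sketch.
import numpy as np
import dask
import dask.array as da

@dask.delayed
def read_chunk(i):
    # pretend to read one chunk of the source
    return np.full((4, 3), i, dtype="f8")

parts = [da.from_delayed(read_chunk(i), shape=(4, 3), dtype="f8")
         for i in range(3)]
stacked = da.concatenate(parts, axis=0)   # lazy (12, 3) array
assert stacked.compute().shape == (12, 3)
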
def _concat_dask(arrs):
"""NOTE: dask is currently not used because of severe performance issues"""
from dask.array import concatenate, from_array
return concatenate([from_array(arr, chunks=arr.shape) for arr in arrs],
axis=0)
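
# Usage sketch for the helper above: each in-memory array becomes a
# single-chunk dask array, so the concatenation itself stays lazy. Assumes
# _concat_dask from above is in scope.
import numpy as np

print(_concat_dask([np.arange(4), np.arange(4, 10)]).compute())
# -> [0 1 2 3 4 5 6 7 8 9]
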
msgCount = 0
while True:
bufr = ec.codes_bufr_new_from_file(fh)
if bufr is None:
break
ec.codes_set(bufr, 'unpack', 1)
# if it is the first message, initialise our final array;
# otherwise append this message's values to it
if msgCount == 0:
    arr = da.from_array(ec.codes_get_array(
        bufr, key, float), chunks=CHUNK_SIZE)
else:
    tmpArr = da.from_array(ec.codes_get_array(
        bufr, key, float), chunks=CHUNK_SIZE)
    arr = da.concatenate((arr, tmpArr))
msgCount += 1
ec.codes_release(bufr)
if arr.size == 1:
arr = arr[0]
return arr
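
# Note: the loop above calls da.concatenate once per BUFR message, which
# rebuilds the dask graph on every iteration. A common alternative (sketch,
# with synthetic data standing in for ec.codes_get_array) is to collect the
# pieces first and concatenate once:
import numpy as np
import dask.array as da

CHUNK_SIZE = 1000
pieces = [da.from_array(np.random.rand(n), chunks=CHUNK_SIZE)
          for n in (5, 7, 3)]
arr = da.concatenate(pieces)
assert arr.shape == (15,)
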
XD = X.to_delayed().flatten().tolist()
func = delayed(metrics.pairwise_distances_argmin_min, pure=True, nout=2)
blocks = [func(x, Y, metric=metric, metric_kwargs=metric_kwargs) for x in XD]
argmins, mins = zip(*blocks)
argmins = [
da.from_delayed(block, (chunksize,), np.int64)
for block, chunksize in zip(argmins, X.chunks[0])
]
# Scikit-learn seems to always use float64
mins = [
da.from_delayed(block, (chunksize,), "f8")
for block, chunksize in zip(mins, X.chunks[0])
]
argmins = da.concatenate(argmins)
mins = da.concatenate(mins)
return argmins, mins
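
# The delayed(nout=2) pattern above, in miniature: a delayed function that
# returns two values unpacks into two delayed objects, each of which can be
# wrapped with da.from_delayed. Shapes and dtypes here are illustrative.
import numpy as np
import dask
import dask.array as da

f = dask.delayed(lambda x: (x.argmin(axis=0), x.min(axis=0)), pure=True, nout=2)
amin, vmin = f(np.arange(6, dtype="f8").reshape(2, 3))
amin = da.from_delayed(amin, (3,), np.int64)
vmin = da.from_delayed(vmin, (3,), "f8")
assert vmin.compute().tolist() == [0.0, 1.0, 2.0]
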
thelist.extend([tapered, therest])
dc = da.concatenate(thelist, axis=-1)
else:
dc[..., offset:channels + offset] *= (
np.hanning(2 * channels)[:channels])
dc[..., :offset] *= 0.
if side == 'right' or side == 'both':
rl = None if offset == 0 else -offset
if self._lazy:
therest = dc[..., :-channels - offset]
tapered = dc[..., -channels - offset:rl]
tapered *= np.hanning(2 * channels)[-channels:]
thelist = [therest, tapered]
if offset != 0:
thelist.append(zeros)
dc = da.concatenate(thelist, axis=-1)
else:
dc[..., -channels - offset:rl] *= (
np.hanning(2 * channels)[-channels:])
if offset != 0:
dc[..., -offset:] *= 0.
if self._lazy:
self.data = dc
self.events.data_changed.trigger(obj=self)
return channels
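
# Stand-alone sketch of the right-side taper above: multiply the trailing
# `channels` samples by the falling half of a Hann window, exactly as the
# snippet does (offset == 0 for simplicity; signal sizes are invented).
import numpy as np
import dask.array as da

channels = 8
sig = da.ones((4, 32), chunks=(4, 32))
taper = np.hanning(2 * channels)[-channels:]   # falling half-window
tapered = da.concatenate(
    [sig[..., :-channels], sig[..., -channels:] * taper], axis=-1)
assert tapered.shape == sig.shape
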
shape = list(dataarray.shape)
logger.info('Chunk index %s size %s, data array shape %s', index, size, shape)
chunk_slices = [slice(x, min(x+chunk_step, size)) for x in range(0, size, chunk_step)]
chunk_shapes = [update_shape(shape.copy(), index, x.stop-x.start) for x in chunk_slices]
logger.info('Split variable into %s chunks', len(chunk_slices))
delayed_chunks = [dask.delayed(get_protected_data)(url, var_name, cert, time=x) for x in chunk_slices]
dask_da = [da.from_delayed(y, shape=x, dtype=dataarray.dtype) for x, y in zip(chunk_shapes, delayed_chunks)]
concat = da.concatenate(dask_da, axis=0)
logger.info('Created dask array shape %s', concat.shape)
return concat
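
# The chunk-slicing arithmetic above, shown concretely for an axis of length
# 10 covered in steps of 4 (the numbers are illustrative):
size, chunk_step = 10, 4
chunk_slices = [slice(x, min(x + chunk_step, size))
                for x in range(0, size, chunk_step)]
# -> [slice(0, 4), slice(4, 8), slice(8, 10)]
assert [s.stop - s.start for s in chunk_slices] == [4, 4, 2]
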
def _concat(self, *args, **kwargs):
if self._dask:
return da.concatenate(*args, **kwargs)
else:
return np.concatenate(*args, **kwargs)
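
# Usage sketch for the backend switch above: the same call site yields a dask
# or numpy result depending on one flag. Re-implemented inline here, since the
# original is a method on its owning class.
import numpy as np
import dask.array as da

def concat_backend(use_dask, *args, **kwargs):
    # mirrors the _dask dispatch in _concat above
    if use_dask:
        return da.concatenate(*args, **kwargs)
    return np.concatenate(*args, **kwargs)

assert concat_backend(True, [da.ones(3), da.zeros(3)]).shape == (6,)
assert concat_backend(False, [np.ones(3), np.zeros(3)]).shape == (6,)
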
(slice(nside*n, nside*(n+1)),))
data_face = ar[face_slice]
face_arrays.append(data_face)
# We can't concatenate using numpy (hstack etc.) because it makes a copy,
# presumably loading the memmaps into memory.
# Using dask gets around this.
# But what if we want different chunks, or already chunked the data
# upstream? Doesn't seem like this is ideal
# TODO: Refactor handling of dask arrays and chunking
#return np.concatenate(face_arrays, axis=jdim)
# the dask version doesn't work because of this:
# https://github.com/dask/dask/issues/1645
face_arrays_dask = [da.from_array(fa, chunks=fa.shape)
for fa in face_arrays]
concat = da.concatenate(face_arrays_dask, axis=jdim)
return concat
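
# The same copy-free idea in miniature: wrapping each face as a single-chunk
# dask array keeps the concatenation lazy. Plain numpy arrays stand in for
# the memmapped faces here.
import numpy as np
import dask.array as da

jdim = 0
faces = [np.full((4, 8), n) for n in range(3)]
lazy = da.concatenate([da.from_array(f, chunks=f.shape) for f in faces],
                      axis=jdim)
assert lazy.shape == (12, 8)
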
def da_stack(folder, shape):
    """Stack every '<block>r<row>.zarr' array found under ``path + folder``
    into one dask array (``path`` and ``windowSize`` are globals here)."""
    da_list = []
    full_path = path + folder
    max_blocks = shape[0] // windowSize + 1
    for block in range(1, max_blocks + 1):
        for row in range(windowSize):
            full_name = full_path + str(block) + 'r' + str(row) + '.zarr'
            try:
                da_list.append(da.from_zarr(full_name))
            except Exception:
                # skip block/row combinations that were never written
                continue
    return da.rechunk(da.concatenate(da_list, axis=0),
                      chunks=(shape[1], windowSize ** 2))
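
# A runnable sketch of the from_zarr / concatenate / rechunk round trip in
# da_stack, using a temporary store (requires the zarr package; the file
# names here do not follow the '<block>r<row>.zarr' scheme above).
import os
import tempfile
import dask.array as da

tmp = tempfile.mkdtemp()
for i in range(3):
    da.ones((4, 16)).to_zarr(os.path.join(tmp, '%d.zarr' % i))
pieces = [da.from_zarr(os.path.join(tmp, '%d.zarr' % i)) for i in range(3)]
stacked = da.concatenate(pieces, axis=0).rechunk((4, 16))
assert stacked.shape == (12, 16)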