# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def event(self, doc):
    """Return a deep copy of *doc* whose 'external' data keys are replaced
    by lazy dask arrays that fill themselves on compute.

    The document itself is not modified; each external key in the copy is
    backed by a delayed call to ``self.fill_event``.
    """

    @dask.delayed
    def _fill_one(event_doc, data_key):
        # fill_event mutates the document in place; hand back just the
        # filled payload for this key as a numpy array.
        self.fill_event(event_doc, include=data_key)
        return numpy.asarray(event_doc['data'][data_key])

    descriptor = self._descriptor_cache[doc['descriptor']]
    filled_doc = copy.deepcopy(doc)
    for key, spec in descriptor['data_keys'].items():
        # Only keys flagged 'external' need deferred filling.
        if 'external' not in spec:
            continue
        filled_doc['data'][key] = array.from_delayed(
            _fill_one(filled_doc, key),
            shape=extract_shape(descriptor, key),
            dtype=extract_dtype(descriptor, key),
        )
    return filled_doc
# NOTE(review): fragment begins mid-function -- the matching `if` branch and
# the definitions of X, keep, rest, n_components, metric, params, embed,
# _log_array, logger, pinv, svd, delayed and da all live outside this view.
else:
# Split the samples into the kept (landmark) set and the remainder.
X_keep = X[keep]
X_rest = X[rest]
# A, B are presumably the Nystrom blocks: kept-vs-kept and kept-vs-rest
# affinities -- TODO confirm against embed()'s contract.
A, B = embed(X_keep, X_rest, n_components, metric, params)
_log_array(logger, A, "A")
_log_array(logger, B, "B")
# now the approximation of C
a = A.sum(0) # (l,)
b1 = B.sum(1) # (l,)
b2 = B.sum(0) # (m,)
# TODO: I think we have some unnecessary delayed wrapping of A here.
A_inv = da.from_delayed(delayed(pinv)(A), A.shape, A.dtype)
inner = A_inv.dot(b1)
# Inverse-sqrt degree terms used to symmetrically normalize A and B.
d1_si = 1 / da.sqrt(a + b1)
d2_si = 1 / da.sqrt(b2 + B.T.dot(inner)) # (m,), dask array
# d1, d2 are diagonal, so we can avoid large matrix multiplies
# Equivalent to diag(d1_si) @ A @ diag(d1_si)
A2 = d1_si.reshape(-1, 1) * A * d1_si.reshape(1, -1) # (n, n)
_log_array(logger, A2, "A2")
# A2 = A2.rechunk(A2.shape)
# Equivalent to diag(d1_si) @ B @ diag(d2_si)
B2 = da.multiply(da.multiply(d1_si.reshape(-1, 1), B), d2_si.reshape(1, -1))
_log_array(logger, B2, "B2")
# Deferred SVD of the normalized landmark block (three delayed outputs).
U_A, S_A, V_A = delayed(svd, pure=True, nout=3)(A2)
# NOTE(review): fragment begins mid-expression -- the generator below is the
# tail of a call whose opening parenthesis is outside this view, as are
# begin_indicies, i, this_chunk_read_indicies, blocked_dimension_order,
# chunk_by_dims, lazy_arrays, img, offsets, read_lengths, lif, delayed, da,
# sample_chunk_shape and sample.
current_dim_begin_index + curr_dim_index
for current_dim_begin_index, curr_dim_index in zip(begin_indicies, i)
)
# Zip the dims with the read indices
this_chunk_read_dims = dict(
zip(blocked_dimension_order, this_chunk_read_indicies)
)
# Remove the dimensions that we want to chunk by from the read dims
for d in chunk_by_dims:
if d in this_chunk_read_dims:
this_chunk_read_dims.pop(d)
# Add delayed array to lazy arrays at index
lazy_arrays[i] = da.from_delayed(
delayed(LifReader._imread)(
img, offsets, read_lengths, lif.xml_root, this_chunk_read_dims
),
shape=sample_chunk_shape,
dtype=sample.dtype,
)
# Convert the numpy array of lazy readers into a dask array and fill the inner
# most empty dimensions with chunks
merged = da.block(lazy_arrays.tolist())
# Because we have set certain dimensions to be chunked and others not
# we will need to transpose back to original dimension ordering
# Example being, if the original dimension ordering was "SZYX" and we want to
# chunk by "S", "Y", and "X" we created an array with dimensions ordering "ZSYX"
transpose_indices = []
def get_full_angles(self):
    """Get the interpolated lons/lats.

    Returns the four angle arrays (sun azimuth, sun zenith, satellite
    azimuth, satellite zenith), computing and caching them as dask arrays
    on first use.
    """
    angle_names = ('sun_azi', 'sun_zen', 'sat_azi', 'sat_zen')
    cached = tuple(getattr(self, name) for name in angle_names)
    # Only reuse the cache when every one of the four arrays is present.
    if all(angle is not None for angle in cached):
        return cached
    dtype = self["ANGULAR_RELATIONS"].dtype
    shape = (self.scanlines, self.pixels)
    # Wrap each delayed interpolation result as a dask array of known shape.
    for name, lazy_angle in zip(angle_names, self._get_full_angles()):
        setattr(self, name, da.from_delayed(lazy_angle, dtype=dtype, shape=shape))
    return tuple(getattr(self, name) for name in angle_names)
def transform(self, raw_X):
    """Transform a 1-D collection of raw documents to a feature matrix.

    Parameters
    ----------
    raw_X : dask.bag.Bag, dask.dataframe.Series, 1-D dask.array.Array, or
        any non-dask iterable accepted by the underlying hasher.

    Returns
    -------
    For non-dask input, whatever the eager hasher's ``transform`` returns;
    for dask input, a lazy collection with unknown row-chunk sizes.

    Raises
    ------
    ValueError
        If ``raw_X`` is an unsupported dask collection or not 1-dimensional.
    """
    msg = "'X' should be a 1-dimensional array with length 'num_samples'."
    if not dask.is_dask_collection(raw_X):
        # Plain in-memory input: delegate directly to the eager hasher.
        return self._hasher(**self.get_params()).transform(raw_X)
    if isinstance(raw_X, db.Bag):
        bag2 = raw_X.map_partitions(self._transformer)
        # Rows per partition are unknown up front, hence np.nan chunk sizes.
        arrs = [
            da.from_delayed(obj, (np.nan, self.n_features), self.dtype)
            for obj in bag2.to_delayed()
        ]
        result = da.concatenate(arrs, axis=0)
    elif isinstance(raw_X, dd.Series):
        result = raw_X.map_partitions(self._transformer)
    elif isinstance(raw_X, da.Array):
        # dask.Array
        chunks = ((np.nan,) * raw_X.numblocks[0], (self.n_features,))
        if raw_X.ndim == 1:
            result = raw_X.map_blocks(
                self._transformer, dtype="f8", chunks=chunks, new_axis=1
            )
        else:
            raise ValueError(msg)
    else:
        raise ValueError(msg)
    # BUG FIX: the computed collection was built but never returned in the
    # visible block (the method fell through, implicitly yielding None).
    return result
# NOTE(review): fragment begins mid-function -- mask, dtype, proxy, values,
# ma, np, da, calc_array, mask_field_array, delayed_valid_values, block_shape
# and field are all defined outside this view.  The first part (through the
# `return result`) appears to be the tail of an inner function body; the last
# four lines belong to the enclosing scope.
# First ensure the mask array is boolean (not int).
mask = mask.astype(bool)
# Start from an all-masked array; only masked-True cells receive data below.
result = ma.masked_all(mask.shape, dtype=dtype)
# Apply the fill-value from the proxy object.
# Note: 'values' is just 'proxy' in a dask wrapper. This arg
# must be a dask type so that 'delayed' can recognise it, but
# that provides no access to the underlying fill value.
result.fill_value = proxy.mdi
n_values = np.sum(mask)
if n_values > 0:
# Note: data field can have excess values, but not fewer.
result[mask] = values[:n_values]
return result
# Wrap the deferred computation as a lazy dask array of known block shape.
delayed_result = calc_array(mask_field_array, delayed_valid_values)
lazy_result_array = da.from_delayed(
delayed_result, shape=block_shape, dtype=dtype
)
field.data = lazy_result_array
# NOTE(review): fragment begins mid-function -- persisted, batch_count,
# client, collected, _dict_to_tuple, _tuple_to_dict, dask and self._tensors
# are defined outside this view.  (Dead commented-out code removed.)
# Compute the length of every persisted numpy batch remotely, without
# pulling the batch data itself back to the client.
lens = {
    key: [dask.delayed(len)(objs[j]) for j in range(batch_count)]
    for key, objs in persisted.items()
}
lens, keys = _dict_to_tuple(lens)
lens = client.gather(client.compute(lens))
lens = _tuple_to_dict(lens, keys)
for key, objs in persisted.items():
    # Stitch the delayed batches into one dask array per tensor key; the
    # first axis length of each piece comes from the gathered lens.
    arr = dask.array.concatenate(
        [
            dask.array.from_delayed(
                obj,
                dtype=self._tensors[key].dtype,
                shape=(lens[key][i],) + tuple(self._tensors[key].shape[1:]),
            )
            for i, obj in enumerate(objs)
        ]
    )
    if collected[key] is None:
        collected[key] = arr
    else:
        # BUG FIX: concatenate takes a *sequence* of arrays; previously
        # `arr` was passed positionally as the `axis` argument.
        collected[key] = dask.array.concatenate([collected[key], arr])
tasks = []
for key in list(collected.keys()):
    c = collected[key]
def from_delayed(fm, uri, var, chunk_shape):
    """Build one dask array for *var* from delayed per-chunk retrievals.

    Slices the leading (time) axis into steps of ``chunk_shape[0]``, fetches
    each slice lazily via ``retrieve_chunk``, and concatenates the pieces.
    """
    total = var.shape[0]
    step = chunk_shape[0]
    logger.info('Building input from delayed functions size %r step %r', total, step)
    # Half-open slices over the leading axis; the last one may be short.
    slices = [slice(start, min(total, start + step), 1)
              for start in range(0, total, step)]
    logger.info('Generated %r chunks', len(slices))
    pieces = []
    for sl in slices:
        task = dask.delayed(retrieve_chunk)(uri, var.id, {'time': sl}, fm.cert_data)
        pieces.append(da.from_delayed(task, new_shape(var.shape, sl), var.dtype))
    return da.concatenate(pieces, axis=0)
def Jtvec(self, m, v, f=None):
"""Jacobian-transpose-times-vector product (fragment).

NOTE(review): truncated -- the method continues past this view; the final
value built from ``dmudm_v`` is assembled (and returned) outside it.
"""
if self.coordinate_system == 'cartesian':
dmudm = self.chiMap.deriv(m)
else:
# Non-cartesian: chain through the coordinate-transform derivative dSdm.
dmudm = self.dSdm * self.chiMap.deriv(m)
if self.modelType == 'amplitude':
# Push v through the amplitude derivative first (sparse dot, deferred).
dfdm_v = dask.delayed(csr.dot)(v, self.dfdm)
vec = da.from_delayed(dfdm_v, dtype=float, shape=[self.dfdm.shape[0]])
if getattr(self, '_Mxyz', None) is not None:
jtvec = da.dot(vec.astype(np.float32), self.G)
Jtvec = dask.delayed(csr.dot)(jtvec, self.Mxyz)
else:
Jtvec = da.dot(vec.astype(np.float32), self.G)
else:
Jtvec = da.dot(v.astype(np.float32), self.G)
# Map the sensitivity product back to model space via dmudm (deferred).
dmudm_v = dask.delayed(csr.dot)(Jtvec, dmudm)
# NOTE(review): fragment begins mid-function -- svs_file, tile_size, overlap,
# remove_last, allow_unknown_chunksizes and transpose are parameters of an
# enclosing function outside this view; openslide, deepzoom, dask, da, np and
# dask_image come from imports outside this view.
img = openslide.open_slide(svs_file)
if type(img) is openslide.OpenSlide:
gen = deepzoom.DeepZoomGenerator(
img, tile_size=tile_size, overlap=overlap, limit_bounds=True)
# The deepest DeepZoom level is the full-resolution image.
max_level = len(gen.level_dimensions) - 1
n_tiles_x, n_tiles_y = gen.level_tiles[max_level]
@dask.delayed(pure=True)
def get_tile(level, column, row):
tile = gen.get_tile(level, (column, row))
# Transpose so axes line up with the concatenation order used below.
return np.array(tile).transpose((1, 0, 2))
# Shape of one sample tile, computed eagerly; reused for every from_delayed.
sample_tile_shape = get_tile(max_level, 0, 0).shape.compute()
# Optionally drop the last (possibly ragged) row/column of tiles.
rows = range(n_tiles_y - (0 if not remove_last else 1))
cols = range(n_tiles_x - (0 if not remove_last else 1))
arr = da.concatenate([da.concatenate([da.from_delayed(get_tile(max_level, col, row), sample_tile_shape, np.uint8) for row in rows],
allow_unknown_chunksizes=allow_unknown_chunksizes, axis=1) for col in cols], allow_unknown_chunksizes=allow_unknown_chunksizes)
if transpose:
arr=arr.transpose([1, 0, 2])
return arr
else: # img is instance of openslide.ImageSlide
# Non-pyramidal image: fall back to dask-image's plain reader.
return dask_image.imread.imread(svs_file)