# https://github.com/numpy/numpy/issues/6240
# We don't inspect the values of 0d dask arrays, because these could
# hold potentially very expensive calculations. Instead, we treat
# them just like other arrays, and if necessary cast the result of op
# to match.
vals = [np.empty((1,) * max(1, a.ndim), dtype=a.dtype)
        if not is_scalar_for_elemwise(a) else a
        for a in args]
try:
    dt = apply_infer_dtype(op, vals, {}, 'elemwise', suggest_dtype=False)
except Exception:
    return NotImplemented

need_enforce_dtype = any(not is_scalar_for_elemwise(a) and a.ndim == 0 for a in args)

name = kwargs.get('name', None) or '%s-%s' % (funcname(op),
                                              tokenize(op, dt, *args))

atop_kwargs = dict(dtype=dt, name=name, token=funcname(op).strip('_'))
if need_enforce_dtype:
    atop_kwargs['enforce_dtype'] = dt
    atop_kwargs['enforce_dtype_function'] = op
    op = _enforce_dtype
result = atop(op, expr_inds,
              *concat((a, tuple(range(a.ndim)[::-1])
                       if not is_scalar_for_elemwise(a)
                       else None) for a in args),
              **atop_kwargs)

return handle_out(out, result)
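
# The comment above describes probing the operation on tiny empty arrays to
# infer the output dtype without computing 0d dask arrays. A minimal,
# self-contained illustration of that idea (infer_elemwise_dtype is a
# hypothetical helper written for this sketch, not the dask implementation):
import numpy as np

def infer_elemwise_dtype(op, *dtypes):
    # Probe `op` on 1-element empty arrays; only the result's dtype matters.
    vals = [np.empty((1,), dtype=dt) for dt in dtypes]
    return op(*vals).dtype

infer_elemwise_dtype(np.add, np.dtype('int32'), np.dtype('float32'))  # -> dtype('float64')
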
import dask
if LooseVersion(dask.__version__) < LooseVersion("0.18.0"):
    msg = (
        "Automatic chunking requires dask.__version__ >= 0.18.0. "
        "You currently have version %s" % dask.__version__
    )
    raise NotImplementedError(msg)
block_shape = (1,) + riods.block_shapes[0]
chunks = normalize_chunks(
    chunks=(1, "auto", "auto"),
    shape=(riods.count, riods.height, riods.width),
    dtype=riods.dtypes[0],
    previous_chunks=tuple((c,) for c in block_shape),
)

token = tokenize(filename, mtime, chunks)
name_prefix = "open_rasterio-%s" % token
return result.chunk(chunks, name_prefix=name_prefix, token=token)
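
# A minimal sketch of how normalize_chunks expands "auto" chunk sizes against a
# shape and dtype (assumes dask >= 0.18.0; the shape and previous_chunks below
# are illustrative, not values read from a rasterio dataset):
import numpy as np
from dask.array.core import normalize_chunks

chunks = normalize_chunks(
    chunks=(1, "auto", "auto"),
    shape=(3, 10000, 10000),
    dtype=np.dtype("uint8"),
    previous_chunks=((1,), (512,), (512,)),
)
# Each band stays in its own chunk; the spatial axes are split into blocks sized
# near dask's default chunk-size limit, aligned with the existing tiling where possible.
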
def elemwise(op, *args, **kwargs):
    """ Elementwise operation for dask DataFrames """
    columns = kwargs.get('columns', None)
    name = kwargs.get('name', None)

    _name = 'elemwise-' + tokenize(op, kwargs, *args)

    dasks = [arg for arg in args if isinstance(arg, (_Frame, Scalar))]
    other = [(i, arg) for i, arg in enumerate(args)
             if not isinstance(arg, (_Frame, Scalar))]

    if other:
        op2 = partial_by_order(op, other)
    else:
        op2 = op

    dfs = [df for df in dasks if isinstance(df, _Frame)]
    divisions = dfs[0].divisions
    if not all(df.divisions == divisions for df in dfs):
        from .multi import align_partitions
        dasks, divisions, parts = align_partitions(*dasks)
    n = len(divisions) - 1
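
# User-level sketch of what the elementwise path above supports: arithmetic on
# a dask Series broadcasts plain scalars across partitions and aligns divisions
# when needed (the data below is illustrative):
import pandas as pd
import dask.dataframe as dd

s = dd.from_pandas(pd.Series([1, 2, 3, 4]), npartitions=2)
out = (s + 10).compute()  # elementwise add, applied partition by partition
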
if isinstance(shape, np.ndarray):
    shape = shape.tolist()

if not isinstance(shape, (tuple, list)):
    shape = (shape,)

chunks = kwargs.pop("chunks", "auto")

dtype = kwargs.pop("dtype", None)
if dtype is None:
    dtype = func(shape, *args, **kwargs).dtype
dtype = np.dtype(dtype)

chunks = normalize_chunks(chunks, shape, dtype=dtype)

name = kwargs.pop("name", None)
name = name or funcname(func) + "-" + tokenize(
    func, shape, chunks, dtype, args, kwargs
)

return {
    "shape": shape,
    "dtype": dtype,
    "kwargs": kwargs,
    "chunks": chunks,
    "name": name,
}
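
# Hedged illustration of the deterministic naming pattern above: a readable
# function-name prefix plus a tokenize() hash of everything that influences the
# result yields a stable task-group name (the arguments here are illustrative):
import numpy as np
from dask.base import tokenize
from dask.utils import funcname

name = funcname(np.ones) + "-" + tokenize(np.ones, (4, 4), ((2, 2), (2, 2)), np.dtype("f8"))
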
if not isinstance(args, (tuple, list)):
    args = [args]

npartitions = set(arg.npartitions for arg in args
                  if isinstance(arg, _Frame))
if len(npartitions) > 1:
    raise ValueError("All arguments must have same number of partitions")
npartitions = npartitions.pop()

if split_every is None:
    split_every = 8
elif split_every is False:
    split_every = npartitions
elif not isinstance(split_every, int) or split_every < 2:
    raise ValueError("split_every must be an integer >= 2")

token_key = tokenize(token or (chunk, aggregate), meta, args,
                     chunk_kwargs, aggregate_kwargs, combine_kwargs,
                     split_every)

# Chunk
a = '{0}-chunk-{1}'.format(token or funcname(chunk), token_key)
if len(args) == 1 and isinstance(args[0], _Frame) and not chunk_kwargs:
    dsk = {(a, i): (chunk, key) for i, key in enumerate(args[0]._keys())}
else:
    dsk = {(a, i): (apply, chunk, [(x._name, i) if isinstance(x, _Frame)
                                   else x for x in args], chunk_kwargs)
           for i in range(args[0].npartitions)}

# Combine
prefix = '{0}-combine-{1}-'.format(token or funcname(combine), token_key)
k = npartitions
b = a
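
# Hedged sketch of the chunk/combine/aggregate tree reduction that split_every
# controls, through the public dask.dataframe reduction API (the data and the
# split_every value are illustrative):
import pandas as pd
import dask.dataframe as dd

ddf = dd.from_pandas(pd.DataFrame({"x": range(100)}), npartitions=10)
total = ddf.x.sum(split_every=4).compute()  # combine at most 4 partial results per step
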
This assumes that ``k`` is small. All results will be returned in a single
chunk.

Examples
--------
>>> x = np.array([5, 1, 3, 6])
>>> d = from_array(x, chunks=2)
>>> d.topk(2).compute()
array([6, 5])
"""
if x.ndim != 1:
    raise ValueError("Topk only works on arrays of one dimension")

token = tokenize(k, x)
name = 'chunk.topk-' + token
dsk = dict(((name, i), (chunk.topk, k, key))
           for i, key in enumerate(x._keys()))
name2 = 'topk-' + token
dsk[(name2, 0)] = (getitem, (np.sort, (np.concatenate, list(dsk))),
                   slice(-1, -k - 1, -1))
chunks = ((k,),)

return Array(sharedict.merge((name2, dsk), x.dask), name2, chunks, dtype=x.dtype)
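
# The same operation through the current public API (assuming a recent dask,
# where topk is a free function rather than an Array method):
import numpy as np
import dask.array as da

x = da.from_array(np.array([5, 1, 3, 6]), chunks=2)
da.topk(x, 2).compute()  # -> array([6, 5])
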
nr, nc = len(data.chunks[0]), len(data.chunks[1])
cr_max, cc = max(data.chunks[0]), data.chunks[1][0]

if not (data.ndim == 2 and  # Is a matrix
        nc == 1):  # Only one column block
    raise ValueError(
        "Input must have the following properties:\n"
        "  1. Have two dimensions\n"
        "  2. Have only one column of blocks\n\n"
        "Note: This function (tsqr) supports QR decomposition in the case of\n"
        "tall-and-skinny matrices (single column chunk/block; see qr)\n"
        "Current shape: {},\nCurrent chunksize: {}".format(
            data.shape, data.chunksize
        )
    )

token = "-" + tokenize(data, compute_svd)

m, n = data.shape
numblocks = (nr, 1)

qq, rr = np.linalg.qr(np.ones(shape=(1, 1), dtype=data.dtype))

layers = data.__dask_graph__().layers.copy()
dependencies = data.__dask_graph__().dependencies.copy()

# Block qr
name_qr_st1 = "qr" + token
dsk_qr_st1 = blockwise(
    _wrapped_qr,
    name_qr_st1,
    "ij",
    data.name,
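# The blockwise call above is truncated in this excerpt.

# Hedged usage sketch of tall-and-skinny QR through the public API; a single
# column of blocks, as the check above requires, lets qr take the tsqr path
# (the array shape and chunking below are illustrative):
import dask.array as da

A = da.random.random((1000, 10), chunks=(100, 10))
q, r = da.linalg.qr(A)
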
    List of partitions to be used
force : bool, default False
    Allows the expansion of the existing divisions.
    If False then the new divisions' lower and upper bounds must be
    the same as the old divisions.

Examples
--------
>>> df = df.repartition([0, 5, 10, 20])  # doctest: +SKIP

Also works on Pandas objects

>>> ddf = dd.repartition(df, [0, 5, 10, 20])  # doctest: +SKIP
"""
token = tokenize(df, divisions)
if isinstance(df, _Frame):
    tmp = 'repartition-split-' + token
    out = 'repartition-merge-' + token
    dsk = repartition_divisions(df.divisions, divisions,
                                df._name, tmp, out, force=force)
    return new_dd_object(merge(df.dask, dsk), out,
                         df._meta, divisions)
elif isinstance(df, (pd.Series, pd.DataFrame)):
    name = 'repartition-dataframe-' + token
    from .utils import shard_df_on_index
    dfs = shard_df_on_index(df, divisions[1:-1])
    dsk = dict(((name, i), df) for i, df in enumerate(dfs))
    return new_dd_object(dsk, name, df, divisions)
raise ValueError('Data must be DataFrame or Series')
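
# Runnable sketch of repartitioning by explicit divisions (assumes a sorted
# integer index; the divisions below are illustrative and must share the old
# lower/upper bounds when force is False):
import pandas as pd
import dask.dataframe as dd

pdf = pd.DataFrame({"x": range(20)})
ddf = dd.from_pandas(pdf, npartitions=4)          # splits the index into 4 partitions
ddf2 = ddf.repartition(divisions=[0, 5, 10, 19])  # coarser partitions; endpoints unchanged
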
if not all(x.shape == seq[0].shape for x in seq):
    raise ValueError("Stacked arrays must have the same shape. Got %s"
                     % [x.shape for x in seq])

ind = list(range(ndim))
uc_args = list(concat((x, ind) for x in seq))
_, seq = unify_chunks(*uc_args)

dt = reduce(np.promote_types, [a.dtype for a in seq])
seq = [x.astype(dt) for x in seq]

assert len(set(a.chunks for a in seq)) == 1  # same chunks
chunks = (seq[0].chunks[:axis] + ((1,) * n,) + seq[0].chunks[axis:])

names = [a.name for a in seq]
name = 'stack-' + tokenize(names, axis)
keys = list(product([name], *[range(len(bd)) for bd in chunks]))

inputs = [(names[key[axis + 1]],) + key[1:axis + 1] + key[axis + 2:]
          for key in keys]
values = [(getitem, inp, (slice(None, None, None),) * axis +
           (None,) + (slice(None, None, None),) * (ndim - axis))
          for inp in inputs]

dsk = dict(zip(keys, values))
dsk2 = sharedict.merge((name, dsk), *[a.dask for a in seq])

return Array(dsk2, name, chunks, dtype=dt)
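
# Minimal usage sketch of the stacking behaviour above through the public API
# (shapes and chunking are illustrative):
import numpy as np
import dask.array as da

parts = [da.from_array(np.arange(4).reshape(2, 2), chunks=1) for _ in range(3)]
stacked = da.stack(parts, axis=0)  # shape (3, 2, 2); the new axis is chunked (1, 1, 1)
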