# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def totiledb(uri, x, ctx=None, key=None, timestamp=None):
    """Store tensor ``x`` into the TileDB array at ``uri``.

    Parameters
    ----------
    uri : str
        URI of the target TileDB array.
    x : tensor-like
        Input converted via ``astensor``; sparsity decides Dense vs Sparse array.
    ctx : tiledb.Ctx, optional
        Caller-supplied TileDB context; when omitted a default one is created.
    key : optional
        Encryption key forwarded to TileDB.
    timestamp : optional
        TileDB timestamp forwarded to TileDB.

    Returns
    -------
    The tensor produced by applying the ``TensorTileDBDataStore`` op to ``x``.
    """
    import tiledb

    x = astensor(x)
    raw_ctx = ctx
    if raw_ctx is None:
        ctx = tiledb.Ctx()
    tiledb_array_type = tiledb.SparseArray if x.issparse() else tiledb.DenseArray
    try:
        # If the array already exists, validate its shape and dtype against
        # the tensor.  Open it via a context manager so the probe handle is
        # always closed (the original leaked this handle).
        with tiledb_array_type(uri=uri, key=key, timestamp=timestamp,
                               ctx=ctx) as tiledb_array:
            check_tiledb_array_with_tensor(x, tiledb_array)
    except tiledb.TileDBError:
        # Array does not exist yet; since the tiling is unknown here, the
        # array will be created later, in the tile phase of the tensor.
        pass
    # Only propagate an explicit user config; a default Ctx maps to None.
    tiledb_config = None if raw_ctx is None else raw_ctx.config().dict()
    op = TensorTileDBDataStore(tiledb_config=tiledb_config, tiledb_uri=uri,
                               tiledb_key=key, tiledb_timestamp=timestamp,
                               dtype=x.dtype, sparse=x.issparse())
    return op(x)
def read_array(order):
    """Open the sparse array read-only and print a row/col slice of it.

    ``order`` selects the result layout passed to ``query`` (the plain
    subscript syntax only supports the default row-major order).
    """
    with tiledb.SparseArray(array_name, mode='r') as A:
        # Report the non-empty domain of the array.
        print("Non-empty domain: {}".format(A.nonempty_domain()))
        # Slice only rows 1, 2 and cols 2, 3, 4; the `query` form is what
        # allows requesting a non-default cell order and the coordinates.
        result = A.query(attrs=["a"], order=order, coords=True)[1:3, 2:5]
        values = result["a"]
        cells = result["coords"]
        for idx in range(cells.shape[0]):
            print("Cell {} has data {}".format(str(cells[idx]), str(values[idx])))
def totiledb(uri, x, ctx=None, key=None, timestamp=None):
    """Write tensor ``x`` to the TileDB array at ``uri`` via a store op.

    A default ``tiledb.Ctx`` is created when the caller supplies none; the
    TileDB config is only propagated when an explicit context was given.
    """
    import tiledb

    x = astensor(x)
    user_ctx = ctx
    if user_ctx is None:
        ctx = tiledb.Ctx()
    if x.issparse():
        array_cls = tiledb.SparseArray
    else:
        array_cls = tiledb.DenseArray
    try:
        # The array may already exist -- verify shape and dtype match.
        existing = array_cls(uri=uri, key=key, timestamp=timestamp, ctx=ctx)
        check_tiledb_array_with_tensor(x, existing)
    except tiledb.TileDBError:
        # Not created yet: as the tiling is unknown here, the TileDB array
        # will be created during the tile phase of the tensor.
        pass
    config = user_ctx.config().dict() if user_ctx is not None else None
    store_op = TensorTileDBDataStore(
        tiledb_config=config, tiledb_uri=uri, tiledb_key=key,
        tiledb_timestamp=timestamp, dtype=x.dtype, sparse=x.issparse())
    return store_op(x)
# NOTE(review): headerless fragment of a chunk-execute routine that reads a
# stored TileDB array back into a chunk. `op`, `chunk`, `axis_offsets`,
# `tiledb_ctx`, `ctx`, `sps` and `SparseNDArray` come from the enclosing
# (not shown) scope -- confirm against the full file. The 1-d `else` branch
# below is truncated in this view.
key = op.tiledb_key
timestamp = op.tiledb_timestamp
# Build one slice per dimension, translating the chunk's local extent into
# global array coordinates via the per-axis offsets.
slcs = []
for axis in range(chunk.ndim):
axis_offset = axis_offsets[axis]
axis_length = chunk.shape[axis]
slcs.append(slice(axis_offset, axis_offset + axis_length))
if not op.sparse:
# read dense array from tiledb
with tiledb.DenseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
ctx[chunk.key] = tiledb_arr[tuple(slcs)]
else:
# read sparse array from tiledb
with tiledb.SparseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
if tiledb_arr.ndim > 2:
raise NotImplementedError(
'Does not support to read array with more than 2 dimensions')
data = tiledb_arr[tuple(slcs)]
coords = data['coords']
# Values come from the array's first (and only used) attribute.
value = data[tiledb_arr.attr(0).name]
if tiledb_arr.ndim == 2:
# 2-d
# Shift global coordinates back into chunk-local coordinates before
# building the COO matrix.
ij = tuple(coords[tiledb_arr.domain.dim(k).name] - axis_offsets[k]
for k in range(tiledb_arr.ndim))
spmatrix = sps.coo_matrix((value, ij), shape=chunk.shape)
ctx[chunk.key] = SparseNDArray(spmatrix)
else:
# 1-d
# NOTE(review): standalone example snippet creating and dumping a sparse
# array schema. It uses the legacy TileDB-Py 0.x API (context passed as the
# first positional argument, `compressor=`/`coords_compressor=` tuples,
# schema created via `tiledb.SparseArray`) -- verify against the installed
# tiledb version before reuse.
ctx = tiledb.Ctx()
# create dimensions
# First dimension is anonymous (empty name), domain 1..1000, tile extent 10.
d1 = tiledb.Dim(ctx, "", domain=(1, 1000), tile=10, dtype="uint64")
d2 = tiledb.Dim(ctx, "d2", domain=(101, 10000), tile=100, dtype="uint64")
# create domain
domain = tiledb.Domain(ctx, d1, d2)
# create attributes
# Anonymous attribute holding a 3-component int32 record per cell.
a1 = tiledb.Attr(ctx, "", dtype="int32,int32,int32")
# gzip level -1 means the library default compression level.
a2 = tiledb.Attr(ctx, "a2", compressor=("gzip", -1), dtype="float32")
# create sparse array with schema
schema = tiledb.SparseArray(ctx, "sparse_array_schema",
domain=domain, attrs=(a1, a2),
capacity=10,
tile_order='row-major',
cell_order='col-major',
coords_compressor=('zstd', 4),
offsets_compressor=('blosc-lz', 5))
schema.dump()
# Print from schema
# Echo the same properties back from the constructed schema object.
print("From schema properties:")
print("- Array type: ", "sparse" if schema.sparse else "dense")
print("- Cell order: ", schema.cell_order)
print("- Tile order: ", schema.tile_order)
print("- Capacity: ", schema.capacity)
print("- Coordinates compressor: ", schema.coords_compressor)
print("- Offsets compressor: ", schema.offsets_compressor)
# NOTE(review): headerless fragment of a chunk-execute routine that WRITES a
# chunk into a TileDB array. The `if` matching the `else:` below is outside
# this view; `op`, `chunk`, `axis_offsets`, `uri`, `tiledb_ctx`, `key`,
# `timestamp`, `ctx`, `np`, `sps` and `SparseNDArray` come from the
# enclosing (not shown) scope -- confirm against the full file.
# dense
# TileDB writes need C-contiguous buffers.
to_store = np.ascontiguousarray(ctx[op.input.key])
# Map the chunk's local extent to global array coordinates per axis.
slcs = []
for axis in range(chunk.ndim):
axis_offset = int(axis_offsets[axis])
axis_length = int(op.input.shape[axis])
slcs.append(slice(axis_offset, axis_offset + axis_length))
with tiledb.DenseArray(uri=uri, ctx=tiledb_ctx, mode='w',
key=key, timestamp=timestamp) as arr:
arr[tuple(slcs)] = to_store
# The store op yields an empty placeholder result for the chunk.
ctx[chunk.key] = np.empty((0,) * chunk.ndim, dtype=chunk.dtype)
else:
# sparse
to_store = ctx[op.input.key].spmatrix.tocoo()
# Only open the array for writing when there is data to write.
if to_store.nnz > 0:
with tiledb.SparseArray(uri=uri, ctx=tiledb_ctx, mode='w',
key=key, timestamp=timestamp) as arr:
if chunk.ndim == 1:
# A 1-d chunk may be stored as a 1xN or Nx1 matrix; pick the
# axis that actually carries the coordinates.
vec = to_store.col if to_store.shape[0] == 1 else to_store.row
vec += axis_offsets[0]
arr[vec] = to_store.data
else:
# Translate local COO coordinates into global array coordinates.
i, j = to_store.row + axis_offsets[0], to_store.col + axis_offsets[1]
arr[i, j] = to_store.data
# Empty sparse placeholder result for the chunk.
ctx[chunk.key] = SparseNDArray(sps.csr_matrix((0, 0), dtype=chunk.dtype),
shape=chunk.shape)
# NOTE(review): second headerless fragment of a chunk-read routine (near
# duplicate of the earlier one, but keyed off `chunk.op` / `chunk.issparse()`
# instead of `op`). `chunk`, `axis_offsets`, `uri`, `tiledb_ctx`, `ctx`,
# `sps`, `xp` and `SparseNDArray` come from the enclosing (not shown)
# scope; the final statement is cut off mid-line -- confirm against the
# full file.
key = chunk.op.tiledb_key
timestamp = chunk.op.tiledb_timestamp
# One slice per dimension: chunk-local extent shifted into global coords.
slcs = []
for axis in range(chunk.ndim):
axis_offset = axis_offsets[axis]
axis_length = chunk.shape[axis]
slcs.append(slice(axis_offset, axis_offset + axis_length))
if not chunk.issparse():
# read dense array from tiledb
with tiledb.DenseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
ctx[chunk.key] = tiledb_arr[tuple(slcs)]
else:
# read sparse array from tiledb
with tiledb.SparseArray(uri=uri, ctx=tiledb_ctx, key=key, timestamp=timestamp) as tiledb_arr:
if tiledb_arr.ndim > 2:
raise NotImplementedError(
'Does not support to read array with more than 2 dimensions')
data = tiledb_arr[tuple(slcs)]
coords = data['coords']
# Values are read from the array's first attribute.
value = data[tiledb_arr.attr(0).name]
if tiledb_arr.ndim == 2:
# 2-d
# Shift global coords back to chunk-local before building COO.
ij = tuple(coords[tiledb_arr.domain.dim(k).name] - axis_offsets[k]
for k in range(tiledb_arr.ndim))
spmatrix = sps.coo_matrix((value, ij), shape=chunk.shape)
ctx[chunk.key] = SparseNDArray(spmatrix)
else:
# 1-d
ij = xp.zeros(coords.shape), \