# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def main():
    """Delete the example TileDB group and array, then show error handling.

    Removing a path that does not exist raises tiledb.TileDBError, which we
    catch and report instead of letting it propagate.
    """
    ctx = tiledb.Ctx()
    # Remove the group and array created by the earlier examples.
    for uri in ("my_group", "my_dense_array"):
        tiledb.remove(ctx, uri)
    # Deleting a nonexistent path is expected to fail; demonstrate the error.
    try:
        tiledb.remove(ctx, "invalid_path")
    except tiledb.TileDBError:
        print("Failed to delete invalid path")
# NOTE(review): fragment — the enclosing function (pandas index -> TileDB
# dimension conversion) begins before this view, the trailing `dims = list(...)`
# expression is cut off mid-call, and the original indentation has been lost.
index_dict[name] = index.values
else:
# Any index type not matched by the branches above is rejected outright.
raise ValueError("Unhandled index type {}".format(type(index)))
# Infer per-dimension metadata (dtype, domain, tiling) for each index column.
dim_types = list(
dim_info_for_column(ctx, dataframe, values,
tile=tile, full_domain=full_domain,
index_dtype=index_dtype)
for values in index_dict.values()
)
# String-typed dimensions are only representable in a sparse array:
# promote sparse=None to True, but refuse an explicit dense request.
if any([d.dtype in (np.bytes_, np.unicode_) for d in dim_types]):
if sparse is False:
raise TileDBError("Cannot create dense array with string-typed dimensions")
elif sparse is None:
sparse = True
# Heterogeneous dimension dtypes likewise require a sparse array.
d0 = dim_types[0]
if not all(d0.dtype == d.dtype for d in dim_types[1:]):
if sparse is False:
raise TileDBError("Cannot create dense array with heterogeneous dimension data types")
elif sparse is None:
sparse = True
ndim = len(dim_types)
# Build one TileDB dimension per index column.
# NOTE(review): this list(...) expression continues past the visible fragment.
dims = list(
dim_for_column(ctx, name, dim_types[i], values,
tile=tile, full_domain=full_domain, ndim=ndim)
for i, (name, values) in enumerate(index_dict.items())
def dim_for_column(ctx, name, dim_info, col, tile=None, full_domain=False, ndim=None):
# Build a TileDB dimension for a single dataframe column.
# NOTE(review): the body is truncated in this view (the full_domain branch is
# cut off mid-statement) and the original indentation has been lost.
if isinstance(col, np.ndarray):
col_values = col
else:
# Presumably a pandas Series-like object; use its backing ndarray.
col_values = col.values
if tile is None:
# Choose a default tile extent; it shrinks as dimensionality grows.
if ndim is None:
raise TileDBError("Unexpected Nonetype ndim")
if ndim == 1:
tile = 10000
elif ndim == 2:
tile = 1000
elif ndim == 3:
tile = 100
else:
tile = 10
dtype = dim_info.dtype
if full_domain:
# NOTE(review): `np.unicode` (no trailing underscore) was removed in
# NumPy 1.20+; other checks in this file use `np.unicode_` — confirm
# and align.
if not dim_info.dtype in (np.bytes_, np.unicode):
# Use the full type domain, deferring to the constructor
(dtype_min, dtype_max) = tiledb.libtiledb.dtype_range(dim_info.dtype)
# NOTE(review): duplicated fragment of the dims-construction code earlier in
# this file; it starts and ends mid-expression and indentation has been lost.
dim_info_for_column(ctx, dataframe, values,
tile=tile, full_domain=full_domain,
index_dtype=index_dtype)
for values in index_dict.values()
)
# String-typed dimensions force a sparse array (dense is an error).
if any([d.dtype in (np.bytes_, np.unicode_) for d in dim_types]):
if sparse is False:
raise TileDBError("Cannot create dense array with string-typed dimensions")
elif sparse is None:
sparse = True
# Mixed dimension dtypes also force a sparse array.
d0 = dim_types[0]
if not all(d0.dtype == d.dtype for d in dim_types[1:]):
if sparse is False:
raise TileDBError("Cannot create dense array with heterogeneous dimension data types")
elif sparse is None:
sparse = True
ndim = len(dim_types)
dims = list(
dim_for_column(ctx, name, dim_types[i], values,
tile=tile, full_domain=full_domain, ndim=ndim)
for i, (name, values) in enumerate(index_dict.items())
)
# Append extra user-requested index dimensions taken from dataframe columns.
if index_dims:
for name in index_dims:
col = dataframe[name]
dims.append(
# NOTE(review): argument order here does not match the signature
# dim_for_column(ctx, name, dim_info, col, ...) above — verify.
dim_for_column(ctx, dataframe, col.values, name)
def tile(cls, op):
    """Tile the store op, ensuring the target TileDB array exists.

    Opens the array at ``op.tiledb_uri``; if that raises TileDBError the
    array is assumed missing and is created from the input tensor's schema.
    Returns the single tiled output tensor in a list.
    """
    import tiledb

    out_tensor = super(TensorTileDBDataStore, cls).tile(op)[0]
    tdb_ctx = tiledb.Ctx(op.tiledb_config)
    # Sparse tensors go to a SparseArray, dense ones to a DenseArray.
    array_cls = tiledb.SparseArray if out_tensor.issparse() else tiledb.DenseArray
    try:
        # Probe for an existing array at the target URI.
        array_cls(uri=op.tiledb_uri, key=op.tiledb_key,
                  timestamp=op.tiledb_timestamp, ctx=tdb_ctx)
    except tiledb.TileDBError:
        # not exist, try to create TileDB Array by given uri
        schema = get_tiledb_schema_from_tensor(op.input, tdb_ctx, op.input.nsplits)
        array_cls.create(op.tiledb_uri, schema, key=op.tiledb_key)
    return [out_tensor]
# NOTE(review): fragment of chunked-CSV ingestion setup — it starts inside an
# if/elif chain whose opening condition is outside this view, and the original
# indentation has been lost.
kwargs['nrows'] = 500
elif mode not in ['ingest', 'append']:
raise TileDBError("Invalid mode specified ('{}')".format(mode))
# 'chunksize' is a pandas.read_csv argument; it also drives chunked ingestion.
chunksize = kwargs.get('chunksize', None)
if multi_file and not chunksize:
raise TileDBError("Multiple input CSV files requires a 'chunksize' argument")
if chunksize is not None or multi_file:
# Without an explicit row cap we must allocate the full dimension domain.
if not 'nrows' in kwargs:
full_domain = True
array_created = False
if mode == 'schema_only':
raise TileDBError("schema_only ingestion not supported for chunked read")
elif mode == 'append':
# Appending implies the target array already exists.
array_created = True
# Keep a pandas-only copy of kwargs before mixing in TileDB arguments.
csv_kwargs = kwargs.copy()
kwargs.update(tiledb_args)
if multi_file:
input_csv_list = csv_file
# chunksize applies per-file iteration, not to the pandas reader here.
csv_kwargs.pop("chunksize")
else:
input_csv = csv_file
# Loop state for the chunked read that follows (outside this view).
keep_reading = True
rows_written = 0
csv_idx = 0
df_iter = None
# NOTE(review): duplicated fragment of the chunked-CSV ingestion setup above;
# surrounding function definition is outside this view and indentation is lost.
mode = kwargs.pop('mode', None)
if mode is not None:
tiledb_args['mode'] = mode
# For schema_only mode we need to pass a max read count into
# pandas.read_csv
# Note that 'nrows' is a pandas arg!
if mode == 'schema_only' and not 'nrows' in kwargs:
kwargs['nrows'] = 500
elif mode not in ['ingest', 'append']:
raise TileDBError("Invalid mode specified ('{}')".format(mode))
chunksize = kwargs.get('chunksize', None)
if multi_file and not chunksize:
raise TileDBError("Multiple input CSV files requires a 'chunksize' argument")
if chunksize is not None or multi_file:
# No explicit row cap -> use the full dimension domain.
if not 'nrows' in kwargs:
full_domain = True
array_created = False
if mode == 'schema_only':
raise TileDBError("schema_only ingestion not supported for chunked read")
elif mode == 'append':
# Appending implies the target array already exists.
array_created = True
# Preserve the pandas-only kwargs before merging TileDB-specific ones.
csv_kwargs = kwargs.copy()
kwargs.update(tiledb_args)
if multi_file:
input_csv_list = csv_file
# NOTE(review): third duplicated fragment of the CSV ingestion setup; it begins
# inside an if/elif over the csv_file argument type and indentation is lost.
# Wrap the path in TileDB's VFS so remote (e.g. S3) URIs can be read.
vfs = tiledb.VFS(ctx=ctx)
csv_file = tiledb.FileIO(vfs, csv_file, mode='rb')
elif isinstance(csv_file, (list, tuple)):
# TODO may be useful to support a callback here
multi_file = True
mode = kwargs.pop('mode', None)
if mode is not None:
tiledb_args['mode'] = mode
# For schema_only mode we need to pass a max read count into
# pandas.read_csv
# Note that 'nrows' is a pandas arg!
if mode == 'schema_only' and not 'nrows' in kwargs:
kwargs['nrows'] = 500
elif mode not in ['ingest', 'append']:
raise TileDBError("Invalid mode specified ('{}')".format(mode))
chunksize = kwargs.get('chunksize', None)
if multi_file and not chunksize:
raise TileDBError("Multiple input CSV files requires a 'chunksize' argument")
if chunksize is not None or multi_file:
# No explicit row cap -> use the full dimension domain.
if not 'nrows' in kwargs:
full_domain = True
array_created = False
if mode == 'schema_only':
raise TileDBError("schema_only ingestion not supported for chunked read")
elif mode == 'append':
# Appending implies the target array already exists.
array_created = True