Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def write_array(data_dict):
    """Write *data_dict* (attribute name -> values) into the TileDB array
    at the module-level ``array_name``.

    NOTE(review): relies on module globals ``tiledb`` and ``array_name`` —
    confirm both are defined at module scope.
    """
    # Open the array for writing under a fresh context and assign
    # the whole domain in one shot.
    write_ctx = tiledb.Ctx()
    with tiledb.open(array_name, 'w', ctx=write_ctx) as target:
        target[:] = data_dict
**Example:**
>>> import tiledb
>>> df = tiledb.open_dataframe("iris.tldb")
>>> tiledb.object_type("iris.tldb")
'array'
"""
# NOTE(review): fragment — the enclosing `def open_dataframe(uri, ...)` and the
# start of its docstring are outside this view; `uri`, `warnings`, `json`, and
# `tiledb` are presumably bound in the enclosing module scope — confirm.
warnings.warn("open_dataframe is deprecated and will be removed in the next release",
DeprecationWarning)
# Deferred import: pandas is only needed when this deprecated path is used.
import pandas as pd
# TODO support `distributed=True` option?
with tiledb.open(uri) as A:
#if not '__pandas_attribute_repr' in A.meta \
#    and not '__pandas_repr' in A.meta:
#    raise ValueError("Missing required keys to reload overloaded dataframe dtypes")
# TODO missing key should only be a warning, return best-effort?
# TODO this should be generalized for round-tripping overloadable types
# for any array (e.g. np.uint8 <> bool)
# Optional metadata written at array-creation time: JSON-encoded dtype
# overrides and index-dimension info. Both default to None when absent.
repr_meta = None
index_dims = None
if '__pandas_attribute_repr' in A.meta:
# backwards compatibility... unsure if necessary at this point
repr_meta = json.loads(A.meta['__pandas_attribute_repr'])
if '__pandas_index_dims' in A.meta:
index_dims = json.loads(A.meta['__pandas_index_dims'])
# Read the full array contents into a dict of column -> ndarray.
data = A[:]
# NOTE(review): fragment — the enclosing function (and the names `dataframe`,
# `date_spec`, `write`, `uri`, `ctx`, `row_start_idx`, `create_array`,
# `get_index_metadata`) are defined outside this view; verify against the
# full source.
# apply custom datetime parsing to given {'column_name': format_spec} pairs
# format_spec should be provided using Python format codes:
# https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior
if date_spec is not None:
if type(date_spec) is not dict:
raise TypeError("Expected 'date_spec' to be a dict, got {}".format(type(date_spec)))
for name, spec in date_spec.items():
dataframe[name] = pd.to_datetime(dataframe[name], format=spec)
if write:
# Convert each pandas Series to its backing ndarray for the TileDB write.
write_dict = {k: v.values for k,v in dataframe.to_dict(orient='series').items()}
index_metadata = get_index_metadata(dataframe)
try:
A = tiledb.open(uri, 'w', ctx=ctx)
if A.schema.sparse:
# Sparse write: coordinates come from the dataframe's (Multi)Index,
# one index level per array dimension.
coords = []
for k in range(A.schema.ndim):
coords.append(dataframe.index.get_level_values(k))
# TODO ensure correct col/dim ordering
A[tuple(coords)] = write_dict
else:
# Dense write: append rows starting at row_start_idx (default 0).
if row_start_idx is None:
row_start_idx = 0
row_end_idx = row_start_idx + len(dataframe)
A[row_start_idx:row_end_idx] = write_dict
# NOTE(review): truncated here — the `create_array` branch body is not
# visible in this chunk.
if create_array:
>>> import dask.array as da  # doctest: +SKIP
>>> tdb_ar = da.from_tiledb(uri)  # doctest: +SKIP
>>> tdb_ar.shape  # doctest: +SKIP
(3, 3)
>>> tdb_ar.mean().compute()  # doctest: +SKIP
4.0
"""
# NOTE(review): fragment — the enclosing `def` (presumably dask's
# `from_tiledb(uri, attribute=..., storage_options=..., ...)`) and the
# function's return are outside this view.
import tiledb
# Storage options double as the TileDB config; an optional encryption key
# is popped out separately because tiledb.open takes it as its own kwarg.
tiledb_config = storage_options or dict()
key = tiledb_config.pop("key", None)
# Accept either an already-open tiledb.Array or a URI string.
if isinstance(uri, tiledb.Array):
tdb = uri
else:
tdb = tiledb.open(uri, attr=attribute, config=tiledb_config, key=key)
if tdb.schema.sparse:
raise ValueError("Sparse TileDB arrays are not supported")
# For multi-attribute arrays the caller must disambiguate; for
# single-attribute arrays default to the sole attribute.
if not attribute:
if tdb.schema.nattr > 1:
raise TypeError(
"keyword 'attribute' must be provided"
"when loading a multi-attribute TileDB array"
)
else:
attribute = tdb.schema.attr(0).name
# Reading requires a read-mode handle; a write-mode handle is an error.
if tdb.iswritable:
raise ValueError("TileDB array must be open for reading")
def test_output_subarrays(test_dict):
    """Read back the array at the module-level ``array_name`` and check
    that its 'a2' attribute round-trips equal to ``test_dict['a2']``.

    NOTE(review): relies on module globals ``tiledb``, ``array_name``, and
    the helper ``assert_subarrays_equal`` defined elsewhere — confirm.
    """
    # Fix: the original imported numpy.testing.assert_array_equal but never
    # used it (the comparison goes through assert_subarrays_equal); the dead
    # local import has been removed.
    ctx = tiledb.Ctx()
    with tiledb.open(array_name, ctx=ctx) as A:
        rt_dict = A[:]
    assert_subarrays_equal(test_dict['a2'], rt_dict['a2'])
def array_exists(uri, isdense=False, issparse=False):
    """
    Check if an array exists and is open-able at the given URI.

    Optionally restrict to `isdense` or `issparse` array types; when both
    flags are set, `isdense` takes precedence (matching prior behavior).

    :param uri: URI to probe for a TileDB array
    :param isdense: if True, also require the array to be dense
    :param issparse: if True, also require the array to be sparse
    :return: True if the array opens (and matches the requested type)
    """
    try:
        a = tiledb.open(uri)
    except TileDBError:
        # Not open-able at this URI: treat as nonexistent (best-effort probe,
        # so the error details are intentionally discarded).
        return False
    # Fix: the original left the handle open if schema access raised between
    # open() and close(); try/finally guarantees the handle is released.
    try:
        if isdense:
            return not a.schema.sparse
        if issparse:
            return a.schema.sparse
        return True
    finally:
        a.close()