def test_daterange_end(library):
    df = DataFrame(index=date_range(dt(2001, 1, 1), freq='S', periods=30 * 1024),
                   data=np.tile(np.arange(30 * 1024), 100).reshape((-1, 100)))
    df.columns = [str(c) for c in df.columns]
    library.write('MYARR', df)
    mdecompressALL = Mock(side_effect=decompress)
    with patch('arctic.store._ndarray_store.decompress', mdecompressALL):
        library.read('MYARR').data
    mdecompressLR = Mock(side_effect=decompress)
    with patch('arctic.store._ndarray_store.decompress', mdecompressLR):
        result = library.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1])).data
    assert len(result) == 1
    assert mdecompressLR.call_count < mdecompressALL.call_count
def test_daterange_end(generic_version_store):
    df = DataFrame(index=date_range(dt(2001, 1, 1), freq='S', periods=30 * 1024),
                   data=np.tile(np.arange(30 * 1024), 100).reshape((-1, 100)))
    df.columns = [str(c) for c in df.columns]
    generic_version_store.write('MYARR', df)
    mdecompressALL = Mock(side_effect=decompress)
    with patch('arctic.pluggable._kv_ndarray_store.decompress', mdecompressALL):
        generic_version_store.read('MYARR').data
    mdecompressLR = Mock(side_effect=decompress)
    with patch('arctic.pluggable._kv_ndarray_store.decompress', mdecompressLR):
        result = generic_version_store.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1])).data
    assert len(result) == 1
    assert mdecompressLR.call_count < mdecompressALL.call_count
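Both variants of test_daterange_end rely on a pre-built store fixture. A minimal sketch of what the classic VersionStore-backed `library` fixture could look like, assuming a local MongoDB instance; the host and library name below are placeholders, not part of the original tests:

import pytest
from arctic import Arctic

@pytest.fixture
def library():
    store = Arctic('localhost')                # assumes MongoDB running locally
    store.initialize_library('test.scratch')   # hypothetical library name
    yield store['test.scratch']
    store.delete_library('test.scratch')       # clean up after the test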
def test_exceptions():
    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = data[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(data)
    assert ("decompressor wrote" in str(e.value).lower()
            or "corrupt input at" in str(e.value).lower()
            or "decompression failed: corrupt input" in str(e.value).lower())
    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = [data[0:16] for x in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(data)
    assert ("decompressor wrote" in str(e.value).lower()
            or "corrupt input at" in str(e.value).lower()
            or "decompression failed: corrupt input" in str(e.value).lower())
def get_coldata(coldata):
    """ return values and rowmask """
    dtype = np.dtype(coldata[DTYPE])
    values = np.frombuffer(decompress(coldata[DATA]), dtype=dtype)
    rowmask = np.unpackbits(np.frombuffer(decompress(coldata[ROWMASK]), dtype='uint8'))
    return list(values), list(rowmask)
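get_coldata only decodes a column document; for context, a rough sketch of the inverse operation, assuming DTYPE, DATA and ROWMASK are plain dict keys and that compress comes from arctic._compression (both assumptions, not taken from the original helper):

import numpy as np
from arctic._compression import compress

def make_coldata(values, rowmask):
    # values: column data; rowmask: one 0/1 flag per row
    values = np.asarray(values)
    rowmask = np.asarray(rowmask, dtype='uint8')
    return {
        DTYPE: str(values.dtype),                           # e.g. 'int64'
        DATA: compress(values.tobytes()),                   # raw column bytes, LZ4-compressed
        ROWMASK: compress(np.packbits(rowmask).tobytes()),  # bit-packed, then compressed
    }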
def read(self, backing_store, library_name, version, symbol, **kwargs):
    segment_keys = version['segment_keys']
    data = b''.join(decompress(s) for s in backing_store.read_segments(library_name, segment_keys))
    return pickle_compat_load(io.BytesIO(data))
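The read() above simply joins the decompressed segments and unpickles the result. A self-contained sketch of that round trip, with the backing store and segment keys elided and the chunk size invented purely for illustration:

import pickle
from arctic._compression import compress, decompress

obj = {'a': list(range(10))}
raw = pickle.dumps(obj)
segments = [compress(raw[i:i + 4]) for i in range(0, len(raw), 4)]  # 4-byte chunks, illustrative only

data = b''.join(decompress(s) for s in segments)
assert pickle.loads(data) == obj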
'''
from_index = index_range[0] if index_range else None
to_index = version['up_to']
if index_range and index_range[1] and index_range[1] < version['up_to']:
    to_index = index_range[1]
segment_keys = version['segment_keys']
filtered_segment_keys = []
for i, segment_index in enumerate(version['raw_segment_index']):
    if (from_index is None or segment_index >= from_index) and \
            (to_index is None or segment_index <= to_index):
        filtered_segment_keys.append(segment_keys[i])
data = bytearray()
for segment in backing_store.read_segments(library_name, filtered_segment_keys):
    data.extend(decompress(segment))
dtype = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
rtn = np.frombuffer(data, dtype=dtype).reshape(version.get('shape', (-1)))
return rtn
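A toy walk-through of the key-filtering loop above, with made-up values: raw_segment_index records one index value per segment, and only keys whose value falls inside [from_index, to_index] are fetched and decompressed:

raw_segment_index = [999, 1999, 2999, 3999]
segment_keys = ['seg0', 'seg1', 'seg2', 'seg3']
from_index, to_index = 1500, 3500

filtered = [segment_keys[i] for i, seg_idx in enumerate(raw_segment_index)
            if (from_index is None or seg_idx >= from_index)
            and (to_index is None or seg_idx <= to_index)]
assert filtered == ['seg1', 'seg2']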
Where index is the 0-based index of the datetime in the DataFrame
"""
# find the index of the first datetime64 column
idx_col = self._datetime64_index(recarr)
# if one exists let's create the index on it
if idx_col is not None:
    new_segments = np.array(new_segments, dtype='i8')
    last_rows = recarr[new_segments - start]
    # create numpy index
    index = np.core.records.fromarrays([last_rows[idx_col]] + [new_segments, ],
                                       dtype=INDEX_DTYPE)
    # append to existing index if exists
    if existing_index:
        # existing_index_arr is read-only but it's never written to
        existing_index_arr = np.frombuffer(decompress(existing_index), dtype=INDEX_DTYPE)
        if start > 0:
            existing_index_arr = existing_index_arr[existing_index_arr['index'] < start]
        index = np.concatenate((existing_index_arr, index))
    return Binary(compress(index.tostring()))
elif existing_index:
    raise ArcticException("Could not find datetime64 index in item but existing data contains one")
return None
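The Binary returned above is the compressed segment index that _index_range consumes below. A sketch of inspecting one, assuming the structured dtype pairs a datetime64[ns] field with an 'index' field holding the 0-based row position (the field names here are assumptions; the real layout is defined by INDEX_DTYPE):

import numpy as np
from arctic._compression import decompress

ASSUMED_INDEX_DTYPE = [('datetime', 'datetime64[ns]'), ('index', 'i8')]

def dump_segment_index(blob):
    arr = np.frombuffer(decompress(blob), dtype=ASSUMED_INDEX_DTYPE)
    for row in arr:
        # each entry maps the last datetime covered by a segment to its row index
        print(row['datetime'], int(row['index']))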
def _index_range(self, version, symbol, date_range=None, **kwargs):
    """ Given a version, read the segment_index and return the chunks associated
    with the date_range. As the segment index is (id -> last datetime)
    we need to take care in choosing the correct chunks. """
    if date_range and 'segment_index' in version:
        # index is read-only but it's never written to
        index = np.frombuffer(decompress(version['segment_index']), dtype=INDEX_DTYPE)
        dtcol = self._datetime64_index(index)
        if dtcol and len(index):
            dts = index[dtcol]
            start, end = _start_end(date_range, dts)
            if start > dts[-1]:
                return -1, -1
            idxstart = min(np.searchsorted(dts, start), len(dts) - 1)
            idxend = min(np.searchsorted(dts, end, side='right'), len(dts) - 1)
            return int(index['index'][idxstart]), int(index['index'][idxend] + 1)
    return super(PandasStore, self)._index_range(version, symbol, **kwargs)
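A toy walk-through of the searchsorted logic above, with invented timestamps: dts holds the last timestamp of each segment, so the left bound selects the first segment that can contain `start` and the right bound the last segment containing `end`:

import numpy as np

dts = np.array(['2001-01-01T00:10', '2001-01-01T00:20', '2001-01-01T00:30'],
               dtype='datetime64[ns]')
start = np.datetime64('2001-01-01T00:12')
end = np.datetime64('2001-01-01T00:25')

idxstart = min(np.searchsorted(dts, start), len(dts) - 1)            # -> 1
idxend = min(np.searchsorted(dts, end, side='right'), len(dts) - 1)  # -> 2
assert (idxstart, idxend) == (1, 2)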
"""
cols = columns or doc[METADATA][COLUMNS]
data = {}
for col in cols:
# if there is missing data in a chunk, we can default to NaN
# and pandas will autofill the missing values to the correct length
if col not in doc[METADATA][LENGTHS]:
d = [np.nan]
else:
d = decompress(doc[DATA][doc[METADATA][LENGTHS][col][0]: doc[METADATA][LENGTHS][col][1] + 1])
# d is ready-only but that's not an issue since DataFrame will copy the data anyway.
d = np.frombuffer(d, doc[METADATA][DTYPE][col])
if MASK in doc[METADATA] and col in doc[METADATA][MASK]:
mask_data = decompress(doc[METADATA][MASK][col])
mask = np.frombuffer(mask_data, 'bool')
d = ma.masked_array(d, mask)
data[col] = d
# Copy into
return pd.DataFrame(data, columns=cols, copy=True)[cols]
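A small illustration of the masked-column branch above, with made-up data: once a masked column is copied into a DataFrame, the masked positions surface as NaN:

import numpy as np
import numpy.ma as ma
import pandas as pd

d = np.array([1.0, 2.0, 3.0])
mask = np.array([False, True, False])
col = ma.masked_array(d, mask)

frame = pd.DataFrame({'price': col}, copy=True)
assert np.isnan(frame['price'].iloc[1])   # the masked entry becomes NaN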