How to use the arctic._compression.decompress function in arctic

To help you get started, we've selected a few arctic examples based on popular ways decompress is used in public projects.

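Before working through the examples, here is a minimal round-trip sketch. The import path and function names are taken from the excerpts below; the payload itself is made up for illustration.

# Minimal sketch: compress a byte string with arctic's compression layer and
# restore it. Assumes compress/decompress are importable from
# arctic._compression, as the tests below do; the payload is illustrative.
from arctic._compression import compress, decompress

payload = b'example payload ' * 100      # hypothetical, repetitive data that compresses well
blob = compress(payload)                 # compressed bytes
assert decompress(blob) == payload       # decompress restores the original bytes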

From man-group/arctic: tests/integration/store/test_pandas_store.py
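This integration test patches decompress inside arctic.store._ndarray_store with counting mocks, then checks that reading a one-row date range decompresses fewer segments than reading the whole symbol.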
def test_daterange_end(library):
    df = DataFrame(index=date_range(dt(2001, 1, 1), freq='S', periods=30 * 1024),
                   data=np.tile(np.arange(30 * 1024), 100).reshape((-1, 100)))
    df.columns = [str(c) for c in df.columns]
    library.write('MYARR', df)
    mdecompressALL = Mock(side_effect=decompress)
    with patch('arctic.store._ndarray_store.decompress', mdecompressALL):
        library.read('MYARR').data
    mdecompressLR = Mock(side_effect=decompress)
    with patch('arctic.store._ndarray_store.decompress', mdecompressLR):
        result = library.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1])).data
    assert len(result) == 1
    assert mdecompressLR.call_count < mdecompressALL.call_count
From man-group/arctic: tests/integration/pluggable/test_pandas_store.py
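The same test against the pluggable key-value backend: decompress is patched in arctic.pluggable._kv_ndarray_store, and the narrow date-range read is again expected to make fewer decompress calls than the full read.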
def test_daterange_end(generic_version_store):
    df = DataFrame(index=date_range(dt(2001, 1, 1), freq='S', periods=30 * 1024),
                   data=np.tile(np.arange(30 * 1024), 100).reshape((-1, 100)))
    df.columns = [str(c) for c in df.columns]
    generic_version_store.write('MYARR', df)
    mdecompressALL = Mock(side_effect=decompress)
    with patch('arctic.pluggable._kv_ndarray_store.decompress', mdecompressALL):
        generic_version_store.read('MYARR').data
    mdecompressLR = Mock(side_effect=decompress)
    with patch('arctic.pluggable._kv_ndarray_store.decompress', mdecompressLR):
        result = generic_version_store.read('MYARR', date_range=DateRange(df.index[-1], df.index[-1])).data
    assert len(result) == 1
    assert mdecompressLR.call_count < mdecompressALL.call_count
From man-group/arctic: tests/integration/test_compress_integration.py
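Here truncated compressed buffers are fed to decompress and decompress_array (via the compression module, imported as c), and both are expected to raise; the assertion accepts the different corrupt-input messages the underlying decompressor may produce.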
def test_exceptions():
    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = data[0:16]
    with pytest.raises(Exception) as e:
        c.decompress(data)
    assert("decompressor wrote" in str(e.value).lower() or "corrupt input at" in str(e.value).lower() or "decompression failed: corrupt input" in str(e.value).lower())

    data = c.compress(b'1010101010100000000000000000000000000000000000000000000000000000000011111111111111111111111111111')
    data = [data[0:16] for x in (1, 2, 3)]
    with pytest.raises(Exception) as e:
        c.decompress_array(data)
    assert ("decompressor wrote" in str(e.value).lower() or "corrupt input at" in str(e.value).lower() or "decompression failed: corrupt input" in str(e.value).lower())
From man-group/arctic: tests/unit/tickstore/test_tickstore.py
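A tickstore test helper: column values and the row mask are stored compressed, so each is passed through decompress before being reinterpreted with np.frombuffer.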
def get_coldata(coldata):
    """ return values and rowmask """
    dtype = np.dtype(coldata[DTYPE])
    values = np.frombuffer(decompress(coldata[DATA]), dtype=dtype)
    rowmask = np.unpackbits(np.frombuffer(decompress(coldata[ROWMASK]), dtype='uint8'))
    return list(values), list(rowmask)
From man-group/arctic: arctic/pluggable/_pickle_store.py
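In the pluggable pickle store, read decompresses every stored segment, joins the pieces, and unpickles the result.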
def read(self, backing_store, library_name, version, symbol, **kwargs):
        segment_keys = version['segment_keys']
        data = b''.join(decompress(s) for s in backing_store.read_segments(library_name, segment_keys))
        return pickle_compat_load(io.BytesIO(data))
From man-group/arctic: arctic/pluggable/_kv_ndarray_store.py
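An excerpt from inside a read method of the pluggable ndarray store (the enclosing definition is not shown): segment keys are filtered against the requested index range, each segment is decompressed into a bytearray, and the bytes are reinterpreted as a NumPy array with the stored dtype and shape.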
        from_index = index_range[0] if index_range else None
        to_index = version['up_to']
        if index_range and index_range[1] and index_range[1] < version['up_to']:
            to_index = index_range[1]

        segment_keys = version['segment_keys']
        filtered_segment_keys = []
        for i, segment_index in enumerate(version['raw_segment_index']):
            if (from_index is None or segment_index >= from_index) and \
                    (to_index is None or segment_index <= to_index):
                filtered_segment_keys.append(segment_keys[i])

        data = bytearray()
        for segment in backing_store.read_segments(library_name, filtered_segment_keys):
            data.extend(decompress(segment))

        dtype = self._dtype(version['dtype'], version.get('dtype_metadata', {}))
        rtn = np.frombuffer(data, dtype=dtype).reshape(version.get('shape', (-1)))
        return rtn
From man-group/arctic: arctic/pluggable/_pandas_ndarray_store.py
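An excerpt from the segment-index builder (the enclosing definition is not shown): when data already exists, the stored index is decompressed, truncated at the append point, concatenated with the newly built index, and recompressed.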
        # Where index is the 0-based index of the datetime in the DataFrame
        # find the index of the first datetime64 column
        idx_col = self._datetime64_index(recarr)
        # if one exists let's create the index on it
        if idx_col is not None:
            new_segments = np.array(new_segments, dtype='i8')
            last_rows = recarr[new_segments - start]
            # create numpy index
            index = np.core.records.fromarrays([last_rows[idx_col]]
                                               + [new_segments, ],
                                               dtype=INDEX_DTYPE)
            # append to existing index if exists
            if existing_index:
                # existing_index_arr is read-only but it's never written to
                existing_index_arr = np.frombuffer(decompress(existing_index), dtype=INDEX_DTYPE)
                if start > 0:
                    existing_index_arr = existing_index_arr[existing_index_arr['index'] < start]
                index = np.concatenate((existing_index_arr, index))
            return Binary(compress(index.tostring()))
        elif existing_index:
            raise ArcticException("Could not find datetime64 index in item but existing data contains one")
        return None
From man-group/arctic: arctic/pluggable/_pandas_ndarray_store.py
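The read-side counterpart: _index_range decompresses the stored segment_index and searches it to translate a date_range into a row range.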
def _index_range(self, version, symbol, date_range=None, **kwargs):
        """ Given a version, read the segment_index and return the chunks associated
        with the date_range. As the segment index is (id -> last datetime)
        we need to take care in choosing the correct chunks. """
        if date_range and 'segment_index' in version:
            # index is read-only but it's never written to
            index = np.frombuffer(decompress(version['segment_index']), dtype=INDEX_DTYPE)
            dtcol = self._datetime64_index(index)
            if dtcol and len(index):
                dts = index[dtcol]
                start, end = _start_end(date_range, dts)
                if start > dts[-1]:
                    return -1, -1
                idxstart = min(np.searchsorted(dts, start), len(dts) - 1)
                idxend = min(np.searchsorted(dts, end, side='right'), len(dts) - 1)
                return int(index['index'][idxstart]), int(index['index'][idxend] + 1)
        return super(PandasStore, self)._index_range(version, symbol, **kwargs)
From man-group/arctic: arctic/serialization/numpy_arrays.py
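An excerpt from the column deserializer (the enclosing definition is not shown): each column's byte slice is decompressed and wrapped with np.frombuffer, an optional mask is decompressed and applied, and the columns are assembled into a DataFrame.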
"""
        cols = columns or doc[METADATA][COLUMNS]
        data = {}

        for col in cols:
            # if there is missing data in a chunk, we can default to NaN
            # and pandas will autofill the missing values to the correct length
            if col not in doc[METADATA][LENGTHS]:
                d = [np.nan]
            else:
                d = decompress(doc[DATA][doc[METADATA][LENGTHS][col][0]: doc[METADATA][LENGTHS][col][1] + 1])
                # d is read-only but that's not an issue since DataFrame will copy the data anyway.
                d = np.frombuffer(d, doc[METADATA][DTYPE][col])

                if MASK in doc[METADATA] and col in doc[METADATA][MASK]:
                    mask_data = decompress(doc[METADATA][MASK][col])
                    mask = np.frombuffer(mask_data, 'bool')
                    d = ma.masked_array(d, mask)
            data[col] = d

        # Copy into a new DataFrame
        return pd.DataFrame(data, columns=cols, copy=True)[cols]