import lz4.frame as lz4frame


def test_roundtrip_multiframe_3(data):
    nframes = 4

    compressed = b''
    ctx = lz4frame.create_compression_context()
    for _ in range(nframes):
        compressed += lz4frame.compress_begin(ctx)
        compressed += lz4frame.compress_chunk(ctx, data)
        compressed += lz4frame.compress_flush(ctx)

    decompressed = b''
    ctx = lz4frame.create_decompression_context()
    for _ in range(nframes):
        d, bytes_read, eof = lz4frame.decompress_chunk(ctx, compressed)
        decompressed += d
        # each call decodes exactly one frame; all frames are the same size
        assert eof == True
        assert bytes_read == len(compressed) // nframes

    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
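For contrast, the same round-trip can be done without explicit contexts through python-lz4's one-shot frame API. A minimal sketch, not part of the test suite above, with an arbitrary payload:

import lz4.frame

payload = b"example payload" * 1024
compressed = lz4.frame.compress(payload)
assert lz4.frame.decompress(compressed) == payload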
def _read_bucket(self, doc, column_set, column_dtypes, include_symbol, include_images, columns):
    rtn = {}
    if doc[VERSION] != 3:
        raise ArcticException("Unhandled document version: %s" % doc[VERSION])
    # np.cumsum copies the read-only array created with frombuffer
    rtn[INDEX] = np.cumsum(np.frombuffer(lz4_decompress(doc[INDEX]), dtype='uint64'))
    doc_length = len(rtn[INDEX])
    column_set.update(doc[COLUMNS].keys())

    # get the mask for the columns we're about to load
    union_mask = np.zeros((doc_length + 7) // 8, dtype='uint8')
    for c in column_set:
        try:
            coldata = doc[COLUMNS][c]
            # the or below will make a copy of this read-only array
            mask = np.frombuffer(lz4_decompress(coldata[ROWMASK]), dtype='uint8')
            union_mask = union_mask | mask
        except KeyError:
            rtn[c] = None
    union_mask = np.unpackbits(union_mask)[:doc_length].astype('bool')
    rtn_length = np.sum(union_mask)

    rtn[INDEX] = rtn[INDEX][union_mask]
    if include_symbol:
        rtn['SYMBOL'] = [doc[SYMBOL], ] * rtn_length

    # Unpack each requested column in turn
    for c in column_set:
        try:
            coldata = doc[COLUMNS][c]
            dtype = np.dtype(coldata[DTYPE])
            # values ends up being copied by pandas before being returned to the user. However, we
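The row masks above are stored as one bit per row, bit-packed and then lz4-compressed. A self-contained sketch of that round-trip, assuming lz4_decompress is lz4.block.decompress and using made-up data:

import numpy as np
import lz4.block

doc_length = 10
rowmask = np.array([1, 0, 1, 1, 0, 0, 1, 0, 1, 1], dtype='uint8')

# write side: bit-pack the mask, then lz4-compress the packed bytes
stored = lz4.block.compress(np.packbits(rowmask).tobytes())

# read side, mirroring _read_bucket
mask = np.frombuffer(lz4.block.decompress(stored), dtype='uint8')
union = np.unpackbits(mask)[:doc_length].astype('bool')
assert (union == rowmask.astype('bool')).all()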
from typing import List

import numpy as np
from lz4.block import compress, LZ4BlockError


def arrtolz4string_list(arr: np.ndarray) -> List[bytes]:
    """
    Converts a (multi-dimensional) array to a list of lz4 compressed strings.

    Args:
        arr: Input array.

    Returns:
        List of lz4 compressed strings.
    """
    if isinstance(arr, list):
        arr = np.array(arr)
    if len(arr) == 0:
        return [b""]
    try:
        str_lst = [compress(arr.tobytes())]
    # catch ValueError, which is thrown by the py3 lz4 version
    except (OverflowError, ValueError, LZ4BlockError):
        # payload too large for one lz4 block: split in half and recurse
        half_ix = len(arr) // 2
        str_lst = arrtolz4string_list(arr[:half_ix]) + \
            arrtolz4string_list(arr[half_ix:])
    return str_lst
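A plausible inverse helper, assuming the chunks come from lz4.block with its default size header and the caller knows the original dtype; lz4string_list_to_arr is a hypothetical name, not part of the original module, and any multi-dimensional shape would have to be restored separately since tobytes() flattens:

from typing import List

import numpy as np
from lz4.block import decompress


def lz4string_list_to_arr(str_lst: List[bytes], dtype) -> np.ndarray:
    """Hypothetical inverse of arrtolz4string_list for 1-D arrays."""
    if str_lst == [b""]:
        return np.empty(0, dtype=dtype)
    buf = b"".join(decompress(s) for s in str_lst)
    return np.frombuffer(buf, dtype=dtype)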
        key.fNbytes += key.fObjlen
        key.write(keycursor, context._sink)
        cursor.write_data(context._sink, givenbytes)

elif algorithm == uproot.const.kLZ4:
    algo = b"L4"
    try:
        import xxhash
    except ImportError:
        raise ImportError("Install xxhash package with:\n    pip install xxhash\nor\n    conda install -c conda-forge python-xxhash")
    try:
        import lz4.block
    except ImportError:
        raise ImportError("Install lz4 package with:\n    pip install lz4\nor\n    conda install -c anaconda lz4")
    if level >= 4:
        after_compressed = lz4.block.compress(givenbytes, compression=level, mode="high_compression", store_size=False)
    else:
        after_compressed = lz4.block.compress(givenbytes, store_size=False)
    compressedbytes = len(after_compressed) + 8
    checksum = xxhash.xxh64(after_compressed).digest()
    if (compressedbytes + 9) < uncompressedbytes:
        c1 = (compressedbytes >> 0) & 0xff
        c2 = (compressedbytes >> 8) & 0xff
        c3 = (compressedbytes >> 16) & 0xff
        method = lz4.library_version_number() // (100 * 100)
        cursor.write_fields(context._sink, _header, algo, method, c1, c2, c3, u1, u2, u3)
        cursor.write_data(context._sink, checksum)
        cursor.write_data(context._sink, after_compressed)
        key.fObjlen = uncompressedbytes
        key.fNbytes = compressedbytes + key.fKeylen + 9
        key.write(keycursor, context._sink)
    else:
        key.fObjlen = len(givenbytes)
        key.fNbytes += key.fObjlen
        key.write(keycursor, context._sink)
        cursor.write_data(context._sink, givenbytes)

elif algorithm == uproot.const.kLZMA:
    algo = b"XZ"
    try:
        import lzma
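The c1/c2/c3 fields above store the 24-bit compressed size least-significant byte first. A quick worked check of that encoding, with an arbitrary value:

compressedbytes = 0x0312AB                 # arbitrary example value
c1 = (compressedbytes >> 0) & 0xff         # 0xAB, least significant byte
c2 = (compressedbytes >> 8) & 0xff         # 0x12
c3 = (compressedbytes >> 16) & 0xff        # 0x03
assert (c3 << 16) | (c2 << 8) | c1 == compressedbytes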
@classmethod
def unserialize(cls, data, key, aggregation):
    """Unserialize an aggregated timeserie.

    :param data: Raw data buffer.
    :param key: A :class:`SplitKey` key.
    :param aggregation: The Aggregation object of this timeseries.
    """
    x, y = [], []
    if data:
        if cls.is_compressed(data):
            # Compressed format
            uncompressed = lz4.block.decompress(
                memoryview(data)[1:].tobytes())
            nb_points = len(uncompressed) // cls.COMPRESSED_SERIAL_LEN
            try:
                y = numpy.frombuffer(uncompressed, dtype='
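unserialize() treats the first byte as a format flag and hands the rest to lz4.block.decompress. A minimal sketch of producing a buffer in that shape; the b"c" flag byte is an assumption for illustration, not necessarily gnocchi's actual marker:

import lz4.block

points = b"\x00" * 144
data = b"c" + lz4.block.compress(points)   # hypothetical flag byte + payload

flag, body = data[:1], memoryview(data)[1:].tobytes()
assert lz4.block.decompress(body) == points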
raise ImportError("Install xxhash package with:\n pip install xxhash\nor\n conda install -c conda-forge python-xxhash")
try:
import lz4.block
except ImportError:
raise ImportError("Install lz4 package with:\n pip install lz4\nor\n conda install -c anaconda lz4")
if level >= 4:
after_compressed = lz4.block.compress(givenbytes, compression=level, mode="high_compression", store_size=False)
else:
after_compressed = lz4.block.compress(givenbytes, store_size=False)
compressedbytes = len(after_compressed) + 8
checksum = xxhash.xxh64(after_compressed).digest()
if (compressedbytes + 9) < uncompressedbytes:
c1 = (compressedbytes >> 0) & 0xff
c2 = (compressedbytes >> 8) & 0xff
c3 = (compressedbytes >> 16) & 0xff
method = lz4.library_version_number() // (100 * 100)
cursor.write_fields(context._sink, _header, algo, method, c1, c2, c3, u1, u2, u3)
cursor.write_data(context._sink, checksum)
cursor.write_data(context._sink, after_compressed)
key.fObjlen = uncompressedbytes
key.fNbytes = compressedbytes + key.fKeylen + 9
key.write(keycursor, context._sink)
else:
key.fObjlen = len(givenbytes)
key.fNbytes += key.fObjlen
key.write(keycursor, context._sink)
cursor.write_data(context._sink, givenbytes)
elif algorithm == uproot.const.kLZMA:
algo = b"XZ"
try:
import lzma
for modname, func in [("pylz4", pylz4.compress), ("rustlz4", rustlz4.compress)]:
timer = timeit.Timer(functools.partial(func, data))
elapsed = timer.timeit(number=number)
perf = size * number / elapsed / 1e6
name = "%s.%s" % (modname, func.__name__)
print("%24s: %8.2f MB/s" % (name, perf))
for modname, func in [("pylz4", pylz4.compressHC), ("rustlz4", rustlz4.compresshc)]:
timer = timeit.Timer(functools.partial(func, hcdata))
elapsed = timer.timeit(number=number)
perf = size * number / elapsed / 1e6
name = "%s.%s" % (modname, func.__name__)
print("%24s: %8.2f MB/s" % (name, perf))
data = pylz4.compress(data)
for modname, func in [("pylz4", pylz4.decompress), ("rustlz4", rustlz4.decompress)]:
timer = timeit.Timer(functools.partial(func, data))
elapsed = timer.timeit(number=number)
perf = size * number / elapsed / 1e6
name = "%s.%s" % (modname, func.__name__)
print("%24s: %8.2f MB/s" % (name, perf))