def test_roundtrip_LZ4FrameCompressor(
        data,
        chunks,
        block_size,
        block_linked,
        reset,
        store_size,
        block_checksum,
        content_checksum):
    with lz4frame.LZ4FrameCompressor(
            block_size=block_size,
            block_linked=block_linked,
            content_checksum=content_checksum,
            block_checksum=block_checksum,
    ) as compressor:
        def do_compress():
            if store_size is True:
                compressed = compressor.begin(source_size=len(data))
            else:
                compressed = compressor.begin()
            for chunk in get_chunked(data, chunks):
                compressed += compressor.compress(chunk)
            compressed += compressor.flush()
            return compressed

        # Assumed continuation (the snippet is cut off here): compress once,
        # optionally reset the compressor and compress again, then check the
        # round trip below.
        if reset is True:
            do_compress()
            compressor.reset()
        compressed = do_compress()

    decompressed, bytes_read = lz4frame.decompress(
        compressed, return_bytes_read=True)
    assert bytes_read == len(compressed)
    assert decompressed == data
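
# The test above relies on a get_chunked helper from the surrounding test suite.
# A minimal sketch of what such a helper could look like, assuming `chunks` is the
# number of roughly equal pieces to split `data` into (an assumption, not the
# project's actual implementation):
def get_chunked(data, chunks):
    step = max(1, len(data) // chunks)
    for start in range(0, len(data), step):
        yield data[start:start + step]
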
def test_roundtrip_multiframe_4(data):
    nframes = 4

    compressed = b''
    with lz4frame.LZ4FrameCompressor() as compressor:
        for _ in range(nframes):
            compressed += compressor.begin()
            compressed += compressor.compress(data)
            compressed += compressor.flush()

    decompressed = b''
    with lz4frame.LZ4FrameDecompressor() as decompressor:
        for i in range(nframes):
            if i == 0:
                d = compressed
            else:
                d = decompressor.unused_data
            decompressed += decompressor.decompress(d)
            assert decompressor.eof
            assert decompressor.needs_input
            if i == nframes - 1:
                # Assumed completion (the snippet is cut off here): the last
                # frame should leave no trailing data behind.
                assert decompressor.unused_data == b''

    assert decompressed == data * nframes
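
# Illustration only (hypothetical helper, not part of the test suite): the
# eof / unused_data pattern exercised above generalizes to decoding a buffer
# holding any number of concatenated LZ4 frames.
def decompress_all_frames(buf):
    out = b''
    while buf:
        with lz4frame.LZ4FrameDecompressor() as d:
            out += d.decompress(buf)
            buf = d.unused_data
    return out
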
def entries_to_chunk(metadata_list, chunk_size, start_index=0):
    """
    For efficiency reasons, this is deliberately written in C style.

    :param metadata_list: the list of metadata to process.
    :param chunk_size: the desired chunk size limit, in bytes. The produced chunk's
        size will never exceed this value.
    :param start_index: the index of the element of metadata_list from which the
        processing should start.
    :return: a (chunk, last_entry_index) tuple, where chunk is the resulting chunk
        in string form and last_entry_index is the index of the last element of the
        input list that was put into the chunk.
    """
    # Try to fit as many serialized entries into this chunk as chunk_size permits,
    # tracking the compressed offset as we go.
    last_entry_index = None
    with lz4.frame.LZ4FrameCompressor(auto_flush=True) as c:
        header = c.begin()
        offset = len(header)
        out_list = [header]  # LZ4 header
        for index, metadata in enumerate(metadata_list[start_index:], start_index):
            # TODELETE and LZ4_END_MARK_SIZE are module-level constants of the
            # original project.
            blob = c.compress(metadata.serialized_delete()
                              if metadata.status == TODELETE
                              else metadata.serialized())
            # Chunk size limit reached?
            if offset + len(blob) > (chunk_size - LZ4_END_MARK_SIZE):
                break
            # Now that we know it will fit, we can safely append it.
            offset += len(blob)
            last_entry_index = index
            out_list.append(blob)
        out_list.append(c.flush())  # LZ4 end mark

    chunk = b''.join(out_list)
    if last_entry_index is None:
        # Assumed handling (the original snippet is cut off here): not even a
        # single entry fit within chunk_size.
        raise ValueError("a single entry exceeds the chunk_size limit")
    return chunk, last_entry_index
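
# Illustration only (serialize_all is a made-up wrapper, not part of the original
# project): a caller would typically advance start_index until every entry has
# been packed into some chunk.
def serialize_all(metadata_list, chunk_size):
    chunks, index = [], 0
    while index < len(metadata_list):
        chunk, last_index = entries_to_chunk(metadata_list, chunk_size,
                                             start_index=index)
        chunks.append(chunk)
        index = last_index + 1
    return chunks
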
class _NoCompress:  # reconstructed wrapper: the original class name is not shown
    @staticmethod
    def compress(data):  # pragma: no cover
        return data

    @staticmethod
    def decompress(data):  # pragma: no cover
        return data
# Optional lz4 support; gzip/zlib serve as the always-available fallback.
# (functools, gzip and zlib are imported here for completeness; BUFFER_SIZE is
# assumed to be defined earlier in the original module.)
import functools
import gzip
import zlib

try:
    import lz4.frame
    try:
        lz4.frame._compression.BUFFER_SIZE = BUFFER_SIZE
    except AttributeError:  # pragma: no cover
        pass
    lz4_open = functools.partial(lz4.frame.open,
                                 block_size=lz4.frame.BLOCKSIZE_MAX1MB)
    lz4_compress = functools.partial(lz4.frame.compress,
                                     block_size=lz4.frame.BLOCKSIZE_MAX1MB)
    lz4_compressobj = lz4.frame.LZ4FrameCompressor
    lz4_decompress = lz4.frame.decompress
    lz4_decompressobj = lz4.frame.LZ4FrameDecompressor
except ImportError:  # pragma: no cover
    lz4_open = None
    lz4_compress, lz4_compressobj = None, None
    lz4_decompress, lz4_decompressobj = None, None

gz_open = gzip.open
gz_compressobj = functools.partial(
    lambda level=-1: zlib.compressobj(level, zlib.DEFLATED,
                                      16 + zlib.MAX_WBITS, zlib.DEF_MEM_LEVEL, 0)
)
gz_decompressobj = functools.partial(lambda: zlib.decompressobj(16 + zlib.MAX_WBITS))
gz_compress = gzip.compress
gz_decompress = gzip.decompress

SERIAL_VERSION = 0
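
# Illustration only (not part of the original module): whichever backend is
# available, the aliases defined above can be used interchangeably for a simple
# round trip; `payload` is a made-up name.
def _roundtrip_example(payload=b'some bytes' * 1000):
    compress = lz4_compress if lz4_compress is not None else gz_compress
    decompress = lz4_decompress if lz4_decompress is not None else gz_decompress
    return decompress(compress(payload)) == payload
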
approx_index.build(approx_trees)
approx_index_file_path = os.path.join(
    tempfile.mkdtemp(),
    fast_md5_file(input_file_path) + '.ann')
eprint("Dumping approximate nearest neighbors index... \
(this may take some time)")
approx_index.save(approx_index_file_path)
eprint("Compressing approximate nearest neighbors index... \
(this may take some time)")
chunk_size = 104857600  # read the index file in 100 MiB pieces
full_size = os.path.getsize(approx_index_file_path)
insert_approx_query = """
    INSERT INTO magnitude_approx(trees, index_file) VALUES (?, ?);
"""
# Stream the index file through a single LZ4 frame, storing each compressed
# piece as its own database row.
with open(approx_index_file_path, 'rb') as ifh, \
        lz4.frame.LZ4FrameCompressor() as compressor:
    for i, chunk in enumerate(iter(partial(ifh.read, chunk_size), b'')):
        if i == 0:
            chunk = compressor.begin() + compressor.compress(chunk)
        else:
            chunk = compressor.compress(chunk)
        eprint(str((ifh.tell() / float(full_size)) * 100.0) + "%")
        if len(chunk) > 0:
            db.execute(insert_approx_query,
                       (approx_trees, sqlite3.Binary(chunk)))
    chunk = compressor.flush()
    if len(chunk) > 0:
        db.execute(insert_approx_query,
                   (approx_trees, sqlite3.Binary(chunk)))
files_to_remove.append(approx_index_file_path)
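
# Illustration only (hypothetical read path, not part of the converter): every row
# stores a piece of one continuous LZ4 frame (begin(), compress() calls, flush()),
# so reading the index back means concatenating the rows in insertion order and
# feeding them to a single LZ4FrameDecompressor.
def read_approx_index(db):
    decompressor = lz4.frame.LZ4FrameDecompressor()
    parts = []
    for (piece,) in db.execute(
            "SELECT index_file FROM magnitude_approx ORDER BY rowid;"):
        parts.append(decompressor.decompress(bytes(piece)))
    return b''.join(parts)
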
for meta_name, meta_path in metas:
    if not meta_path:
        continue
    eprint("Compressing meta file... \
(this may take some time)")
    chunk_size = 104857600
    full_size = os.path.getsize(meta_path)
    insert_meta_query = """
        INSERT INTO magnitude_""" + meta_name + """(meta_file)
        VALUES (?);
    """
    with open(meta_path, 'rb') as ifh, \
            lz4.frame.LZ4FrameCompressor() as compressor:
        for i, chunk in enumerate(iter(partial(ifh.read, chunk_size), b'')):
            if i == 0:
                chunk = compressor.begin() + compressor.compress(chunk)
            else:
                chunk = compressor.compress(chunk)
            eprint(str((ifh.tell() / float(full_size)) * 100.0) + "%")
            if len(chunk) > 0:
                db.execute(insert_meta_query,
                           (sqlite3.Binary(chunk),))
        chunk = compressor.flush()
        if len(chunk) > 0:
            db.execute(insert_meta_query,
                       (sqlite3.Binary(chunk),))
# Clean up
if len(files_to_remove) > 0:
    # Assumed completion (the snippet ends here): remove the temporary files
    # collected above.
    for file_to_remove in files_to_remove:
        os.remove(file_to_remove)