import pytest


def test_spark_hist_adders():
    pyspark = pytest.importorskip("pyspark", minversion="2.4.1")

    import pandas as pd
    import pickle as pkl
    import lz4.frame as lz4f

    from coffea.util import numpy as np
    from coffea.processor.spark.spark_executor import agg_histos_raw, reduce_histos_raw
    from coffea.processor.test_items import NanoTestProcessor

    proc = NanoTestProcessor()

    one = proc.accumulator.identity()
    two = proc.accumulator.identity()
    hlist1 = [lz4f.compress(pkl.dumps(one))]
    hlist2 = [lz4f.compress(pkl.dumps(one)), lz4f.compress(pkl.dumps(two))]
    harray1 = np.array(hlist1, dtype='O')
    harray2 = np.array(hlist2, dtype='O')

    series1 = pd.Series(harray1)
    series2 = pd.Series(harray2)
    df = pd.DataFrame({'histos': harray2})

    # correctness of these functions is checked in test_spark_executor
    agg1 = agg_histos_raw(series1, proc, 1)
    agg2 = agg_histos_raw(series2, proc, 1)
    red = reduce_histos_raw(df, proc, 1)
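For context, a minimal sketch (not part of the coffea test above) of the pickle-plus-lz4 round trip that the raw histogram adders operate on; the dict here is only a stand-in for a real accumulator.

import pickle as pkl
import lz4.frame as lz4f

# Stand-in payload; coffea would use an accumulator object here.
histo = {'pt': [1, 2, 3]}

# The raw adders receive lists of lz4-compressed pickles like this one.
blob = lz4f.compress(pkl.dumps(histo))

# Undoing the two steps recovers the original object.
assert pkl.loads(lz4f.decompress(blob)) == histo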
def test_roundtrip_1(
        data,
        block_size,
        block_linked,
        content_checksum,
        block_checksum,
        compression_level,
        store_size):
    compressed = lz4frame.compress(
        data,
        store_size=store_size,
        compression_level=compression_level,
        block_size=block_size,
        block_linked=block_linked,
        content_checksum=content_checksum,
        block_checksum=block_checksum,
    )
    get_frame_info_check(
        compressed,
        len(data),
        store_size,
        block_size,
        block_linked,
        content_checksum,
    )
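For reference, a self-contained round trip using the same lz4.frame keyword arguments; get_frame_info is the library call that the test's get_frame_info_check helper presumably builds on (an assumption, since that helper is not shown here).

import os
import lz4.frame as lz4frame

data = os.urandom(64 * 1024)

compressed = lz4frame.compress(
    data,
    compression_level=lz4frame.COMPRESSIONLEVEL_MINHC,
    block_size=lz4frame.BLOCKSIZE_MAX64KB,
    block_linked=True,
    content_checksum=True,
    block_checksum=False,
    store_size=True,
)

# The frame header records the options requested above.
info = lz4frame.get_frame_info(compressed)
assert info['content_size'] == len(data)

assert lz4frame.decompress(compressed) == data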
def content_checksum(request):
    return request.param
@pytest.fixture(
    params=[
        (lz4frame.FRAMETYPE_FRAME),
        (lz4frame.FRAMETYPE_SKIPPABLEFRAME),
    ]
)
def frame_type(request):
    return request.param
compression_levels = list(range(16)) + [
    lz4frame.COMPRESSIONLEVEL_MIN,
    lz4frame.COMPRESSIONLEVEL_MINHC,
    lz4frame.COMPRESSIONLEVEL_MAX,
]
compression_levels = [
    (i) for i in compression_levels
]


@pytest.fixture(
    params=compression_levels
)
def compression_level(request):
    return request.param
@pytest.fixture(
    params=[
        (True),
        (False),
    ]
)
def block_checksum(request):
    return request.param
def test_roundtrip_multiframe_1(data):
    nframes = 4

    compressed = b''
    for _ in range(nframes):
        compressed += lz4frame.compress(data)

    decompressed = b''
    for _ in range(nframes):
        decompressed += lz4frame.decompress(compressed)

    assert len(decompressed) == nframes * len(data)
    assert data * nframes == decompressed
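When the frames need to be handled individually, decompress can also report how many input bytes it consumed (return_bytes_read is a standard lz4.frame keyword, not something the test above uses), which lets a caller walk a concatenated buffer frame by frame:

import lz4.frame as lz4frame

data = b'x' * 1000
compressed = lz4frame.compress(data) * 2  # two identical frames back to back

decompressed = b''
offset = 0
while offset < len(compressed):
    # decompress() stops after the first complete frame it finds;
    # bytes_read tells us where the next frame starts.
    chunk, bytes_read = lz4frame.decompress(compressed[offset:], return_bytes_read=True)
    decompressed += chunk
    offset += bytes_read

assert decompressed == data * 2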
import bz2
from functools import partial

import blosc
import zstd
import lz4.frame as lz4

import strax

export, __all__ = strax.exporter()

blosc.set_releasegil(True)

COMPRESSORS = dict(
    bz2=dict(
        compress=bz2.compress,
        decompress=bz2.decompress),
    zstd=dict(
        compress=zstd.compress,
        decompress=zstd.decompress),
    blosc=dict(
        compress=partial(blosc.compress, shuffle=False),
        decompress=blosc.decompress),
    lz4=dict(compress=lz4.compress, decompress=lz4.decompress)
)
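A quick illustration of how the registry can be used, assuming the imports above; the _roundtrip helper exists only for this example.

def _roundtrip(name, payload):
    # Look up a compressor pair by name and push bytes through it.
    c = COMPRESSORS[name]
    return c['decompress'](c['compress'](payload))

assert _roundtrip('lz4', b'some raw bytes') == b'some raw bytes'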
@export
def load_file(f, compressor, dtype):
    """Read and return data from file

    :param f: file name or handle to read from
    :param compressor: compressor to use for decompressing. If not passed,
        will try to load it from json metadata file.
    :param dtype: numpy dtype of data to load
    """
    if isinstance(f, str):
        with open(f, mode='rb') as f:
            return _load_file(f, compressor, dtype)
    else:
        return _load_file(f, compressor, dtype)
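The private _load_file helper is not included in this excerpt; the following is only a sketch of what it plausibly does, based on the COMPRESSORS registry and the dtype parameter documented above (the real strax implementation differs in details such as error handling).

import numpy as np

def _load_file(f, compressor, dtype):
    # Sketch: read the raw bytes, undo the named compressor,
    # and view the result as a numpy array of the given dtype.
    data = f.read()
    if not data:
        return np.zeros(0, dtype=dtype)
    data = COMPRESSORS[compressor]['decompress'](data)
    return np.frombuffer(data, dtype=dtype)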
import base64

import lz4.frame
import pyarrow
from six import string_types

def ray_decompress(data):
    if isinstance(data, bytes) or isinstance(data, string_types):
        data = base64.b64decode(data)
    data = lz4.frame.decompress(data)
    data = pyarrow.deserialize(data)
    return data
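A hypothetical inverse for illustration only; ray_compress is not part of the snippet above. It assumes the same pyarrow serialization and base64 transport encoding (note that pyarrow.serialize/deserialize are deprecated in recent pyarrow releases).

import base64
import lz4.frame
import pyarrow

def ray_compress(obj):
    # Serialize with pyarrow, compress the buffer, then base64-encode
    # so the result can travel over text-only transports.
    data = pyarrow.serialize(obj).to_buffer().to_pybytes()
    data = lz4.frame.compress(data)
    return base64.b64encode(data)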
def decompress(data):
    if LZ4_ENABLED:
        data = base64.b64decode(data)
        data = lz4.frame.decompress(data)
        data = deserialize(data)
    return data
elif confidence < 50:
    sindicator.confidence = "Low"
elif confidence < 75:
    sindicator.confidence = "Medium"
else:
    sindicator.confidence = "High"

sindicator.add_indicator_type(type_mapper['indicator_type'])
sindicator.add_observable(o)

sp.add_indicator(sindicator)

spackage = 'lz4' + lz4.frame.compress(
    sp.to_json(),
    compression_level=lz4.frame.COMPRESSIONLEVEL_MINHC
)
with self.SR.pipeline() as p:
    p.multi()

    p.zadd(self.redis_skey, score, spid)
    p.hset(self.redis_skey_value, spid, spackage)

    result = p.execute()[0]

self.statistics['added'] += result
def process_compressed_mdblob(self, compressed_data, skip_personal_metadata_payload=True, external_thread=False):
    try:
        decompressed_data = lz4.frame.decompress(compressed_data)
    except RuntimeError:
        self._logger.warning("Unable to decompress mdblob")
        return []
    return self.process_squashed_mdblob(decompressed_data, skip_personal_metadata_payload, external_thread)
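A minimal sketch of the behavior this method relies on, using only the lz4.frame API: a well-formed frame round-trips, while truncated input raises the RuntimeError that process_compressed_mdblob catches and turns into an empty result.

import lz4.frame

payload = b'serialized metadata blob'
compressed = lz4.frame.compress(payload)

# Well-formed data decompresses back to the original payload.
assert lz4.frame.decompress(compressed) == payload

# A truncated frame raises RuntimeError.
try:
    lz4.frame.decompress(compressed[:10])
    raise AssertionError('expected RuntimeError for truncated input')
except RuntimeError:
    pass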