How to use the blosc.compress function in blosc

To help you get started, we've selected a few blosc.compress examples based on popular ways the function is used in public projects.
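
Before looking at the project snippets, here is a minimal round trip that shows the core pattern; it assumes only that the python-blosc and numpy packages are installed. The typesize argument should match the element size of the data being compressed so the shuffle filter can group bytes of equal significance.

import blosc
import numpy as np

arr = np.arange(1000, dtype=np.float64)

# Compress the raw buffer; typesize matches the dtype's item size so
# the byte-shuffle filter can regroup bytes of the same significance.
packed = blosc.compress(arr.tobytes(), typesize=arr.dtype.itemsize,
                        clevel=9, cname='blosclz')

# Decompress and rebuild the array; blosc stores its own header, so
# no sizes need to be passed back in.
restored = np.frombuffer(blosc.decompress(packed), dtype=arr.dtype)
assert np.array_equal(arr, restored)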


github daeyun / object-shapes-cvpr18 / python / mvshape / io_utils.py
def save_array_compressed(filename, arr: np.ndarray):
    encoded = array_to_bytes(arr)
    compressed = blosc.compress(encoded, arr.dtype.itemsize, clevel=9, shuffle=True, cname='lz4hc')
    with open(filename, mode='wb') as f:
        f.write(compressed)
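
The matching load path is not shown in the snippet. Below is a minimal sketch of one, assuming the caller knows the array's dtype and shape; the project's own array_to_bytes encoder presumably embeds that metadata in the payload, so this generic version is an approximation, not the project's API.

import blosc
import numpy as np

def load_array_compressed(filename, dtype, shape):
    # Hypothetical inverse of save_array_compressed: read the blob,
    # decompress it, and reinterpret the raw bytes as an ndarray.
    with open(filename, mode='rb') as f:
        compressed = f.read()
    return np.frombuffer(blosc.decompress(compressed), dtype=dtype).reshape(shape)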
github tensorwerk / hangar-py / src / hangar / remote / chunks.py
def missingHashIterator(commit, hashes, err, pb2_func):
    hash_bytes = msgpack.packb(hashes)
    comp_bytes = blosc.compress(
        hash_bytes, cname='blosclz', clevel=3, typesize=1, shuffle=blosc.SHUFFLE)

    rpc_method = pb2_func(
        commit=commit,
        total_byte_size=len(comp_bytes),
        error=err)

    chunkIterator = chunk_bytes(comp_bytes)
    for bchunk in chunkIterator:
        rpc_method.hashs = bchunk
        yield rpc_method
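
On the receiving side, the chunks have to be reassembled before the compression and msgpack steps can be inverted. A minimal sketch, assuming the chunks arrive in order and expose the same hashs field as above (this mirrors the snippet, not a verified hangar API):

import blosc
import msgpack

def hashes_from_chunks(chunk_iterator):
    # Reassemble the chunked payload, then undo compress() and packb().
    comp_bytes = b''.join(chunk.hashs for chunk in chunk_iterator)
    return msgpack.unpackb(blosc.decompress(comp_bytes))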
github jhuapl-boss / boss-tools / lambda / downsample_volume.py
        log.debug("Completely empty volume, not downsampling")
        return

    # Create downsampled cube
    new_dim = XYZ(*CUBOIDSIZE[resolution + 1])
    cube = Buffer.zeros(new_dim, dtype=np_types[data_type], order='C')
    cube.dim = new_dim
    cube.cubes = XYZ(1,1,1)

    downsample_cube(volume, cube, annotation_chan)

    target = target / step # scale down the output

    # Save new cube in S3
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t, target.morton, version=version)
    compressed = blosc.compress(cube, typesize=(np.dtype(cube.dtype).itemsize))
    s3.put(obj_key, compressed)

    # Update indices
    # Same key scheme, but without the version
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t, target.morton)
    # Create S3 Index if it doesn't exist
    idx_key = S3IndexKey(obj_key, version)
    if not s3_index.exists(idx_key):
        ingest_job = 0 # Valid to be 0, as posting a cutout uses 0
        idx_key = S3IndexKey(obj_key,
                             version,
                             col_id,
                             '{}&{}&{}&{}'.format(exp_id, chan_id, resolution + 1, ingest_job),
                             # Replaced call to SPDB AWSObjectStore.generate_lookup_key, as SPDB master doesn't contain this call
                             # AWSObjectStore.generate_lookup_key(col_id, exp_id, chan_id, resolution + 1)
                             '{}&{}&{}&{}&{}'.format(col_id, exp_id, chan_id, resolution + 1, randrange(LOOKUP_KEY_MAX_N)))
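
Reading such an object back inverts the compression step. A minimal sketch, assuming the cube's dtype and dimensions are known from the channel metadata; s3.get and the reshape layout are stand-ins for the project's own helpers and conventions, not verified Boss internals.

import blosc
import numpy as np

def read_cube(s3, obj_key, dtype, dim):
    # Hypothetical inverse of s3.put(obj_key, compressed): fetch the
    # blob, decompress it, and reshape it into the cuboid's extent.
    raw = blosc.decompress(s3.get(obj_key))
    return np.frombuffer(raw, dtype=dtype).reshape(dim)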
github spyking-circus / spyking-circus / circus / shared / mpi.py
    assert len(data.shape) < 3
    # first we pass the data size
    size  = data.size
    sizes = mpi_comm.gather(size, root=root) or []
    # now we pass the data
    displacements = [numpy.int64(sum(sizes[:i])) for i in range(len(sizes))]

    np_type       = get_np_dtype(dtype)
    mpi_type      = get_mpi_type(dtype)    
    data_shape    = data.shape

    if not compress:
        gdata = numpy.empty(numpy.int64(sum(sizes)), dtype=np_type)
        mpi_comm.Gatherv([data.flatten(), size, mpi_type], [gdata, (sizes, displacements), mpi_type], root=root)
    else:
        data = blosc.compress(data, typesize=mpi_type.size, cname='blosclz')
        data = mpi_comm.gather(data, root=0)
        gdata = numpy.empty(0, dtype=np_type)
        if comm.rank == 0:
            for blosc_data in data:
                gdata = numpy.concatenate((gdata, numpy.frombuffer(blosc.decompress(blosc_data), dtype=np_type)))

    if len(data_shape) == 1:
        return gdata
    else:
        if shape == 0:
            num_lines = data_shape[0]
            if num_lines > 0:
                return gdata.reshape((num_lines, gdata.size//num_lines))
            else:
                return gdata.reshape((0, gdata.shape[1]))
        if shape == 1:
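
Stripped of the MPI plumbing, the compressed branch is a simple pattern: each rank compresses its flattened buffer, the opaque blobs are gathered, and the root decompresses and concatenates them. A minimal single-process sketch of that inner step, using the dtype's item size as typesize rather than the MPI type's:

import blosc
import numpy as np

# Stand-ins for the per-rank buffers that mpi_comm.gather would collect.
chunks = [np.arange(10, dtype=np.float32), np.arange(5, dtype=np.float32)]

# What each rank sends: an opaque compressed blob of its local data.
blobs = [blosc.compress(c.tobytes(), typesize=c.dtype.itemsize,
                        cname='blosclz') for c in chunks]

# What the root does with the gathered blobs.
gdata = np.concatenate([np.frombuffer(blosc.decompress(b), dtype=np.float32)
                        for b in blobs])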
github jhuapl-boss / boss / django / bossspatialdb / renderers.py
            renderer_context["accepted_media_type"] = 'application/json'
            self.media_type = 'application/json'
            self.format = 'json'
            err_msg = {"status": 403, "message": "Access denied, are you logged in?",
                       "code": 2005}
            jr = JSONRenderer()
            return jr.render(err_msg, 'application/json', renderer_context)

        if not data["data"].data.flags['C_CONTIGUOUS']:
            data["data"].data = np.ascontiguousarray(data["data"].data, dtype=data["data"].data.dtype)

        # Return data, squeezing time dimension if only a single point
        if data["time_request"]:
            return blosc.compress(data["data"].data, typesize=renderer_context['view'].bit_depth)
        else:
            return blosc.compress(np.squeeze(data["data"].data, axis=(0,)),
                                  typesize=renderer_context['view'].bit_depth)
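
A client consuming this renderer's output has to perform the inverse: decompress the body and reinterpret it with the cutout's known dtype and extent. A minimal sketch; the shape ordering is illustrative, and the dtype must be derived from the channel's bit depth, since neither can be recovered from the payload alone.

import blosc
import numpy as np

def decode_cutout(body, dtype, shape):
    # body is the blosc-compressed HTTP payload; shape is the requested
    # extent, e.g. (t, z, y, x), known from the original request.
    return np.frombuffer(blosc.decompress(body), dtype=dtype).reshape(shape)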
github ryfeus / lambda-packs / Pandas_numpy / source / pandas / io / packers.py
            return v.tolist()

        # convert to a bytes array
        v = v.tostring()
        return ExtType(0, zlib.compress(v))

    elif compressor == 'blosc':
        _check_blosc()

        # return string arrays like they are
        if dtype == np.object_:
            return v.tolist()

        # convert to a bytes array
        v = v.tostring()
        return ExtType(0, blosc.compress(v, typesize=dtype.itemsize))

    # ndarray (on original dtype)
    return ExtType(0, v.tostring())
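
The decode path dispatches on the same compressor tag. A minimal sketch of the blosc branch, assuming the ExtType payload and the original dtype have already been recovered by the unpacker (the function name is hypothetical, not pandas' internal API):

import blosc
import numpy as np

def unconvert_blosc(ext_data, dtype):
    # Inverse of ExtType(0, blosc.compress(v, typesize=dtype.itemsize)):
    # decompress and reinterpret the bytes on the original dtype.
    return np.frombuffer(blosc.decompress(ext_data), dtype=dtype)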
github tensorwerk / hangar-py / src / hangar / remote / chunks.py
def missingHashRequestIterator(commit, hashes, pb2_func):
    hash_bytes = msgpack.packb(hashes)
    comp_bytes = blosc.compress(
        hash_bytes, cname='blosclz', clevel=3, typesize=1, shuffle=blosc.SHUFFLE)

    rpc_method = pb2_func(
        commit=commit,
        total_byte_size=len(comp_bytes))

    chunkIterator = chunk_bytes(comp_bytes)
    for bchunk in chunkIterator:
        rpc_method.hashs = bchunk
        yield rpc_method
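
chunk_bytes itself is a project helper that is not shown. A plausible sketch, assuming it simply slices the compressed payload into fixed-size pieces so each gRPC message stays under the configured size limit; the 32 kB chunk size is an illustrative choice, not hangar's actual constant.

def chunk_bytes(payload, chunk_size=32_000):
    # Yield successive fixed-size slices of the compressed payload.
    for start in range(0, len(payload), chunk_size):
        yield payload[start:start + chunk_size]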
github tensorwerk / hangar-py / src / hangar / remote / client.py
        Parameters
        ----------
        digest : str
            digest being sent
        labelVal : bytes
            raw bytes of the label value

        Returns
        -------
        hangar_service_pb2.PushLabelReply
            standard error proto indicating success
        """
        rec = hangar_service_pb2.HashRecord(digest=digest)
        request = hangar_service_pb2.PushLabelRequest(rec=rec)
        request.blob = blosc.compress(labelVal)
        reply = self.stub.PushLabel(request)
        return reply
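
Here blosc.compress is called with all defaults (typesize=8, clevel=9, shuffle=blosc.SHUFFLE, cname='blosclz'), which is a reasonable choice for an opaque blob. The receiving side needs no parameters at all, because blosc records what it needs in the frame header; a minimal sketch of the inverse call:

import blosc

def recover_label(request):
    # Invert request.blob = blosc.compress(labelVal); decompress reads
    # the typesize and codec from the blosc header itself.
    return blosc.decompress(request.blob)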
github analysiscenter / batchflow / batchflow / batch.py
def _dump_blosc(self, ix, dst, components=None):
        """ Save blosc packed data to file """
        file_name = self._get_file_name(ix, dst)
        with open(file_name, 'w+b') as f:
            if self.components is None:
                components = (None,)
                item = (self[ix],)
            else:
                components = tuple(components or self.components)
                item = self[ix].as_tuple(components)
            data = dict(zip(components, item))
            f.write(blosc.compress(dill.dumps(data)))
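
The matching load inverts these steps: read the file, decompress, and unpickle the component dict. A minimal sketch as a free function; the batch machinery around it is omitted and the name is not copied from the project.

import blosc
import dill

def load_blosc(file_name):
    # Inverse of f.write(blosc.compress(dill.dumps(data))): read the
    # compressed blob, decompress it, and unpickle the component dict.
    with open(file_name, 'rb') as f:
        return dill.loads(blosc.decompress(f.read()))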