import collections

import blosc

# dataset_pb2 and tags_from_example come from the surrounding project.


def read_split_metadata(split_file, subset_tags=None):
    """
    :param split_file: Path to a blosc-compressed, serialized dataset_pb2.Examples file.
    :param subset_tags: Optional list of tag names. For example, ['NOVELCLASS', 'NOVELMODEL', 'NOVELVIEW']
    :return: A dict mapping each tag to its examples if subset_tags is given, otherwise a list of all examples.
    """
    # Make sure the cpp implementation is used for parsing protocol buffers. Otherwise it will be too slow.
    from google.protobuf.internal import api_implementation
    protobuf_impl_name = api_implementation._default_implementation_type
    assert 'cpp' == protobuf_impl_name, protobuf_impl_name

    with open(split_file, mode='rb') as f:
        compressed = f.read()
    decompressed = blosc.decompress(compressed)
    examples = dataset_pb2.Examples()
    examples.ParseFromString(decompressed)

    if subset_tags is not None:
        assert isinstance(subset_tags, (list, tuple))
        experiment_names = subset_tags
        ret = collections.defaultdict(list)
        for example in examples.examples:
            tags = tags_from_example(example)
            for name in experiment_names:
                if name in tags:
                    ret[name].append(example)
    else:
        ret = list(examples.examples)
    return ret
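# Usage sketch (the file path and tag names below are hypothetical examples):
#
#     by_tag = read_split_metadata('splits/validation.bin', subset_tags=['NOVELCLASS'])
#     novel_class_examples = by_tag['NOVELCLASS']
#     all_examples = read_split_metadata('splits/validation.bin')  # no tags: flat list of examples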
def _load_blosc(self, ix, src=None, dst=None):
    """ Load data from a blosc-packed file """
    file_name = self._get_file_name(ix, src)
    with open(file_name, 'rb') as f:
        data = dill.loads(blosc.decompress(f.read()))
        components = tuple(dst or self.components)
        try:
            item = tuple(data[i] for i in components)
        except Exception as e:
            raise KeyError('Cannot find components in the corresponding file', e)
    return item
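# A possible writer counterpart (a sketch, not part of the original class): dill-serialize
# the component data and blosc-compress it so that _load_blosc() above can read it back.
import blosc
import dill

def _dump_blosc(data, file_name):
    """ Save data to a blosc-packed file (sketch; mirrors _load_blosc above). """
    with open(file_name, 'wb') as f:
        f.write(blosc.compress(dill.dumps(data), typesize=1, cname='lz4'))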
elif is_object_dtype(dtype):
    return np.array(values, dtype=object)

dtype = pandas_dtype(dtype).base

if not as_is_ext:
    values = values.encode('latin1')

if compress:
    if compress == u'zlib':
        _check_zlib()
        decompress = zlib.decompress
    elif compress == u'blosc':
        _check_blosc()
        decompress = blosc.decompress
    else:
        raise ValueError("compress must be one of 'zlib' or 'blosc'")

    try:
        return np.frombuffer(
            _move_into_mutable_buffer(decompress(values)),
            dtype=dtype,
        )
    except _BadMove as e:
        # Pull the decompressed data off of the `_BadMove` exception.
        # We don't just store this in the locals because we want to
        # minimize the risk of giving users access to a `bytes` object
        # whose data is also given to a mutable buffer.
        values = e.args[0]
        if len(values) > 1:
            # The empty string and single characters are memoized in many
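# For context, a sketch of the mirror-image compression step (a hypothetical helper,
# not the pandas implementation): the encoder compresses the array's raw bytes with
# zlib or blosc, which is what `decompress(values)` above later undoes.
import zlib

import blosc
import numpy as np

def compress_values(arr, compress):
    """Return the compressed raw bytes of a numpy array (sketch)."""
    raw = arr.tobytes()
    if compress == 'zlib':
        return zlib.compress(raw)
    elif compress == 'blosc':
        return blosc.compress(raw, typesize=arr.dtype.itemsize)
    raise ValueError("compress must be one of 'zlib' or 'blosc'")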
displacements = [numpy.int64(sum(sizes[:i])) for i in range(len(sizes))]
np_type = get_np_dtype(dtype)
mpi_type = get_mpi_type(dtype)
data_shape = data.shape

if not compress:
    gdata = numpy.empty(numpy.int64(sum(sizes)), dtype=np_type)
    mpi_comm.Gatherv([data.flatten(), size, mpi_type], [gdata, (sizes, displacements), mpi_type], root=root)
else:
    data = blosc.compress(data, typesize=mpi_type.size, cname='blosclz')
    data = mpi_comm.gather(data, root=0)
    gdata = numpy.empty(0, dtype=np_type)
    if comm.rank == 0:
        for blosc_data in data:
            gdata = numpy.concatenate((gdata, numpy.frombuffer(blosc.decompress(blosc_data), dtype=np_type)))

if len(data_shape) == 1:
    return gdata
else:
    if shape == 0:
        num_lines = data_shape[0]
        if num_lines > 0:
            return gdata.reshape((num_lines, gdata.size // num_lines))
        else:
            return gdata.reshape((0, gdata.shape[1]))
    if shape == 1:
        num_columns = data_shape[1]
        if num_columns > 0:
            return gdata.reshape((gdata.size // num_columns, num_columns))
        else:
            return gdata.reshape((gdata.shape[0], 0))
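# Round-trip sketch of the compressed branch above, without MPI (names here are
# illustrative): each rank would blosc-compress its flattened block like this, and
# rank 0 rebuilds it with numpy.frombuffer(blosc.decompress(...)) exactly as in the
# loop over the gathered list.
import blosc
import numpy

block = numpy.arange(12, dtype=numpy.float64)
payload = blosc.compress(block.tobytes(), typesize=block.dtype.itemsize, cname='blosclz')
restored = numpy.frombuffer(blosc.decompress(payload), dtype=block.dtype)
assert numpy.array_equal(block, restored)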
volume_empty = True  # abort if the volume doesn't exist in S3
for offset in xyz_range(step):
    if args.get('test'):
        # Enable Test Mode.
        # This is where the cube downsamples are all taken from 0/0/0,
        # so that the entire frame doesn't have to be populated to test
        # the code paths that downsample cubes.
        cube = offset  # use target 0/0/0
    else:
        cube = target + offset

    try:
        obj_key = HashedKey(parent_iso, col_id, exp_id, chan_id, resolution, t, cube.morton, version=version)
        data = s3.get(obj_key)
        data = blosc.decompress(data)

        # DP ???: Check to see if the buffer is all zeros?
        data = Buffer.frombuffer(data, dtype=np_types[data_type])
        data.resize(dim)

        # log.debug("Downloaded cube {}".format(cube))
        volume[offset * dim: (offset + 1) * dim] = data
        volume_empty = False
    except Exception as e:  # TODO: Create custom exception for S3 download
        # log.exception("Problem downloading cubes {}".format(cube))
        # log.debug("No cube at {}".format(cube))
        # Eat the error; we don't care if the cube doesn't exist.
        # If the cube doesn't exist, blank data will be used for downsampling.
        # If none of the cubes exist, then the downsample is finished.
        pass
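# Hypothetical upload counterpart (a sketch, not the actual implementation): a cube
# would be blosc-compressed before being stored under its HashedKey, which is what
# lets the loop above call blosc.decompress() on the downloaded bytes. The `s3.put`
# call is assumed to mirror the `s3.get` used above.
def put_cube(s3, obj_key, cube_data):
    """Compress a numpy cube and store it in the object store (sketch)."""
    payload = blosc.compress(cube_data.tobytes(), typesize=cube_data.dtype.itemsize)
    s3.put(obj_key, payload)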
# Get bit depth
try:
    bit_depth = resource.get_bit_depth()
except ValueError:
    return BossParserError("Unsupported data type provided to parser: {}".format(resource.get_data_type()),
                           ErrorCodes.TYPE_ERROR)

# Make sure the cutout request is under 500MB UNCOMPRESSED
if is_too_large(req, bit_depth):
    return BossParserError("Cutout request is over 500MB when uncompressed. Reduce cutout dimensions.",
                           ErrorCodes.REQUEST_TOO_LARGE)

try:
    # Decompress
    raw_data = blosc.decompress(stream.read())
    data_mat = np.fromstring(raw_data, dtype=resource.get_numpy_data_type())
except MemoryError:
    return BossParserError("Ran out of memory decompressing data.",
                           ErrorCodes.BOSS_SYSTEM_ERROR)
except:
    return BossParserError("Failed to decompress data. Verify the datatype/bitdepth of your data "
                           "matches the channel.", ErrorCodes.DATATYPE_DOES_NOT_MATCH)

# Reshape and return
try:
    if req.time_request:
        # Time series request (even if a single time point) - get a 4D matrix
        parsed_data = np.reshape(data_mat,
                                 (len(req.get_time()),
                                  req.get_z_span(),
                                  req.get_y_span(),
                                  req.get_x_span()))
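# Sketch of the kind of check is_too_large() above performs (assumed logic, not the
# actual implementation): the uncompressed size is the product of the requested spans
# and the bytes per voxel, and anything over 500MB is rejected.
def cutout_too_large(x_span, y_span, z_span, time_points, bit_depth, limit_bytes=500 * 1024 * 1024):
    uncompressed_bytes = x_span * y_span * z_span * time_points * (bit_depth // 8)
    return uncompressed_bytes > limit_bytes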
def FetchFindMissingHashRecords(self, request_iterator, context):
    """Determine data tensor hash records existing on the server and not on the client.
    """
    for idx, request in enumerate(request_iterator):
        if idx == 0:
            commit = request.commit
            hBytes, offset = bytearray(request.total_byte_size), 0
        size = len(request.hashs)
        hBytes[offset: offset + size] = request.hashs
        offset += size

    uncompBytes = blosc.decompress(hBytes)
    c_hashs_raw = chunks.deserialize_record_pack(uncompBytes)
    c_hashset = set([chunks.deserialize_ident(raw).digest for raw in c_hashs_raw])

    with tempfile.TemporaryDirectory() as tempD:
        tmpDF = os.path.join(tempD, 'test.lmdb')
        tmpDB = lmdb.open(path=tmpDF, **c.LMDB_SETTINGS)
        commiting.unpack_commit_ref(self.env.refenv, tmpDB, commit)
        s_hashes_schemas = queries.RecordQuery(tmpDB).data_hash_to_schema_hash()
        s_hashes = set(s_hashes_schemas.keys())
        tmpDB.close()

    c_missing = list(s_hashes.difference(c_hashset))
    c_hash_schemas_raw = [chunks.serialize_ident(c_mis, s_hashes_schemas[c_mis]) for c_mis in c_missing]
    raw_pack = chunks.serialize_record_pack(c_hash_schemas_raw)
    err = hangar_service_pb2.ErrorProto(code=0, message='OK')
    response_pb = hangar_service_pb2.FindMissingHashRecordsReply
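# Sketch of the client-side chunking that the reassembly loop above expects
# (illustrative only; the helper below is hypothetical, not hangar's API): the
# serialized record pack is blosc-compressed once and streamed as fixed-size
# slices whose total length fills `total_byte_size`.
def iter_hash_chunks(raw_pack, chunk_size=32_000):
    compressed = blosc.compress(raw_pack, typesize=1)
    total = len(compressed)
    for start in range(0, total, chunk_size):
        yield compressed[start:start + chunk_size], total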
    self.start(first_start=False)
    continue
m0, m1 = self.recv_socket.recv_multipart()
self.last_packet_time = time.time()
abs_pos = msgpack.loads(m0)
if self.last_pos > abs_pos:
    print('restart because last not good')
    self.start(first_start=False)
    continue
if self.compress is None:
    buf = memoryview(m1)
elif self.compress == 'blosc':
    buf = blosc.decompress(m1)
chunk = np.frombuffer(buf, dtype=np_array.dtype).reshape(-1, n).transpose()
# print('recv', abs_pos, chunk.shape)
new = chunk.shape[1]

head = abs_pos % half_size + half_size
tail = head - new
np_array[:, tail:head] = chunk
head2 = abs_pos % half_size
tail2 = max(head2 - new, 0)
new2 = head2 - tail2
if new2 != 0:
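# Sketch of the matching sender (socket setup assumed; the msgpack, blosc, and pyzmq
# calls are standard APIs): the absolute position goes in the first frame, the
# blosc-compressed chunk bytes in the second, which is what the receive loop above unpacks.
import blosc
import msgpack

def send_chunk(send_socket, abs_pos, chunk):
    """Send one (position, compressed samples) pair over a zmq socket (sketch)."""
    m0 = msgpack.dumps(abs_pos)
    m1 = blosc.compress(chunk.transpose().tobytes(), typesize=chunk.dtype.itemsize)
    send_socket.send_multipart([m0, m1])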
In order to prevent errors or malicious behavior, the cryptographic hash
of every tensor is calculated and compared to what the client "said" it
is. If an error is detected, no sample in the entire stream will be
saved to disk.
"""
for idx, request in enumerate(request_iterator):
    if idx == 0:
        uncomp_nbytes = request.uncomp_nbytes
        comp_nbytes = request.comp_nbytes
        dBytes, offset = bytearray(comp_nbytes), 0
    size = len(request.raw_data)
    dBytes[offset: offset + size] = request.raw_data
    offset += size

uncompBytes = blosc.decompress(dBytes)
if uncomp_nbytes != len(uncompBytes):
    msg = f'ERROR: uncomp_nbytes sent: {uncomp_nbytes} != received {len(uncompBytes)}'
    context.set_details(msg)
    context.set_code(grpc.StatusCode.DATA_LOSS)
    err = hangar_service_pb2.ErrorProto(code=15, message=msg)
    reply = hangar_service_pb2.PushDataReply(error=err)
    return reply

unpacked_records = chunks.deserialize_record_pack(uncompBytes)
received_data = []
for record in unpacked_records:
    data = chunks.deserialize_record(record)
    schema_hash = data.schema
    received_hash = array_hash_digest(data.array)
    if received_hash != data.digest:
        msg = f'HASH MANGLED, received: {received_hash} != expected digest: {data.digest}'
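# Generic stand-in for the tensor digest comparison above (not hangar's actual
# array_hash_digest): hash the array's dtype, shape, and raw bytes and compare
# the resulting hex digests.
import hashlib

import numpy as np

def simple_array_digest(arr):
    """Return a blake2b hex digest over an array's dtype, shape, and data (sketch)."""
    hasher = hashlib.blake2b(digest_size=20)
    hasher.update(str(arr.dtype).encode())
    hasher.update(str(arr.shape).encode())
    hasher.update(np.ascontiguousarray(arr).tobytes())
    return hasher.hexdigest()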
if blosc_args['shuffle'] is None:
    blosc_args['shuffle'] = DEFAULT_SHUFFLE
if blosc_args['cname'] is None:
    blosc_args['cname'] = DEFAULT_CNAME
_check_blosc_args(blosc_args)

offsets_pos = (BLOSCPACK_HEADER_LENGTH +
               (METADATA_HEADER_LENGTH + metadata_header['max_meta_size'] +
                CHECKSUMS_LOOKUP[metadata_header['meta_checksum']].size
                if metadata is not None else 0))

# seek to the final offset
original_fp.seek(offsets[-1], 0)

# decompress the last chunk
compressed, blosc_header, digest = _read_compressed_chunk_fp(original_fp, checksum_impl)
# TODO check digest
decompressed = blosc.decompress(compressed)

# figure out how many bytes we need to read to rebuild the last chunk
ultimo_length = len(decompressed)
bytes_to_read = bloscpack_header.chunk_size - ultimo_length

if new_size <= bytes_to_read:
    # special case
    # must squeeze data into last chunk
    fill_up = new_content_fp.read(new_size)
    # seek back to the position of the original last chunk
    original_fp.seek(offsets[-1], 0)
    # write the chunk that has been filled up
    compressed = _compress_chunk_str(decompressed + fill_up, blosc_args)
    digest = checksum_impl(compressed)
    _write_compressed_chunk(original_fp, compressed, digest)
    # return 0 to indicate that no new chunks have been written
    # build the new header
    bloscpack_header.last_chunk += new_size
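# For comparison, python-blosc's pack_array()/unpack_array() round-trip covers the
# simple "whole array in memory" case without the chunk/offset bookkeeping handled
# by the bloscpack code above (which exists for appendable, chunked files):
import blosc
import numpy as np

arr = np.arange(1_000_000, dtype=np.int64)
packed = blosc.pack_array(arr, cname='lz4')
assert np.array_equal(blosc.unpack_array(packed), arr)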