How to use the h5py.special_dtype function in h5py

To help you get started, we’ve selected a few h5py.special_dtype examples drawn from popular ways it is used in public projects.

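Before the project excerpts, here is a minimal standalone sketch of the most common pattern, a resizable dataset of variable-length strings. The file and dataset names are placeholders; on h5py 2.10 or newer the same dtype is also available as h5py.string_dtype().

import h5py

# Variable-length string dtype (h5py.string_dtype() is the newer spelling).
str_dt = h5py.special_dtype(vlen=str)

with h5py.File('strings_example.h5', 'w') as f:
    ds = f.create_dataset('labels', shape=(0,), maxshape=(None,), dtype=str_dt)
    ds.resize((2,))
    ds[0] = 'alpha'
    ds[1] = 'beta'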

github vatlab/varianttools: src/variant_tools/importer_allele_hdf5.py (view on GitHub)
        else:
            chunk_shape = (chunk_length, min(chunk_width, data.shape[1])) + data.shape[2:]

        # create dataset
        group, name = k.split('/')
        if name in root[group]:
            if overwrite:
                del root[group][name]
            else:
                raise ValueError('dataset exists at path %r; use overwrite=True to replace' % k)

        shape = (0,) + data.shape[1:]
        maxshape = (None,) + data.shape[1:]
        if data.dtype.kind == 'O':
            if vlen:
                dt = h5py.special_dtype(vlen=str)
            else:
                data = data.astype('S')
                dt = data.dtype
        else:
            dt = data.dtype
        ds = root[group].create_dataset(
            name, shape=shape, maxshape=maxshape, chunks=chunk_shape, dtype=dt,
            compression=compression, compression_opts=compression_opts, shuffle=shuffle
        )

        # copy metadata from VCF headers
        # meta = None
        # if group == 'variants' and name in headers.infos:
        #     meta = headers.infos[name]
        # elif group == 'calldata' and name in headers.formats:
        #     meta = headers.formats[name]
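This excerpt handles object-dtype arrays in two ways: as variable-length strings via special_dtype(vlen=str), or as fixed-width bytes via astype('S'). A minimal sketch of that choice, with an invented file name and data:

import h5py
import numpy as np

data = np.array(['chr1', 'chr2', 'chr10'], dtype=object)  # made-up identifiers

with h5py.File('alleles_example.h5', 'w') as f:
    # Variable-length strings: each element keeps its own length.
    f.create_dataset('vlen_ids', data=data, dtype=h5py.special_dtype(vlen=str))
    # Fixed-width bytes: numpy pads to the widest element ('S5' here).
    fixed = data.astype('S')
    f.create_dataset('fixed_ids', data=fixed, dtype=fixed.dtype)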
github Calamari-OCR/calamari: calamari_ocr/ocr/datasets/hdf5_dataset/dataset.py (view on GitHub)
def store(self, extension):
        for filename, data in self.prediction.items():
            texts = data['transcripts']
            codec = data['codec']
            basename, ext = split_all_ext(filename)
            with h5py.File(basename + extension, 'w') as file:
                dt = h5py.special_dtype(vlen=np.dtype('int32'))
                file.create_dataset('transcripts', (len(texts),), dtype=dt)
                file['transcripts'][...] = texts
                file.create_dataset('codec', data=list(map(ord, codec)))
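The same vlen mechanism also covers ragged numeric data, which is how this excerpt stores integer transcripts of varying length. A small self-contained sketch with made-up values:

import h5py
import numpy as np

texts = [np.array([5, 2, 9], dtype='int32'), np.array([7, 1], dtype='int32')]

with h5py.File('transcripts_example.h5', 'w') as f:
    dt = h5py.special_dtype(vlen=np.dtype('int32'))  # each row is an int32 array of any length
    ds = f.create_dataset('transcripts', (len(texts),), dtype=dt)
    ds[...] = texts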
github achalddave/pytorch-extract-features: extract_features.py (view on GitHub)
        num_workers=workers,
        pin_memory=True)

    features = {}
    for i, (input_data, paths) in enumerate(tqdm(loader)):
        input_var = torch.autograd.Variable(input_data, volatile=True).cuda()
        current_features = model(input_var).data.cpu().numpy()
        for j, image_path in enumerate(paths):
            features[image_path] = current_features[j]

    feature_shape = features[list(features.keys())[0]].shape
    logging.info('Feature shape: %s' % (feature_shape, ))
    logging.info('Outputting features')

    if sys.version_info >= (3, 0):
        string_type = h5py.special_dtype(vlen=str)
    else:
        string_type = h5py.special_dtype(vlen=unicode)  # noqa
    paths = features.keys()
    logging.info('Stacking features')
    features_stacked = np.vstack([features[path] for path in paths])
    logging.info('Output feature size: %s' % (features_stacked.shape, ))
    with h5py.File(output_hdf5, 'a') as f:
        f.create_dataset('features', data=features_stacked)
        f.create_dataset(
            'image_names',
            (len(paths), ),
            dtype=string_type)
        # For some reason, assigning the list directly causes an error, so we
        # assign it in a loop.
        for i, image_path in enumerate(paths):
            f['image_names'][i] = image_path_to_name(image_path)
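The version check above only exists to pick str or unicode as the vlen base type under Python 2; on Python 3 the str branch alone is enough, and h5py 2.10+ exposes the same dtype as h5py.string_dtype(). A condensed sketch with placeholder names and random stand-in features:

import h5py
import numpy as np

names = ['img_001.jpg', 'img_002.jpg']
features = np.random.rand(len(names), 4).astype('float32')  # stand-in feature vectors

with h5py.File('features_example.h5', 'w') as f:
    f.create_dataset('features', data=features)
    string_type = h5py.special_dtype(vlen=str)
    name_ds = f.create_dataset('image_names', (len(names),), dtype=string_type)
    for i, name in enumerate(names):
        name_ds[i] = name  # assign element-wise, as in the excerpt above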
github gallantlab/pycortex: cortex/dataset/dataset.py (view on GitHub)
def _pack_subjs(h5, subjects):
    for subject in subjects:
        rois = db.get_overlay(subject)
        rnode = h5.require_dataset("/subjects/%s/rois"%subject, (1,),
                                   dtype=h5py.special_dtype(vlen=str))
        rnode[0] = rois.toxml(pretty=False)

        surfaces = db.get_paths(subject)['surfs']
        for surf in surfaces.keys():
            for hemi in ("lh", "rh"):
                pts, polys = db.get_surf(subject, surf, hemi)
                group = "/subjects/%s/surfaces/%s/%s"%(subject, surf, hemi)
                _hdf_write(h5, pts, "pts", group)
                _hdf_write(h5, polys, "polys", group)
github fact-project/pyfact: fact/io.py (view on GitHub)
        name for the new dataset
    **kwargs:
        all **kwargs are passed to create_dataset, useful for e.g. compression
    '''
    dtype = array.dtype
    maxshape = [None] + list(array.shape)[1:]
    shape = [0] + list(array.shape)[1:]
    attrs = {}

    if dtype.base == object:
        if isinstance(array[0], list):
            dt = np.array(array[0]).dtype
            shape = [0, len(array[0])]
            maxshape = [None, len(array[0])]
        else:
            dt = h5py.special_dtype(vlen=str)

    elif dtype.type == np.datetime64:
        # save dates as ISO string, create dummy date to get correct length
        dt = np.array(0, dtype=dtype).astype('S').dtype
        attrs['timeformat'] = 'iso'

    else:
        dt = dtype.base

    # add default compression options if no options are given
    if 'compression' not in kwargs:
        kwargs.update(DEFAULT_COMPRESSION)

    dataset = group.create_dataset(
        name,
        shape=tuple(shape),
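For datetime64 columns this helper sidesteps special_dtype and stores ISO strings as fixed-width bytes instead; a standalone sketch of that idea, with invented timestamps and names:

import h5py
import numpy as np

times = np.array(['2020-01-01T00:00:00', '2020-06-15T12:30:00'], dtype='datetime64[s]')

with h5py.File('times_example.h5', 'w') as f:
    iso = times.astype('S')                 # fixed-width ISO-8601 byte strings
    ds = f.create_dataset('timestamp', data=iso)
    ds.attrs['timeformat'] = 'iso'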
github gallantlab/pycortex: cortex/dataset/__init__.py (view on GitHub)
def _pack_subjs(h5, subjects):
    for subject in subjects:
        rois = db.get_overlay(subject, type='rois')
        rnode = h5.require_dataset("/subjects/%s/rois"%subject, (1,),
            dtype=h5py.special_dtype(vlen=str))
        rnode[0] = rois.toxml(pretty=False)

        surfaces = db.get_paths(subject)['surfs']
        for surf in surfaces.keys():
            for hemi in ("lh", "rh"):
                pts, polys = db.get_surf(subject, surf, hemi)
                group = "/subjects/%s/surfaces/%s/%s"%(subject, surf, hemi)
                _hdf_write(h5, pts, "pts", group)
                _hdf_write(h5, polys, "polys", group)
github nschloe/pygmsh: pygmsh/writer.py (view on GitHub)
    # add nodes
    nodes = tstt.create_group('nodes')
    coords = nodes.create_dataset('coordinates', data=points)
    coords.attrs.create('start_id', global_id)
    global_id += len(points)

    # add point data
    if point_data:
        tags = nodes.create_group('tags')
        for key, value in point_data.items():
            tags.create_dataset(key, data=value)

    # add elements
    elements = tstt.create_group('elements')

    elem_dt = h5py.special_dtype(
        enum=('i', {
            'Edge': 1,
            'Tri': 2,
            'Quad': 3,
            'Polygon': 4,
            'Tet': 5,
            'Pyramid': 6,
            'Prism': 7,
            'Knife': 8,
            'Hex': 9,
            'Polyhedron': 10
            })
        )

    # number of nodes to h5m name, element type
    h5m_type = {
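Besides vlen types, special_dtype can build HDF5 enum types, as the element-type map above does; h5py 2.10+ also offers h5py.enum_dtype for the same purpose. A minimal sketch with an invented two-member enum:

import h5py
import numpy as np

state_dt = h5py.special_dtype(enum=('i', {'OFF': 0, 'ON': 1}))  # int-backed enum

with h5py.File('enum_example.h5', 'w') as f:
    ds = f.create_dataset('states', shape=(3,), dtype=state_dt)
    ds[...] = np.array([0, 1, 1], dtype='i')
    print(h5py.check_dtype(enum=ds.dtype))  # {'OFF': 0, 'ON': 1}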
github HDFGroup/h5pyd: h5pyd/_apps/utillib.py (view on GitHub)
    if len(srcdt) > 0:
        fields = []
        for name in srcdt.fields:
            item = srcdt.fields[name]
            # item is a tuple of dtype and integer offset
            field_dt = convert_dtype(item[0], ctx)
            fields.append((name, field_dt))
        tgt_dt = np.dtype(fields)
    else:
        # check if this a "special dtype"
        if srcdt.metadata and 'ref' in srcdt.metadata:
            ref = srcdt.metadata['ref']
            if is_reference(ref):
                if is_h5py(ctx['fout']):
                    tgt_dt = h5py.special_dtype(ref=h5py.Reference)
                else:
                    tgt_dt = h5pyd.special_dtype(ref=h5pyd.Reference)
            elif is_regionreference(ref):
                if is_h5py(ctx['fout']):
                    tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
                else:
                    tgt_dt = h5pyd.special_dtype(ref=h5pyd.RegionReference)
            else:
                msg = "Unexpected ref type: {}".format(srcdt)
                logging.error(msg)
                raise TypeError(msg)
        elif srcdt.metadata and 'vlen' in srcdt.metadata:
            src_vlen = srcdt.metadata['vlen']
            if isinstance(src_vlen, np.dtype):
                tgt_base = convert_dtype(src_vlen, ctx)
            else:
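The ref metadata this converter inspects is produced by the reference form of special_dtype, which builds a dtype holding HDF5 object (or region) references; h5py 2.10+ also exposes it as h5py.ref_dtype. A minimal sketch of the object-reference case with placeholder names:

import h5py

ref_dt = h5py.special_dtype(ref=h5py.Reference)

with h5py.File('refs_example.h5', 'w') as f:
    target = f.create_dataset('target', data=[1, 2, 3])
    refs = f.create_dataset('refs', shape=(1,), dtype=ref_dt)
    refs[0] = target.ref          # store a reference to another dataset
    print(f[refs[0]][...])        # dereference via the file: [1 2 3]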
github darknight1900/MobileDet: datasets/voc_to_hdf5.py (view on GitHub)
    voc_path = os.path.expanduser(args.path_to_voc)
    train_ids = get_ids(voc_path, train_set)
    val_ids = get_ids(voc_path, val_set)
    test_ids = get_ids(voc_path, test_set)
    train_ids_2007 = get_ids(voc_path, sets_from_2007)
    total_train_ids = len(train_ids) + len(train_ids_2007)

    # Create HDF5 dataset structure
    print('Creating HDF5 dataset structure.')
    fname = os.path.join(voc_path, 'pascal_voc_07_12_person_vehicle.hdf5')
    if os.path.exists(fname):
        print('Removing old ' + fname)
        os.remove(fname)

    voc_h5file = h5py.File(fname, 'w')
    uint8_dt = h5py.special_dtype(
        vlen=np.dtype('uint8'))  # variable length uint8
    vlen_int_dt = h5py.special_dtype(
        vlen=np.dtype(int))  # variable length default int
    train_group = voc_h5file.create_group('train')
    val_group = voc_h5file.create_group('valid')

    
    test_group = voc_h5file.create_group('test')

    # store class list for reference class ids as csv fixed-length numpy string
    voc_h5file.attrs['classes'] = np.string_(str.join(',', aerial_classes))

    # store images as variable length uint8 arrays
    train_images = train_group.create_dataset(
        'images', shape=(total_train_ids, ), dtype=uint8_dt, chunks=True)
    val_images = val_group.create_dataset(
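Storing whole encoded files as variable-length uint8 arrays, as this script does for images, follows the same ragged-array pattern; a small sketch with fabricated byte payloads:

import h5py
import numpy as np

payloads = [np.frombuffer(b'\x89PNG-stand-in', dtype='uint8'),
            np.frombuffer(b'\xff\xd8\xff-stand-in', dtype='uint8')]  # fake image bytes

with h5py.File('images_example.h5', 'w') as f:
    uint8_dt = h5py.special_dtype(vlen=np.dtype('uint8'))
    ds = f.create_dataset('images', shape=(len(payloads),), dtype=uint8_dt, chunks=True)
    ds[...] = payloads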