else:
chunk_shape = (chunk_length, min(chunk_width, data.shape[1])) + data.shape[2:]
# create dataset
group, name = k.split('/')
if name in root[group]:
if overwrite:
del root[group][name]
else:
raise ValueError('dataset exists at path %r; use overwrite=True to replace' % k)
shape = (0,) + data.shape[1:]
maxshape = (None,) + data.shape[1:]
if data.dtype.kind == 'O':
if vlen:
dt = h5py.special_dtype(vlen=str)
else:
data = data.astype('S')
dt = data.dtype
else:
dt = data.dtype
ds = root[group].create_dataset(
name, shape=shape, maxshape=maxshape, chunks=chunk_shape, dtype=dt,
compression=compression, compression_opts=compression_opts, shuffle=shuffle
)
# copy metadata from VCF headers
# meta = None
# if group == 'variants' and name in headers.infos:
# meta = headers.infos[name]
# elif group == 'calldata' and name in headers.formats:
# meta = headers.formats[name]
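# The snippet above builds an empty, chunked dataset with an unlimited first axis so rows
# can be appended later by resizing. A minimal, self-contained sketch of the same pattern;
# the file name 'example.h5' and path 'variants/POS' are placeholders, not from the code above.
import h5py
import numpy as np

data = np.arange(10, dtype='i4')
with h5py.File('example.h5', 'w') as root:
    # start empty along the first axis with an unlimited maxshape
    ds = root.create_dataset('variants/POS', shape=(0,), maxshape=(None,),
                             chunks=(5,), dtype=data.dtype, compression='gzip')
    ds.resize((len(data),))       # grow the first axis
    ds[-len(data):] = data        # write the appended block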
def store(self, extension):
for filename, data in self.prediction.items():
texts = data['transcripts']
codec = data['codec']
basename, ext = split_all_ext(filename)
with h5py.File(basename + extension, 'w') as file:
dt = h5py.special_dtype(vlen=np.dtype('int32'))
file.create_dataset('transcripts', (len(texts),), dtype=dt)
file['transcripts'][...] = texts
file.create_dataset('codec', data=list(map(ord, codec)))
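# h5py.special_dtype(vlen=np.dtype('int32')) gives each element of the dataset its own
# variable-length int32 array, which is what lets transcripts of different lengths share
# one dataset. A minimal sketch of the same idea, with made-up file and data names.
import h5py
import numpy as np

texts = [np.array([72, 105], dtype='int32'), np.array([79, 75, 33], dtype='int32')]
codec = 'abc'
with h5py.File('predictions.h5', 'w') as f:
    dt = h5py.special_dtype(vlen=np.dtype('int32'))   # one variable-length int32 row per text
    ds = f.create_dataset('transcripts', (len(texts),), dtype=dt)
    ds[...] = texts
    f.create_dataset('codec', data=list(map(ord, codec)))  # codec characters as integer codes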
num_workers=workers,
pin_memory=True)
features = {}
for i, (input_data, paths) in enumerate(tqdm(loader)):
input_var = torch.autograd.Variable(input_data, volatile=True).cuda()  # deprecated since PyTorch 0.4; wrap the forward pass in torch.no_grad() instead
current_features = model(input_var).data.cpu().numpy()
for j, image_path in enumerate(paths):
features[image_path] = current_features[j]
feature_shape = next(iter(features.values())).shape
logging.info('Feature shape: %s' % (feature_shape, ))
logging.info('Outputting features')
if sys.version_info >= (3, 0):
string_type = h5py.special_dtype(vlen=str)
else:
string_type = h5py.special_dtype(vlen=unicode) # noqa
paths = features.keys()
logging.info('Stacking features')
features_stacked = np.vstack([features[path] for path in paths])
logging.info('Output feature size: %s' % (features_stacked.shape, ))
with h5py.File(output_hdf5, 'a') as f:
f.create_dataset('features', data=features_stacked)
f.create_dataset(
'image_names',
(len(paths), ),
dtype=string_type)
# For some reason, assigning the list directly causes an error, so we
# assign it in a loop.
for i, image_path in enumerate(paths):
f['image_names'][i] = image_path_to_name(image_path)
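# The element-wise loop at the end is the workaround the comment mentions for writing
# variable-length strings. A minimal sketch of the same features-plus-names layout,
# using synthetic data and placeholder file names.
import h5py
import numpy as np

features = {'img_0001.jpg': np.random.rand(4).astype('float32'),
            'img_0002.jpg': np.random.rand(4).astype('float32')}
paths = sorted(features)
stacked = np.vstack([features[p] for p in paths])
with h5py.File('features.h5', 'w') as f:
    f.create_dataset('features', data=stacked)
    names = f.create_dataset('image_names', (len(paths),),
                             dtype=h5py.special_dtype(vlen=str))
    for i, p in enumerate(paths):   # element-wise assignment, as in the snippet above
        names[i] = p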
def _pack_subjs(h5, subjects):
for subject in subjects:
rois = db.get_overlay(subject)
rnode = h5.require_dataset("/subjects/%s/rois"%subject, (1,),
dtype=h5py.special_dtype(vlen=str))
rnode[0] = rois.toxml(pretty=False)
surfaces = db.get_paths(subject)['surfs']
for surf in surfaces.keys():
for hemi in ("lh", "rh"):
pts, polys = db.get_surf(subject, surf, hemi)
group = "/subjects/%s/surfaces/%s/%s"%(subject, surf, hemi)
_hdf_write(h5, pts, "pts", group)
_hdf_write(h5, polys, "polys", group)
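# require_dataset returns the existing dataset if it is already in the file and otherwise
# creates it (including parent groups), so repeated packs can overwrite the single XML cell
# in place. A minimal sketch of that pattern; the file name and subject ID are placeholders.
import h5py

with h5py.File('pack.h5', 'w') as h5:
    str_dt = h5py.special_dtype(vlen=str)
    rnode = h5.require_dataset('/subjects/S1/rois', (1,), dtype=str_dt)
    rnode[0] = '<svg>...</svg>'   # one variable-length string cell holding the XML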
name for the new dataset
**kwargs:
all **kwargs are passed to create_dataset, useful for e.g. compression
'''
dtype = array.dtype
maxshape = [None] + list(array.shape)[1:]
shape = [0] + list(array.shape)[1:]
attrs = {}
if dtype.base == object:
if isinstance(array[0], list):
dt = np.array(array[0]).dtype
shape = [0, len(array[0])]
maxshape = [None, len(array[0])]
else:
dt = h5py.special_dtype(vlen=str)
elif dtype.type == np.datetime64:
# save dates as ISO string, create dummy date to get correct length
dt = np.array(0, dtype=dtype).astype('S').dtype
attrs['timeformat'] = 'iso'
else:
dt = dtype.base
# add default compression options if no options are given
if 'compression' not in kwargs:
kwargs.update(DEFAULT_COMPRESSION)
dataset = group.create_dataset(
name,
shape=tuple(shape),
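# The datetime64 branch converts timestamps to fixed-length ISO byte strings, using a dummy
# date to determine the string length, before writing them to HDF5 (which has no native
# datetime type). A minimal sketch of that conversion with made-up timestamps.
import h5py
import numpy as np

times = np.array(['2021-01-01T00:00:00', '2021-06-15T12:30:00'], dtype='datetime64[s]')
iso_dt = np.array(0, dtype=times.dtype).astype('S').dtype   # fixed length, e.g. dtype('S19')
with h5py.File('times.h5', 'w') as f:
    ds = f.create_dataset('timestamps', data=times.astype(iso_dt))
    ds.attrs['timeformat'] = 'iso'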
# add nodes
nodes = tstt.create_group('nodes')
coords = nodes.create_dataset('coordinates', data=points)
coords.attrs.create('start_id', global_id)
global_id += len(points)
# add point data
if point_data:
tags = nodes.create_group('tags')
for key, value in point_data.items():
tags.create_dataset(key, data=value)
# add elements
elements = tstt.create_group('elements')
elem_dt = h5py.special_dtype(
enum=('i', {
'Edge': 1,
'Tri': 2,
'Quad': 3,
'Polygon': 4,
'Tet': 5,
'Pyramid': 6,
'Prism': 7,
'Knife': 8,
'Hex': 9,
'Polyhedron': 10
})
)
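# special_dtype(enum=...) maps names onto a base integer type, producing an HDF5 enum that
# stores plain integers while keeping the name-to-value mapping in the file. A minimal
# sketch with a reduced set of element names and a placeholder file name.
import h5py
import numpy as np

elem_dt = h5py.special_dtype(enum=('i', {'Edge': 1, 'Tri': 2, 'Quad': 3}))
with h5py.File('mesh.h5', 'w') as f:
    ds = f.create_dataset('element_types', (4,), dtype=elem_dt)
    ds[...] = np.array([2, 2, 3, 1], dtype='i')    # Tri, Tri, Quad, Edge
    print(h5py.check_dtype(enum=ds.dtype))         # recovers {'Edge': 1, 'Tri': 2, 'Quad': 3}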
# number of nodes to h5m name, element type
h5m_type = {
if len(srcdt) > 0:
fields = []
for name in srcdt.fields:
item = srcdt.fields[name]
# item is a tuple of dtype and integer offset
field_dt = convert_dtype(item[0], ctx)
fields.append((name, field_dt))
tgt_dt = np.dtype(fields)
else:
# check if this is a "special dtype"
if srcdt.metadata and 'ref' in srcdt.metadata:
ref = srcdt.metadata['ref']
if is_reference(ref):
if is_h5py(ctx['fout']):
tgt_dt = h5py.special_dtype(ref=h5py.Reference)
else:
tgt_dt = h5pyd.special_dtype(ref=h5pyd.Reference)
elif is_regionreference(ref):
if is_h5py(ctx['fout']):
tgt_dt = h5py.special_dtype(ref=h5py.RegionReference)
else:
tgt_dt = h5pyd.special_dtype(ref=h5pyd.RegionReference)  # mirror the Reference branch above when writing through h5pyd
else:
msg = "Unexpected ref type: {}".format(srcdt)
logging.error(msg)
raise TypeError(msg)
elif srcdt.metadata and 'vlen' in srcdt.metadata:
src_vlen = srcdt.metadata['vlen']
if isinstance(src_vlen, np.dtype):
tgt_base = convert_dtype(src_vlen, ctx)
else:
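# For the h5py branches above, a reference-typed dataset stores pointers to other objects in
# the same file; h5pyd mirrors the same special_dtype call for server-backed files. A minimal
# sketch of creating and dereferencing an object reference; names are illustrative.
import h5py

with h5py.File('refs.h5', 'w') as f:
    target = f.create_dataset('data', data=[1, 2, 3])
    ref_dt = h5py.special_dtype(ref=h5py.Reference)
    refs = f.create_dataset('refs', (1,), dtype=ref_dt)
    refs[0] = target.ref        # store an object reference to /data
    print(f[refs[0]][...])      # dereference back to the dataset and read it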
voc_path = os.path.expanduser(args.path_to_voc)
train_ids = get_ids(voc_path, train_set)
val_ids = get_ids(voc_path, val_set)
test_ids = get_ids(voc_path, test_set)
train_ids_2007 = get_ids(voc_path, sets_from_2007)
total_train_ids = len(train_ids) + len(train_ids_2007)
# Create HDF5 dataset structure
print('Creating HDF5 dataset structure.')
fname = os.path.join(voc_path, 'pascal_voc_07_12_person_vehicle.hdf5')
if os.path.exists(fname):
print('Removing old ' + fname)
os.remove(fname)
voc_h5file = h5py.File(fname, 'w')
uint8_dt = h5py.special_dtype(
vlen=np.dtype('uint8')) # variable length uint8
vlen_int_dt = h5py.special_dtype(
vlen=np.dtype(int)) # variable length default int
train_group = voc_h5file.create_group('train')
val_group = voc_h5file.create_group('valid')
test_group = voc_h5file.create_group('test')
# store the class list as a comma-separated, fixed-length numpy string so class ids can be mapped back to names
voc_h5file.attrs['classes'] = np.string_(str.join(',', aerial_classes))
# store images as variable length uint8 arrays
train_images = train_group.create_dataset(
'images', shape=(total_train_ids, ), dtype=uint8_dt, chunks=True)
val_images = val_group.create_dataset(