class H5Converter(object):
"""Base converter class.
@cvar str_type: string type to be used for variable length strings in h5py
@type str_type: numpy.dtype
@ivar fname_in: filename to read data from
@type fname_in: string
@ivar fname_out: filename to write converted data to
@type fname_out: string
@ivar labels_idx: indices for labels for each row
@type labels_idx: list of integers
@ivar seperator: seperator to seperate variables in examples
@type seperator: string
"""
    str_type = h5py.new_vlen(str)
def __init__(self, fname_in, fname_out, seperator=None, remove_out=True):
"""
@param seperator: seperator used to seperate examples
@type seperator: string
@param remove_out: if output file shall be removed before running.
@type remove_out: boolean
"""
self.fname_in = fname_in
self.fname_out = fname_out
self.labels_idx = None
self.set_seperator(seperator)
        # sometimes it seems files are not properly overwritten when opened
        # with 'w' during run(), so optionally remove stale output up front
        if remove_out and os.path.exists(fname_out):
            os.remove(fname_out)
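
The str_type class attribute above builds a variable-length string dtype with the legacy h5py.new_vlen helper. A minimal, self-contained sketch of writing such strings, shown with the modern spelling h5py.special_dtype(vlen=str) alongside the legacy one; the file and dataset names are illustrative:

import h5py

# legacy spelling, as used throughout these snippets:
#   str_type = h5py.new_vlen(str)
# modern equivalent:
str_type = h5py.special_dtype(vlen=str)

with h5py.File("strings_demo.h5", "w") as f:
    ds = f.create_dataset("names", (2,), dtype=str_type)
    ds[0] = "alpha"
    ds[1] = "a much longer, variable-length string"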
def add_mesh_from_string(self, name, shape_data, scale=None,
insideMargin=None, outsideMargin=None):
"""
Add a mesh shape from a string.
Accepted format : mesh encoded in VTK .vtp format
"""
if name not in self._ref:
shape = self._ref.create_dataset(name, (1,),
dtype=h5py.new_vlen(str))
shape[:] = shape_data
shape.attrs['id'] = self._number_of_shapes
shape.attrs['type'] = 'vtp'
if scale is not None:
shape.attrs['scale'] = scale
if insideMargin is not None:
shape.attrs['insideMargin'] = insideMargin
if outsideMargin is not None:
shape.attrs['outsideMargin'] = outsideMargin
self._number_of_shapes += 1
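
A minimal usage sketch for add_mesh_from_string, assuming io is an instance of the class that defines it; the .vtp file and the shape name are hypothetical:

with open("cube.vtp", "r") as f:  # hypothetical mesh file in VTK .vtp format
    vtp_string = f.read()

io.add_mesh_from_string("cube", vtp_string,
                        scale=0.01,          # optional uniform scaling
                        insideMargin=0.001,  # optional contact margins
                        outsideMargin=0.001)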
    def _write_without_iterate(self, D, group_prefix="/"):
        """Recursively write a nested dictionary of data to the open HDF5 file."""
        for k in D:
            if isinstance(D[k], dict):
                # nested dictionaries become HDF5 groups
                group_prefix_new = group_prefix + k + "/"
                log_debug(logger, "Writing group %s" % group_prefix_new)
                if k not in self._f[group_prefix]:
                    self._f.create_group(group_prefix_new)
                self._write_without_iterate(D[k], group_prefix_new)
            else:
                # leaves become extendable HDF5 datasets
                name = group_prefix + k
                logger.debug("Writing dataset %s" % name)
                data = D[k]
                if k not in self._f[group_prefix]:
                    if numpy.isscalar(data):
                        maxshape = (None,)
                        shape = (self._chunksize,)
                        dtype = numpy.dtype(type(data))
                        if dtype == "S":
                            dtype = h5py.new_vlen(str)
                        axes = "experiment_identifier:value"
                    else:
                        data = numpy.asarray(data)
                        try:
                            h5py.h5t.py_create(data.dtype, logical=1)
                        except TypeError:
                            logger.warning("Could not save dataset %s. Conversion to numpy array failed" % name)
                            continue
                        maxshape = tuple([None] + list(data.shape))
                        shape = tuple([self._chunksize] + list(data.shape))
                        dtype = data.dtype
                        ndim = data.ndim
                        axes = "experiment_identifier"
                        if ndim == 1: axes = axes + ":x"
                        elif ndim == 2: axes = axes + ":y:x"
                        elif ndim == 3: axes = axes + ":z:y:x"
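
The dataset branch above allocates storage with a chunk-sized first dimension and maxshape=(None, ...) so more entries can be appended later. A self-contained sketch of that append pattern, with illustrative names:

import numpy
import h5py

chunksize = 4
entry = numpy.zeros((8, 8))  # one record per experiment

with h5py.File("stack_demo.h5", "w") as f:
    ds = f.create_dataset("data",
                          shape=(chunksize,) + entry.shape,
                          maxshape=(None,) + entry.shape,
                          dtype=entry.dtype)
    ds[0] = entry                      # fill the first slot
    ds.resize(2 * chunksize, axis=0)   # grow along the unlimited axis
    ds[chunksize] = entry              # append beyond the original shape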
class BaseHandler(object):
"""Base handler class.
It is the base for classes to handle different data formats.
It implicitely handles HDF5.
@cvar str_type: string type to be used for variable length strings in h5py
@type str_type: numpy.dtype
@ivar fname: name of file to handle
@type fname: string
@ivar seperator: seperator to seperate variables in examples
@type seperator: string
"""
    str_type = h5py.new_vlen(str)
def __init__(self, fname, seperator=None, compression=None, merge=False):
"""
@param fname: name of in-file
@type fname: string
@param seperator: seperator used to seperate examples
@type seperator: string
"""
self.fname = fname
self.compression = compression
self.set_seperator(seperator)
self.merge = merge
    def set_seperator(self, seperator):
        """Set the separator used to separate variables in examples."""
        self.seperator = seperator
def addInteraction(self, name, body1_name, contactor1_name,
body2_name, contactor2_name,
distance_calculator='cadmbtb',
offset=0.0001):
"""
Add permanent interactions between two objects contactors.
"""
if name not in self.permanent_interactions():
pinter = self.permanent_interactions().\
create_dataset(name, (1,),
dtype=h5py.new_vlen(str))
pinter.attrs['id'] = self._number_of_permanent_interactions
pinter.attrs['type'] = 'permanent_interaction'
pinter.attrs['body1_name'] = body1_name
pinter.attrs['body2_name'] = body2_name
pinter.attrs['contactor1_name'] = contactor1_name
pinter.attrs['contactor2_name'] = contactor2_name
pinter.attrs['distance_calculator'] = distance_calculator
pinter.attrs['offset'] = offset
self._pinterid[name] = pinter.attrs['id']
self._number_of_permanent_interactions += 1
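
A minimal usage sketch for addInteraction, assuming io is an instance of the class that defines it; the body and contactor names are hypothetical:

io.addInteraction("ball-ground",
                  body1_name="ball", contactor1_name="ball_shape",
                  body2_name="ground", contactor2_name="ground_shape",
                  distance_calculator="cadmbtb",
                  offset=0.0001)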
# fingerprint the command line to build a unique basename for the outputs
md5hex = hashlib.md5(' '.join(sys.argv)).hexdigest()
file_base = os.path.abspath(session_location) + "/seg_data/seg-" + str(threshold) + "-" + md5hex + "-"
transforms = imio.compute_sp_to_body_map(supervoxels, segmentation)
seg_loc = file_base + "v1.h5"
if not os.path.exists(session_location + "/seg_data"):
    os.makedirs(session_location + "/seg_data")
imio.write_mapped_segmentation(supervoxels, transforms, seg_loc)
if options.synapse_file is not None:
    # copy the synapse annotations into the segmentation file as a
    # variable-length JSON string
    h5temp = h5py.File(seg_loc, 'a')
    syn_data = json.load(open(options.synapse_file))
    meta = syn_data['metadata']
    meta['username'] = "auto"
    syn_data_str = json.dumps(syn_data, indent=4)
    str_type = h5py.new_vlen(str)
    ds = h5temp.create_dataset("synapse-annotations", data=syn_data_str,
                               shape=(1,), dtype=str_type)
graph_loc = file_base + "graphv1.json"

json_data = {}
json_data['graph'] = graph_loc
json_data['border'] = options.border_size

subvolume = {}
subvolume['segmentation-file'] = seg_loc
subvolume['prediction-file'] = os.path.abspath(session_location) + "/STACKED_prediction.h5"

gray_file_whole = os.path.abspath(glob.glob(options.image_stack)[0])
gray_path = os.path.dirname(gray_file_whole)
gray_file = os.path.basename(gray_file_whole)
# width of the numeric index embedded in the grayscale image file name
field_width = len(re.findall(r'\d', gray_file))
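
The script above serializes a whole JSON document into a single variable-length string dataset. A self-contained sketch of that technique; the file name, dataset name, and record contents are illustrative:

import json
import h5py

record = {"metadata": {"username": "auto"}, "annotations": []}
record_str = json.dumps(record, indent=4)

with h5py.File("annotations_demo.h5", "w") as f:
    str_type = h5py.special_dtype(vlen=str)  # legacy spelling: h5py.new_vlen(str)
    ds = f.create_dataset("synapse-annotations", (1,), dtype=str_type)
    ds[0] = record_str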
def _createDatasetInFile(self, hdf5File, datasetName, roi):
shape = tuple( roi[1] - roi[0] )
chunks = self._description.chunks
if chunks is not None:
# chunks must not be bigger than the data in any dim
chunks = numpy.minimum( chunks, shape )
chunks = tuple(chunks)
        compression = self._description.compression
        compression_opts = self._description.compression_opts
        dtype = self._description.dtype
        if dtype == object:
            # object dtype is stored as variable-length strings
            dtype = h5py.new_vlen(str)
dataset = hdf5File.create_dataset( datasetName,
shape=shape,
dtype=dtype,
chunks=chunks,
compression=compression,
compression_opts=compression_opts )
# Set data attributes
if self._description.drange is not None:
dataset.attrs['drange'] = self._description.drange
if _use_vigra:
dataset.attrs['axistags'] = vigra.defaultAxistags( str(self._description.axes) ).toJSON()
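
_createDatasetInFile clips the configured chunk shape so no chunk dimension exceeds the dataset itself. A standalone sketch of that pattern, with illustrative shapes and names:

import numpy
import h5py

shape = (10, 512, 512)
requested_chunks = (64, 256, 256)

# chunks must not be bigger than the data in any dim
chunks = tuple(numpy.minimum(requested_chunks, shape))

with h5py.File("chunked_demo.h5", "w") as f:
    ds = f.create_dataset("volume", shape=shape, dtype="float32",
                          chunks=chunks, compression="gzip")
    print(ds.chunks)  # (10, 256, 256)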
#print file_in['data'].keys()
#print file_in['data'].items()
# Create external link to parent's data
#file_out['history/parent/detail/data'] = h5py.ExternalLink(src,'/data')
# the module name is the part of the source file name before '_out'
parent_module = os.path.basename(src)[:os.path.basename(src).find('_out')]
file_out['history/parent/detail/data'] = h5py.ExternalLink('../' + parent_module + '/' + os.path.basename(src), '/data')
# Create your own groups
grp_data = file_out.create_group("data")
grp_param = file_out.create_group("params")
grp_misc = file_out.create_group("misc")
grp_info = file_out.create_group("info")
str_type = h5py.new_vlen(str)
# Interface version
dataset = file_out.create_dataset("version", (1,), dtype='f')
dataset[...] = 0.1
# Populate /info
dataset = file_out.create_dataset("info/package_version",(1,), dtype=str_type)
data = ("SingFEL v0.1.0")
dataset[...] = data
dataset = file_out.create_dataset("info/contact",(2,), dtype=str_type)
data = ("Name: Chunhong Yoon", "Email: chun.hong.yoon@desy.de")
dataset[...] = data
dataset = file_out.create_dataset("info/data_description",(1,), dtype=str_type)
data = ("This dataset contains a diffraction pattern generated using SingFEL.")
dataset[...] = data
dataset = file_out.create_dataset("info/method_description",(1,), dtype=str_type)
data = ("Form factors of the radiation damaged molecules are calculated in time slices. At each time slice, the coherent scattering is calculated and incoherently added to the final diffraction pattern. Finally, Poissonian noise is added to the diffraction pattern.")
dataset[...] = data
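
The script above wires the parent module's data into the output file with h5py.ExternalLink, which references another file without copying it. A minimal standalone sketch; the file names are illustrative:

import h5py

with h5py.File("parent.h5", "w") as parent:
    parent.create_dataset("data", data=[1, 2, 3])

with h5py.File("child.h5", "w") as child:
    # intermediate groups are created automatically
    child["history/parent/detail/data"] = h5py.ExternalLink("parent.h5", "/data")

with h5py.File("child.h5", "r") as child:
    print(child["history/parent/detail/data"][...])  # [1 2 3]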
    def _create_dataset(self, data, name):
        """Create an extendable dataset for data under name; returns 1 on failure."""
        if numpy.isscalar(data):
            maxshape = (None,)
            shape = (self._chunksize,)
            dtype = numpy.dtype(type(data))
            if dtype == "S":
                dtype = h5py.new_vlen(str)
            axes = "experiment_identifier:value"
else:
data = numpy.asarray(data)
try:
h5py.h5t.py_create(data.dtype, logical=1)
except TypeError:
log_warning(logger, "(%i) Could not save dataset %s. Conversion to numpy array failed" % (self._rank, name))
return 1
maxshape = tuple([None]+list(data.shape))
shape = tuple([self._chunksize]+list(data.shape))
dtype = data.dtype
ndim = data.ndim
axes = "experiment_identifier"
if ndim == 1: axes = axes + ":x"
elif ndim == 2: axes = axes + ":y:x"
elif ndim == 3: axes = axes + ":z:y:x"
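
Both writer fragments probe whether a numpy dtype has an HDF5 representation by calling the low-level h5py.h5t.py_create and catching TypeError. A standalone sketch of that check:

import numpy
import h5py

def is_storable(dtype):
    """Return True if h5py can map this numpy dtype to an HDF5 type."""
    try:
        h5py.h5t.py_create(dtype, logical=1)
        return True
    except TypeError:
        return False

print(is_storable(numpy.dtype("float64")))  # True
print(is_storable(numpy.dtype("O")))        # False: plain object arrays have no HDF5 type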
self._hdf5_file.create_dataset(raw_image_str,
                               data=raw_image_cpy,
                               maxshape=max_shape(raw_image_cpy.shape),
                               compression=self._hdf5_compression)
self._hdf5_file[raw_image_str].attrs['valid'] = raw_image_valid
if self._hdf5_file[raw_image_str].shape[0] != len(self._regions_to_idx):
    self._hdf5_file[raw_image_str].resize(len(self._regions_to_idx), axis=0)
if feature_dict is not None:
    for (key_desc, key_data), (value_desc, value_data) in feature_dict.items():
        self._hdf5_file.create_dataset(key_desc, data=value_desc,
                                       compression=self._hdf5_compression)
        if not value_data.dtype == numpy.dtype('O'):
            d = self._hdf5_file.create_dataset(key_data, data=value_data,
                                               compression=self._hdf5_compression)
        else:
            # object arrays are stored as variable-length strings
            d = self._hdf5_file.create_dataset(key_data, data=value_data,
                                               compression=self._hdf5_compression,
                                               dtype=h5py.new_vlen(str))
        d.attrs["reused"] = True
if object_dict is not None:
    for (key_desc, key_data), (value_desc, value_data) in object_dict.items():
        d = self._hdf5_file.create_dataset(key_desc, data=value_desc,
                                           compression=self._hdf5_compression)
        d.attrs["reused"] = True
        d = self._hdf5_file.create_dataset(key_data, data=value_data,
                                           compression=self._hdf5_compression)
        d.attrs["reused"] = True
self.cellh5_file = CH5File(self._hdf5_file)
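
The resize call above grows an image dataset along its first axis as more regions are recorded; this only works for datasets created with a maxshape that permits growth. A minimal sketch with illustrative names:

import numpy
import h5py

with h5py.File("resize_demo.h5", "w") as f:
    ds = f.create_dataset("raw", shape=(1, 64, 64), dtype="uint8",
                          maxshape=(None, 64, 64))
    if ds.shape[0] != 3:
        ds.resize(3, axis=0)  # grow the unlimited axis
    ds[2] = numpy.full((64, 64), 255, dtype="uint8")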