def setup_batches(self, reset_cache=True):
'''
    Set up the batches used by the data-generation pipeline.
    Prepares the idx2batch and idx2file dicts that map each batch number to its row indices and its source file.
'''
self.seed += 1
    if self.shuffle:
np.random.RandomState(seed=self.seed).shuffle(self.files)
if reset_cache:
del self.cache
self.cache = {} # Reset the cache
idx = 0
self.idx2file = {} #reset the dictionaries
self.idx2batch = {}
for f in self.files:
h5file = tables.open_file(self.path + "/" + f, mode="r")
nrows = len(h5file.root.data_x.axis1)
num_batches = int(np.ceil((nrows) / self.batch_size))
indices = np.arange(nrows)
if self.shuffle:
np.random.RandomState(seed=(self.seed+idx)).shuffle(indices)
num_rows_remaining = int(nrows % self.batch_size)
padding = int(self.batch_size - num_rows_remaining) # The "padding" to add to make divisible by batch_size
if num_rows_remaining > 0:
batches = np.array_split(np.concatenate((indices, np.repeat(-1,padding))), num_batches) # Do batch assignments
batches = [i[i != -1] for i in batches]
else:
batches = np.array_split(indices, num_batches)
self.idx2batch.update(dict(list(zip(list(range(idx, idx+num_batches)), batches))))
self.idx2file.update(dict.fromkeys(list(range(idx, idx+num_batches)), f))
h5file.close()
idx += num_batches
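def get_batch(self, idx):
    # Hedged sketch (not part of the original class): one way the idx2file / idx2batch
    # mappings built above might be consumed when fetching a batch. Assumes data_x was
    # written by pandas (the axis1 node read in setup_batches suggests an HDFStore
    # layout) and reuses self.cache so a file is not re-read for every batch.
    import pandas as pd
    f = self.idx2file[idx]            # which HDF5 file holds this batch
    rows = self.idx2batch[idx]        # which (possibly shuffled) row indices to read
    if f not in self.cache:
        self.cache[f] = pd.read_hdf(self.path + "/" + f, "data_x")
    return self.cache[f].iloc[rows]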
def diff_h5(input1_path, input2_path, numdiff=10):
input1_file = tables.open_file(input1_path, mode="r")
input2_file = tables.open_file(input2_path, mode="r")
input1_entities = input1_file.root.entities
input2_entities = input2_file.root.entities
# noinspection PyProtectedMember
ent_names1 = set(table._v_name for table in input1_entities)
# noinspection PyProtectedMember
ent_names2 = set(table._v_name for table in input2_entities)
for ent_name in sorted(ent_names1 | ent_names2):
print()
print(ent_name)
if ent_name not in ent_names1:
print("missing in file 1")
continue
        elif ent_name not in ent_names2:
            print("missing in file 2")
            continue
import numpy
import tables
from time import time
# import psyco  # Python 2-only JIT; not needed to run this benchmark
filename = "/tmp/LRU-bench.h5"
nodespergroup = 250
niter = 100
f = tables.open_file(filename, "w")
g = f.create_group("/", "NodeContainer")
print "Creating nodes"
for i in range(nodespergroup):
f.create_array(g, "arr%d"%i, [i])
f.close()
f = tables.open_file(filename)
def iternodes():
# for a in f.root.NodeContainer:
# pass
indices = numpy.random.randn(nodespergroup*niter)*30+nodespergroup/2.
indices = indices.astype('i4').clip(0, nodespergroup-1)
g = f.get_node("/", "NodeContainer")
for i in indices:
a = f.get_node(g, "arr%d"%i)
#print "a-->", a
print "reading nodes..."
# First iteration (put in LRU cache)
t1 = time()
for a in f.root.NodeContainer:
pass
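print("First iteration (cold):", round(time() - t1, 3))

# Hedged continuation (not in the original snippet): a second pass over the same
# nodes should now be served from PyTables' node LRU cache, so timing it again
# shows the cache effect this benchmark is after.
t1 = time()
for a in f.root.NodeContainer:
    pass
print("Second iteration (cached):", round(time() - t1, 3))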
def save(self, archive_name: str) -> None:
"""save a ReadArray in .h5 format
:param archive_name: filename of a new .h5 archive in which to save the ReadArray
:return: None
"""
# create table
blosc5 = tb.Filters(complevel=5, complib='blosc')
f = tb.open_file(archive_name, mode='w', title='Data for seqc.ReadArray',
filters=blosc5)
# store data
f.create_table(f.root, 'data', self._data)
f.close()
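# Hedged sketch (not from the original class): the 'data' table written by save()
# could be read back into a structured numpy array like this, assuming the same
# node name.
def load_data(archive_name: str):
    with tb.open_file(archive_name, mode='r') as f:
        return f.root.data.read()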
def bench(chunkshape, filters):
    numpy.random.seed(1)  # to have reproducible results
filename = tempfile.mktemp(suffix='.h5')
print "Doing test on the file system represented by:", filename
f = tables.open_file(filename, 'w')
e = f.create_earray(f.root, 'earray', datom, shape=(0, M),
filters = filters,
chunkshape = chunkshape)
# Fill the array
t1 = time()
    for i in range(N):
#e.append([numpy.random.rand(M)]) # use this for less compressibility
e.append([quantize(numpy.random.rand(M), 6)])
#os.system("sync")
print "Creation time:", round(time()-t1, 3),
filesize = get_db_size(filename)
filesize_bytes = os.stat(filename)[6]
print "\t\tFile size: %d -- (%s)" % (filesize_bytes, filesize)
# Read in sequential mode:
e = f.root.earray
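    # Hedged continuation (not part of the original snippet): a sequential read of
    # the EArray created above, timed the same way as the fill loop.
    t1 = time()
    for row in e:
        pass
    print("Sequential read time:", round(time()-t1, 3))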
def __setitem__(self, key, value):
# HDF5 arrays can be updated
with tb.open_file(self.path, mode=self.mode) as f:
dset = f.get_node(self.datapath)
dset[key] = value
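def __getitem__(self, key):
    # Hedged counterpart to __setitem__ above (a sketch, not necessarily the class's
    # real implementation): reads work the same way, by opening the file and slicing
    # the node at self.datapath.
    with tb.open_file(self.path, mode='r') as f:
        return f.get_node(self.datapath)[key]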
else:
# Use feature space for other feature types
    # Assumes other feature types have much lower dimensionality than gene expression
matrix = matrix.select_features_by_type(args.feature_type)
matrix.m.data = np.log2(1 + matrix.m.data)
tsne_input = matrix.m.transpose().todense()
name = get_tsne_name(args.feature_type, args.tsne_dims)
key = get_tsne_key(args.feature_type, args.tsne_dims)
tsne = cr_tsne.run_tsne(tsne_input, name=name, key=key, input_pcs=args.input_pcs, perplexity=args.perplexity,
theta=args.theta, tsne_dims=tsne_dims, max_iter=args.max_iter, stop_lying_iter=args.stop_lying_iter,
mom_switch_iter=args.mom_switch_iter, random_state=args.random_seed)
filters = tables.Filters(complevel = h5_constants.H5_COMPRESSION_LEVEL)
with tables.open_file(outs.tsne_h5, 'w', filters = filters) as f:
cr_tsne.save_tsne_h5(tsne, f)
cr_tsne.save_tsne_csv(tsne, matrix, outs.tsne_csv)
import textwrap
import tables
import clusters
from simulations import GroundParticlesSimulation, QSubSimulation
DATAFILE = 'data.h5'
if __name__ == '__main__':
try:
data
except NameError:
data = tables.open_file(DATAFILE, 'a')
if '/simulations' in data:
        print()
        print(textwrap.dedent("""\
            WARNING: previous simulations exist and will be overwritten
            Continue? (answer 'yes'; anything else will exit)"""))
try:
            inp = input()
except KeyboardInterrupt:
inp = 'Ctrl-C'
if inp.lower() == 'yes':
data.remove_node('/simulations', recursive=True)
else:
            print()
            print("Aborting!")
#########################################
########### TRAIN/TEST HDF5 #############
#########################################
phase = 'development'
output_filename = phase + '.hdf5'
#################################################
################# HDF5 elements #################
#################################################
# DEFAULTS:
num_frames = 80
num_coefficient = 40
num_utterances = 20
hdf5_file = tables.open_file(output_filename, mode='w')
filters = tables.Filters(complevel=5, complib='blosc')
utterance_train_storage = hdf5_file.create_earray(hdf5_file.root, 'utterance_train',
tables.Float32Atom(shape=(), dflt=0.0),
shape=(0, num_frames, num_coefficient, num_utterances),
filters=filters)
utterance_test_storage = hdf5_file.create_earray(hdf5_file.root, 'utterance_test',
tables.Float32Atom(shape=(), dflt=0.0),
shape=(0, num_frames, num_coefficient, num_utterances),
filters=filters)
label_train_storage = hdf5_file.create_earray(hdf5_file.root, 'label_train',
tables.IntAtom(shape=(), dflt=0.0),
shape=(0,),
filters=filters)
label_test_storage = hdf5_file.create_earray(hdf5_file.root, 'label_test',
                                             tables.IntAtom(shape=(), dflt=0.0),
                                             shape=(0,),
                                             filters=filters)
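# Hedged usage sketch (not in the original script): EArrays grow along their first,
# zero-length axis, so one training example would be appended roughly like this.
# The zero-filled `sample` and the label 0 are placeholders for illustration only.
import numpy as np
sample = np.zeros((1, num_frames, num_coefficient, num_utterances), dtype=np.float32)
utterance_train_storage.append(sample)
label_train_storage.append(np.array([0], dtype=np.int32))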
good_index = tracks_data[(delX>MIN_DISPLACEMENT) & (delY>MIN_DISPLACEMENT)].index
df = df[df.worm_index_joined.isin(good_index)]
table_fid.close()
#df['segworm_results_id'] = pd.Series(-1, index = df.index)
# calculate track length; it is important to do this instead of counting, because some joined tracks are discontinuous
# for the moment useless
track_size = (tracks_data.loc[good_index]['frame_number']['max']- \
tracks_data.loc[good_index]['frame_number']['min']+1)
#open the file again, this time using pytables in append mode to add segworm data
results_fid = tables.open_file(trajectories_file, 'r+')
if 'segworm_results' in results_fid.root._v_children.keys():
results_fid.remove_node('/segworm_results')
segworm_table = results_fid.create_table('/', "segworm_results", segworm_results, "Results from the skeletonization using segWorm.")
#segworm_results = results_fid.create_vlarray(results_fid.root, 'segworm_results',
#tables.ObjectAtom(), "", filters=tables.Filters(complevel = 1, complib = 'blosc', shuffle = True))
tracking_table = results_fid.get_node('/plate_worms')
prev_worms = {}
tic = time.time()
tic_first = tic
for frame in range(0, 100):  # df['frame_number'].max()
img = mask_dataset[frame,:,:]