How to use the tables.open_file function in tables

To help you get started, we’ve selected a few tables.open_file examples, based on popular ways the function is used in public projects.

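If you have not used PyTables before, here is a minimal sketch of the basic open/close pattern; the file name and node names are placeholders, not taken from the projects below.

import tables

# Write: create a new file, add a group and an array under it, then close.
h5file = tables.open_file("example.h5", mode="w", title="Example file")
group = h5file.create_group("/", "measurements", "Example group")
h5file.create_array(group, "values", [1, 2, 3], "Example array")
h5file.close()

# Read: the context-manager form closes the file automatically, even on errors.
with tables.open_file("example.h5", mode="r") as h5file:
    print(h5file.root.measurements.values.read())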

github HealthRex / CDSS / scripts / Archive / ClinicNet / clinicnet_model / utils / datagenerator.py
def setup_batches(self, reset_cache=True):
        '''
        Setup batches to be used for the data generation pipeline
        Prepares the idx2batch and idx2file dicts that map batch number to the relevant data
        '''
        self.seed += 1
        if self.shuffle:
            np.random.RandomState(seed=self.seed).shuffle(self.files)
        if reset_cache:
            del self.cache
            self.cache = {} # Reset the cache
        idx = 0 
        self.idx2file = {} #reset the dictionaries
        self.idx2batch = {}
        for f in self.files:
            h5file = tables.open_file(self.path + "/" + f, mode="r")
            nrows = len(h5file.root.data_x.axis1)
            num_batches = int(np.ceil((nrows) / self.batch_size))
            indices = np.arange(nrows)
            if self.shuffle:
                np.random.RandomState(seed=(self.seed+idx)).shuffle(indices)
            num_rows_remaining = int(nrows % self.batch_size)
            padding = int(self.batch_size - num_rows_remaining) # The "padding" to add to make divisible by batch_size
            if num_rows_remaining > 0:
                batches = np.array_split(np.concatenate((indices, np.repeat(-1,padding))), num_batches) # Do batch assignments
                batches = [i[i != -1] for i in batches]
            else:
                batches = np.array_split(indices, num_batches)
            self.idx2batch.update(dict(list(zip(list(range(idx, idx+num_batches)), batches))))
            self.idx2file.update(dict.fromkeys(list(range(idx, idx+num_batches)), f))
            h5file.close()
            idx += num_batches
github liam2 / liam2 / liam2 / diff_h5.py
def diff_h5(input1_path, input2_path, numdiff=10):
    input1_file = tables.open_file(input1_path, mode="r")
    input2_file = tables.open_file(input2_path, mode="r")

    input1_entities = input1_file.root.entities
    input2_entities = input2_file.root.entities

    # noinspection PyProtectedMember
    ent_names1 = set(table._v_name for table in input1_entities)
    # noinspection PyProtectedMember
    ent_names2 = set(table._v_name for table in input2_entities)
    for ent_name in sorted(ent_names1 | ent_names2):
        print()
        print(ent_name)
        if ent_name not in ent_names1:
            print("missing in file 1")
            continue
        elif ent_name not in ent_names2:
            print("missing in file 2")
github PyTables / PyTables / bench / LRUcache-node-bench.py
import numpy
import tables
from time import time

filename = "/tmp/LRU-bench.h5"
nodespergroup = 250
niter = 100

f = tables.open_file(filename, "w")
g = f.create_group("/", "NodeContainer")
print "Creating nodes"
for i in range(nodespergroup):
    f.create_array(g, "arr%d"%i, [i])
f.close()

f = tables.open_file(filename)

def iternodes():
#     for a in f.root.NodeContainer:
#         pass
    indices = numpy.random.randn(nodespergroup*niter)*30+nodespergroup/2.
    indices = indices.astype('i4').clip(0, nodespergroup-1)
    g = f.get_node("/", "NodeContainer")
    for i in indices:
        a = f.get_node(g, "arr%d"%i)
        #print "a-->", a

print "reading nodes..."
# First iteration (put in LRU cache)
t1 = time()
for a in f.root.NodeContainer:
    pass
github ambrosejcarr / seqc / src / seqc / core.py
def save(self, archive_name: str) -> None:
        """save a ReadArray in .h5 format

        :param archive_name: filename of a new .h5 archive in which to save the ReadArray
        :return: None
        """

        # create table
        blosc5 = tb.Filters(complevel=5, complib='blosc')
        f = tb.open_file(archive_name, mode='w', title='Data for seqc.ReadArray',
                         filters=blosc5)

        # store data
        f.create_table(f.root, 'data', self._data)
        f.close()
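As a follow-up (not part of the seqc source), reading such an archive back is straightforward with the context-manager form of tb.open_file; the file name below is a placeholder.

import tables as tb

# Open the archive read-only and load the 'data' table into memory
# as a NumPy structured array.
with tb.open_file("readarray.h5", mode="r") as f:
    data = f.root.data.read()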
github PyTables / PyTables / bench / optimal-chunksize.py
def bench(chunkshape, filters):
    numpy.random.seed(1)   # to have reproducible results
    filename = tempfile.mktemp(suffix='.h5')
    print("Doing test on the file system represented by:", filename)

    f = tables.open_file(filename, 'w')
    e = f.create_earray(f.root, 'earray', datom, shape=(0, M),
                        filters=filters, chunkshape=chunkshape)
    # Fill the array
    t1 = time()
    for i in range(N):
        #e.append([numpy.random.rand(M)])  # use this for less compressibility
        e.append([quantize(numpy.random.rand(M), 6)])
    #os.system("sync")
    print("Creation time:", round(time() - t1, 3), end=" ")
    filesize = get_db_size(filename)
    filesize_bytes = os.stat(filename)[6]
    print("\t\tFile size: %d -- (%s)" % (filesize_bytes, filesize))

    # Read in sequential mode:
    e = f.root.earray
github blaze / blaze / blaze / datadescriptor / hdf5_data_descriptor.py
def __setitem__(self, key, value):
        # HDF5 arrays can be updated
        with tb.open_file(self.path, mode=self.mode) as f:
            dset = f.get_node(self.datapath)
            dset[key] = value
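Reading follows the same pattern. Below is a minimal sketch of a matching __getitem__, written as an illustration rather than copied from blaze, and assuming the same tb alias and self.path / self.datapath attributes.

    def __getitem__(self, key):
        # Open the file read-only, locate the dataset node and return the requested slice.
        with tb.open_file(self.path, mode='r') as f:
            dset = f.get_node(self.datapath)
            return dset[key]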
github 10XGenomics / cellranger / mro / stages / analyzer / run_tsne / __init__.py
else:
        # Use feature space for other feature types
        # Assumes other feature types are much lower dimension than gene expression
        matrix = matrix.select_features_by_type(args.feature_type)
        matrix.m.data = np.log2(1 + matrix.m.data)
        tsne_input = matrix.m.transpose().todense()

    name = get_tsne_name(args.feature_type, args.tsne_dims)
    key = get_tsne_key(args.feature_type, args.tsne_dims)

    tsne = cr_tsne.run_tsne(tsne_input, name=name, key=key, input_pcs=args.input_pcs, perplexity=args.perplexity,
                     theta=args.theta, tsne_dims=tsne_dims, max_iter=args.max_iter, stop_lying_iter=args.stop_lying_iter,
                     mom_switch_iter=args.mom_switch_iter, random_state=args.random_seed)

    filters = tables.Filters(complevel = h5_constants.H5_COMPRESSION_LEVEL)
    with tables.open_file(outs.tsne_h5, 'w', filters = filters) as f:
        cr_tsne.save_tsne_h5(tsne, f)

    cr_tsne.save_tsne_csv(tsne, matrix, outs.tsne_csv)
github HiSPARC / sapphire / scripts / simulations / cluster_sim.py
import textwrap

import tables

import clusters

from simulations import GroundParticlesSimulation, QSubSimulation

DATAFILE = 'data.h5'


if __name__ == '__main__':
    try:
        data
    except NameError:
        data = tables.open_file(DATAFILE, 'a')

    if '/simulations' in data:
        print()
        print(textwrap.dedent("""\
            WARNING: previous simulations exist and will be overwritten
            Continue? (answer 'yes'; anything else will exit)"""))
        try:
            inp = input()
        except KeyboardInterrupt:
            inp = 'Ctrl-C'

        if inp.lower() == 'yes':
            data.remove_node('/simulations', recursive=True)
        else:
            print()
            print("Aborting!")
github astorfi / 3D-convolutional-speaker-recognition / code / 0-input / create_hdf5 / create_development.py
import tables

#########################################
########### TRAIN/TEST HDF5 #############
#########################################
phase = 'development'

output_filename = phase + '.hdf5'
#################################################
################# HDF5 elements #################
#################################################

# DEFAULTS:
num_frames = 80
num_coefficient = 40
num_utterances = 20

hdf5_file = tables.open_file(output_filename, mode='w')
filters = tables.Filters(complevel=5, complib='blosc')
utterance_train_storage = hdf5_file.create_earray(hdf5_file.root, 'utterance_train',
                                         tables.Float32Atom(shape=(), dflt=0.0),
                                         shape=(0, num_frames, num_coefficient, num_utterances),
                                         filters=filters)
utterance_test_storage = hdf5_file.create_earray(hdf5_file.root, 'utterance_test',
                                         tables.Float32Atom(shape=(), dflt=0.0),
                                         shape=(0, num_frames, num_coefficient, num_utterances),
                                         filters=filters)
label_train_storage = hdf5_file.create_earray(hdf5_file.root, 'label_train',
                                        tables.IntAtom(shape=(), dflt=0.0),
                                        shape=(0,),
                                        filters=filters)
label_test_storage = hdf5_file.create_earray(hdf5_file.root, 'label_test',
                                        tables.IntAtom(shape=(), dflt=0.0),
                                        shape=(0,),
                                        filters=filters)
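Once the extendable arrays exist, data is added with EArray.append. The loop below is only an illustration with random feature blocks, not code from the project; it assumes each block has shape (num_frames, num_coefficient, num_utterances) and is appended one row at a time along the extendable axis.

import numpy as np

# Illustration only: append one random feature block and one label per "speaker",
# then close the file so the data is flushed to disk.
for speaker_label in range(5):
    features = np.random.rand(num_frames, num_coefficient, num_utterances).astype(np.float32)
    utterance_train_storage.append(features[np.newaxis, ...])
    label_train_storage.append(np.array([speaker_label], dtype=np.int32))

hdf5_file.close()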
github ver228 / tierpsy-tracker / work_in_progress / old / track_segWorm.py
good_index = tracks_data[(delX>MIN_DISPLACEMENT) & (delY>MIN_DISPLACEMENT)].index
df = df[df.worm_index_joined.isin(good_index)]
table_fid.close()

#df['segworm_results_id'] = pd.Series(-1, index = df.index)


#calculate track length; it is important to do this instead of counting because some joined tracks are discontinuous
#for the moment useless
track_size = (tracks_data.loc[good_index]['frame_number']['max']- \
    tracks_data.loc[good_index]['frame_number']['min']+1)


#open the file again, this time using pytables in append mode to add segworm data
results_fid = tables.open_file(trajectories_file, 'r+')
if 'segworm_results' in results_fid.root._v_children.keys():
    results_fid.remove_node('/segworm_results')
segworm_table = results_fid.create_table('/', "segworm_results", segworm_results, "Results from the skeletonization using segWorm.")
#segworm_results = results_fid.create_vlarray(results_fid.root, 'segworm_results',
#tables.ObjectAtom(), "", filters=tables.Filters(complevel = 1, complib = 'blosc', shuffle = True))

tracking_table = results_fid.get_node('/plate_worms')


prev_worms = {}

tic = time.time()
tic_first = tic
for frame in range(0, 100):#df['frame_number'].max()):
    
    img = mask_dataset[frame,:,:]