output_file = h5io.WESTPAH5File(self.output_filename, mode='w')
pi = self.progress.indicator
iter_start, iter_stop = self.iter_range.iter_start, self.iter_range.iter_stop
iter_count = iter_stop - iter_start
output_file.create_dataset('n_iter', dtype=n_iter_dtype, data=list(range(iter_start,iter_stop)))
current_seg_count = 0
seg_count_ds = output_file.create_dataset('n_segs', dtype=numpy.uint, shape=(iter_count,))
matching_segs_ds = output_file.create_dataset('seg_ids', shape=(iter_count,0), maxshape=(iter_count,None),
dtype=seg_id_dtype,
chunks=h5io.calc_chunksize((iter_count,1000000), seg_id_dtype),
shuffle=True, compression=9)
weights_ds = output_file.create_dataset('weights', shape=(iter_count,0), maxshape=(iter_count,None),
dtype=weight_dtype,
chunks=h5io.calc_chunksize((iter_count,1000000), weight_dtype),
shuffle=True,compression=9)
with pi:
pi.new_operation('Finding matching segments', extent=iter_count)
# futures = set()
# for n_iter in xrange(iter_start,iter_stop):
# futures.add(self.work_manager.submit(_find_matching_segments,
# args=(self.data_reader.we_h5filename,n_iter,self.predicate,self.invert)))
# for future in self.work_manager.as_completed(futures):
for future in self.work_manager.submit_as_completed(((_find_matching_segments,
(self.data_reader.we_h5filename,n_iter,self.predicate,self.invert),
{}) for n_iter in range(iter_start,iter_stop)),
self.max_queue_len):
n_iter, matching_ids = future.get_result()
n_matches = len(matching_ids)
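# A minimal sketch of the kind of worker callable submitted above, assuming the default
# /iterations/iter_{:08d} HDF5 layout and a predicate that takes (n_iter, iter_group)
# and returns a boolean mask over that iteration's segments; the _find_matching_segments
# actually used by this tool may access the data differently.
def _find_matching_segments_sketch(we_h5filename, n_iter, predicate, invert=False):
    import h5py
    with h5py.File(we_h5filename, 'r') as h5file:
        iter_group = h5file['/iterations/iter_{:08d}'.format(n_iter)]
        mask = numpy.asarray(predicate(n_iter, iter_group), dtype=bool)
        if invert:
            mask = ~mask
        # return the iteration number along with the matching seg_ids, matching the
        # (n_iter, matching_ids) unpacking in the result loop above
        return n_iter, numpy.flatnonzero(mask)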
from .core import WESTToolComponent
import sys, logging, pickle, math, operator
from itertools import count
import numpy
import westpa
import westpa.binning
from westpa.binning import RectilinearBinMapper
from westpa.extloader import get_object
from pickle import PickleError
log = logging.getLogger(__name__)
from west.data_manager import weight_dtype
EPS = numpy.finfo(weight_dtype).eps
def mapper_from_expr(expr):
namespace = {'numpy': numpy,
'inf': float('inf')}
try:
mapper = RectilinearBinMapper(eval(expr,namespace))
except TypeError as e:
if 'has no len' in str(e):
raise ValueError('invalid bin boundary specification (a list of lists is required)')
else:
raise
else:
log.debug('loaded {!r} from expression {!r}'.format(mapper,expr))
return mapper
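# Example usage (hypothetical boundaries): the expression must evaluate to a list of
# boundary lists, one list per progress-coordinate dimension, and 'inf' and 'numpy'
# are available inside the expression via the namespace above.
#
#   mapper = mapper_from_expr('[[0.0, 1.0, 2.0, 5.0, inf]]')        # 1-D, 4 bins
#   mapper = mapper_from_expr('[[0, 1, 2, inf], [0, 5, 10, inf]]')  # 2-D, 3x3 bins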
def report_bin_statistics(self, bins, save_summary=False):
segments = list(self.segments.values())
bin_counts = numpy.fromiter(map(len,bins), dtype=numpy.int_, count=len(bins))
target_counts = self.we_driver.bin_target_counts
# Do not include bins with target count zero (e.g. sinks, never-filled bins) in the (non)empty bins statistics
n_active_bins = len(target_counts[target_counts!=0])
seg_probs = numpy.fromiter(map(operator.attrgetter('weight'), segments), dtype=weight_dtype, count=len(segments))
bin_probs = numpy.fromiter(map(operator.attrgetter('weight'), bins), dtype=weight_dtype, count=len(bins))
norm = seg_probs.sum()
assert abs(1 - norm) < EPS*(len(segments)+n_active_bins)
min_seg_prob = seg_probs[seg_probs!=0].min()
max_seg_prob = seg_probs.max()
seg_drange = math.log(max_seg_prob/min_seg_prob)
min_bin_prob = bin_probs[bin_probs!=0].min()
max_bin_prob = bin_probs.max()
bin_drange = math.log(max_bin_prob/min_bin_prob)
n_pop = len(bin_counts[bin_counts!=0])
self.rc.pstatus('{:d} of {:d} ({:%}) active bins are populated'.format(n_pop, n_active_bins,n_pop/n_active_bins))
self.rc.pstatus('per-bin minimum non-zero probability: {:g}'.format(min_bin_prob))
self.rc.pstatus('per-bin maximum probability: {:g}'.format(max_bin_prob))
self.rc.pstatus('per-bin probability dynamic range (kT): {:g}'.format(bin_drange))
def populate_initial(self, initial_states, weights, system=None):
'''Create walkers for a new weighted ensemble simulation.
One segment is created for each provided initial state, then binned and split/merged
as necessary. After this function is called, next_iter_segments will yield the new
segments to create, used_initial_states will contain data about which of the
provided initial states were used, and avail_initial_states will contain data about
which initial states were unused (because their corresponding walkers were merged
out of existence).
'''
# This has to be down here to avoid an import race
from west.data_manager import weight_dtype
EPS = numpy.finfo(weight_dtype).eps
system = system or westpa.rc.get_system_driver()
self.new_iteration(initial_states=[], target_states=[],
bin_mapper=system.bin_mapper, bin_target_counts=system.bin_target_counts)
# Create dummy segments
segments = []
for (seg_id, (initial_state,weight)) in enumerate(zip(initial_states,weights)):
dummy_segment = Segment(n_iter=0,
seg_id=seg_id,
parent_id=-(initial_state.state_id+1),
weight=weight,
wtg_parent_ids=set([-(initial_state.state_id+1)]),
pcoord=system.new_pcoord_array(),
status=Segment.SEG_STATUS_PREPARED)
dummy_segment.pcoord[[0,-1]] = initial_state.pcoord
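# A brief usage sketch of the behavior described in the docstring above (hypothetical
# driver and initial-state objects): one weight per initial state, summing to unity.
#
#   weights = [1.0 / len(initial_states)] * len(initial_states)
#   we_driver.populate_initial(initial_states, weights)
#   new_segments = list(we_driver.next_iter_segments)   # walkers for iteration 1
#   used   = we_driver.used_initial_states              # states consumed by surviving walkers
#   unused = we_driver.avail_initial_states             # states released by merged-away walkers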
nstates = len(state_labels)
nbins = labeled_pops.shape[1]-1
# Prepare output arrays
if labeled_fluxes is None:
labeled_fluxes = numpy.zeros((nstates, nstates, nbins, nbins), weight_dtype)
else:
labeled_fluxes.fill(0.0)
if labeled_rates is None:
labeled_rates = numpy.zeros_like(labeled_fluxes)
else:
labeled_rates.fill(0.0)
if unlabeled_rates is None:
unlabeled_rates = numpy.zeros((nbins,nbins), weight_dtype)
else:
unlabeled_rates.fill(0.0)
# Loop over all possible windows to accumulate flux matrix
# flux matrix is [initial_label][final_label][initial_bin][final_bin]
if all_lags:
twindow = calculate_labeled_fluxes_alllags(nstates, weights, parent_ids, bin_assignments, label_assignments, labeled_fluxes)
else:
twindow = calculate_labeled_fluxes(nstates, weights, parent_ids, bin_assignments, label_assignments, labeled_fluxes)
labeled_fluxes /= twindow
# Calculate rate matrix for this window, using populations from the last iteration (which correspond
# to the weights that contribute to the flux matrix)
labeled_flux_to_rate(labeled_fluxes, labeled_pops, labeled_rates)
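# The conversion done by labeled_flux_to_rate is, in essence, a normalization of each
# flux element by the labeled population of its source (state, bin) pair; a rough
# numpy sketch of that relation (ignoring how the library routine handles empty
# source bins) is:
#
#   for istate in range(nstates):
#       for ibin in range(nbins):
#           pop = labeled_pops[istate, ibin]
#           if pop > 0:
#               labeled_rates[istate, :, ibin, :] = labeled_fluxes[istate, :, ibin, :] / pop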
assignments_ds = assignments_file['assignments']
iter_start, iter_stop = self.iter_range.iter_start, self.iter_range.iter_stop
iter_count = iter_stop - iter_start
h5io.check_iter_range_least(assignments_ds, iter_start, iter_stop)
nsegs = assignments_file['nsegs'][h5io.get_iteration_slice(assignments_file['nsegs'], iter_start,iter_stop)]
output_file.create_dataset('n_iter', dtype=n_iter_dtype, data=list(range(iter_start,iter_stop)))
seg_count_ds = output_file.create_dataset('nsegs', dtype=numpy.uint, shape=(iter_count,nbins))
matching_segs_ds = output_file.create_dataset('seg_ids', shape=(iter_count,nbins,count),
dtype=seg_id_dtype,
chunks=h5io.calc_chunksize((iter_count,nbins,count), seg_id_dtype),
shuffle=True, compression=9)
weights_ds = output_file.create_dataset('weights', shape=(iter_count,nbins,count),
dtype=weight_dtype,
chunks=h5io.calc_chunksize((iter_count,nbins,count), weight_dtype),
shuffle=True,compression=9)
what = self.what
with pi:
pi.new_operation('Finding matching segments', extent=iter_count)
for iiter, n_iter in enumerate(range(iter_start, iter_stop)):
assignments = numpy.require(assignments_ds[h5io.get_iteration_entry(assignments_ds, n_iter)
+ numpy.index_exp[:,timepoint]], dtype=westpa.binning.index_dtype)
all_weights = self.data_reader.get_iter_group(n_iter)['seg_index']['weight']
# the following Cython function just executes this loop:
#for iseg in xrange(nsegs[iiter]):
# segs_by_bin[iseg,assignments[iseg]] = True
segs_by_bin = assignments_list_to_table(nsegs[iiter],nbins,assignments)
for ibin in range(nbins):
def _assign_label_pop(n_iter, lb, ub, mapper, nstates, state_map, last_labels, parent_id_dsspec, weight_dsspec, pcoord_dsspec):
nbins = len(state_map)-1
parent_ids = parent_id_dsspec.get_iter_data(n_iter,index_exp[lb:ub])
weights = weight_dsspec.get_iter_data(n_iter,index_exp[lb:ub])
pcoords = pcoord_dsspec.get_iter_data(n_iter,index_exp[lb:ub])
assignments, trajlabels = assign_and_label(lb, ub, parent_ids,
mapper.assign, nstates, state_map, last_labels, pcoords)
pops = numpy.zeros((nstates+1,nbins+1), weight_dtype)
accumulate_labeled_populations(weights, assignments, trajlabels, pops)
return (assignments, trajlabels, pops, lb, ub)
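# A sketch of how a helper like this is typically dispatched (hypothetical chunk size):
# the segments of one iteration are split into [lb, ub) slices, one task per slice;
# the assignment and label slices are written back into place, while the per-slice
# population arrays are summed.
#
#   futures = []
#   for lb in range(0, nsegs, chunksize):
#       ub = min(lb + chunksize, nsegs)
#       futures.append(work_manager.submit(_assign_label_pop,
#                                           args=(n_iter, lb, ub, mapper, nstates, state_map,
#                                                 last_labels, parent_id_dsspec,
#                                                 weight_dsspec, pcoord_dsspec)))
#   for future in work_manager.as_completed(futures):
#       slice_assignments, slice_trajlabels, slice_pops, lb, ub = future.get_result()
#       assignments[lb:ub] = slice_assignments
#       trajlabels[lb:ub] = slice_trajlabels
#       pops += slice_pops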
# loop terminates with parent_id set to the identifier of the initial state,
# seg_id set to the identifier of the first segment in the trajectory, and
# n_iter set to one less than the iteration of the first segment
first_iter = n_iter + 1
first_seg_id = seg_id
first_parent_id = parent_id
# Initial segment (for fetching initial state)
first_segment = Segment(n_iter=first_iter, seg_id=first_seg_id, parent_id=first_parent_id)
seginfo.reverse()
summary_dtype = numpy.dtype([('n_iter', n_iter_dtype),
('seg_id', seg_id_dtype),
('weight', weight_dtype),
('walltime', utime_dtype),
('cputime', utime_dtype),
('final_pcoord', pcoord_dtype, pcoord_pt_shape),
])
summary = numpy.array(seginfo, dtype=summary_dtype)
try:
initial_state = data_manager.get_segment_initial_states([first_segment], first_iter)[0]
except KeyError:
# old HDF5 version
assert parent_id < 0
istate_pcoord = data_manager.get_iter_group(first_iter)['pcoord'][first_seg_id,0]
istate_id = -(first_parent_id+1)
basis_state = None
initial_state = InitialState(istate_id, None, iter_created=0, pcoord=istate_pcoord)
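# The trace loop referred to in the comment above typically has the following shape
# (hedged sketch; the exact bookkeeping in this tool may differ): walk parent_id
# pointers back one iteration at a time, recording each segment, until a negative
# parent_id marks an initial state.
#
#   parent_id = seg_id
#   while n_iter > 0 and parent_id >= 0:
#       seg_id = parent_id
#       iter_group = data_manager.get_iter_group(n_iter)
#       indexrow = iter_group['seg_index'][seg_id]
#       final_pcoord = iter_group['pcoord'][seg_id, -1]
#       seginfo.append((n_iter, seg_id, indexrow['weight'], indexrow['walltime'],
#                       indexrow['cputime'], final_pcoord))
#       parent_id = int(indexrow['parent_id'])
#       n_iter -= 1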
labeled_fluxes = None, labeled_rates = None, unlabeled_rates = None):
'''Estimate fluxes and rates over multiple iterations. The number of iterations is determined by how many
vectors of weights, parent IDs, bin assignments, and label assignments are passed.
If ``all_lags`` is true, the average is taken over all possible lags within the given length-N window;
otherwise only the single length-N lag is used.
Returns labeled flux matrix, labeled rate matrix, and unlabeled rate matrix.'''
assert len(weights) == len(parent_ids) == len(bin_assignments) == len(label_assignments)
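# A hedged sketch of how the inputs are assembled: each of the four per-iteration
# sequences holds one array per iteration of the averaging window, all of the same
# length N, and the function returns the labeled flux matrix, the labeled rate matrix,
# and the unlabeled rate matrix for that window.
#
#   weights           = [weights_iter1, ..., weights_iterN]   # per-segment weights
#   parent_ids        = [parents_iter1, ..., parents_iterN]   # per-segment parent seg_ids
#   bin_assignments   = [bins_iter1, ..., bins_iterN]         # per-segment bin indices
#   label_assignments = [labels_iter1, ..., labels_iterN]     # per-segment state labels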
fillvalue=nbins)
if self.states:
trajlabel_dtype = numpy.min_scalar_type(nstates)
trajlabels_ds = self.output_file.create_dataset('trajlabels', dtype=trajlabel_dtype, shape=assignments_shape,
compression=4, shuffle=True,
chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
fillvalue=nstates)
statelabels_ds = self.output_file.create_dataset('statelabels', dtype=trajlabel_dtype, shape=assignments_shape,
compression=4, shuffle=True,
chunks=h5io.calc_chunksize(assignments_shape, trajlabel_dtype),
fillvalue=nstates)
pops_shape = (iter_count,nstates+1,nbins+1)
pops_ds = self.output_file.create_dataset('labeled_populations', dtype=weight_dtype, shape=pops_shape,
compression=4, shuffle=True,
chunks=h5io.calc_chunksize(pops_shape, weight_dtype))
h5io.label_axes(pops_ds, [numpy.string_(i) for i in ['iteration', 'state', 'bin']])
pi.new_operation('Assigning to bins', iter_stop-iter_start)
last_labels = None # mapping of seg_id to last macrostate inhabited
for iiter, n_iter in enumerate(range(iter_start,iter_stop)):
#get iteration info in this block
if iiter == 0:
last_labels = numpy.empty((nsegs[iiter],), index_dtype)
last_labels[:] = nstates #unknown state
# Slice this iteration's segments into n_workers groups, submit them to the work manager, and splice the results back together
assignments, trajlabels, pops, statelabels = self.assign_iteration(n_iter, nstates, nbins, state_map, last_labels)
##Do stuff with this iteration's results
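# A hedged sketch of the bookkeeping that typically follows here (dataset names as
# created above; the exact slicing may differ):
#
#   trajlabels_ds[iiter, :nsegs[iiter], :] = trajlabels
#   statelabels_ds[iiter, :nsegs[iiter], :] = statelabels
#   pops_ds[iiter] = pops
#   # each segment's label at its final timepoint seeds the next iteration's labeling
#   last_labels = trajlabels[:, -1].copy()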