    category_length = relevant_X.shape[0]

    result = {
        'trained_category_model': category_trained_final_model
        , 'category': category
        , 'len_relevant_X': category_length
    }

    return result
if os.environ.get('is_test_suite', False) == 'True':
    # If this is the test suite, do not run things in parallel
    results = list(map(lambda x: train_one_categorical_model(x[0], x[1], x[2]), categories_and_data))

else:
    pool = pathos.multiprocessing.ProcessPool()

    # Since we may have already closed the pool, try to restart it
    try:
        pool.restart()
    except AssertionError:
        pass

    try:
        results = list(pool.map(lambda x: train_one_categorical_model(x[0], x[1], x[2]), categories_and_data))
    except RuntimeError:
        # Deep learning models need far more recursion than the default limit allows when they
        # are pickled for the worker processes. Raise the limit, retry, then restore it.
        original_recursion_limit = sys.getrecursionlimit()
        sys.setrecursionlimit(10000)
        results = list(pool.map(lambda x: train_one_categorical_model(x[0], x[1], x[2]), categories_and_data))
        sys.setrecursionlimit(original_recursion_limit)
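# A minimal, self-contained sketch of the serial-vs-parallel switch used above, assuming only
# pathos is installed; `square` and `items` are placeholders for the real training work and data.
import os
import pathos.multiprocessing


def square(x):
    return x * x


if __name__ == '__main__':
    items = [1, 2, 3, 4]

    if os.environ.get('is_test_suite', False) == 'True':
        # Inside a test suite, skip worker processes entirely; plain map keeps behaviour identical.
        results = list(map(square, items))
    else:
        pool = pathos.multiprocessing.ProcessPool()
        # The snippet above guards restart() with `except AssertionError`, since restarting a
        # pool that was never closed can raise; the same guard is reproduced here.
        try:
            pool.restart()
        except AssertionError:
            pass
        results = list(pool.map(square, items))
        pool.close()
        pool.join()

    print(results)  # [1, 4, 9, 16]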
output_dir = args.output_dir
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

run_args = []
numa_queue = get_numa_queue()
for (model_dir, (num_factors, num_users, num_items, _, scale), _) in TO_RUN:
    input_dir = os.path.join(MODEL_DIR_BASE, model_dir)
    base_name = model_dir.replace('/', '-')
    for K in TOP_K:
        run_args.append((numa_queue, num_factors, num_users, num_items, K,
                         scale, input_dir, base_name, output_dir, runner))

pool = multiprocessing.ProcessPool(
    NUM_NUMA_NODES)  # Only run NUM_NUMA_NODES jobs at once, one per NUMA node
pool.map(run, run_args)
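# A hedged sketch of the fan-out above, assuming `run` simply unpacks one tuple of arguments;
# the bounded pool size caps how many jobs execute at once (4 here, mirroring NUM_NUMA_NODES).
# The real `run` presumably reserves a NUMA node from the shared queue and returns it when done.
from pathos.multiprocessing import ProcessPool


def run(arg_tuple):
    # Placeholder worker standing in for the benchmark runner.
    job_id, payload = arg_tuple
    return job_id, payload * 2


if __name__ == '__main__':
    run_args = [(i, i * 10) for i in range(8)]
    pool = ProcessPool(4)  # at most 4 concurrent jobs
    results = pool.map(run, run_args)
    pool.close()
    pool.join()
    print(results)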
# Don't bother parallelizing if this is a single dictionary
if X.shape[0] == 1:
    predictions_from_all_estimators = map(lambda predictor: get_predictions_for_one_estimator(predictor, X), self.ensemble_predictors)

else:
    # Pathos doesn't like datasets beyond a certain size, so fall back on single-process,
    # non-parallel predictions when running inside the test suite.
    if os.environ.get('is_test_suite', False) == 'True':
        predictions_from_all_estimators = map(lambda predictor: get_predictions_for_one_estimator(predictor, X), self.ensemble_predictors)

    else:
        # Open a new multiprocessing pool
        pool = pathos.multiprocessing.ProcessPool()

        # Since we may have already closed the pool, try to restart it
        try:
            pool.restart()
        except AssertionError:
            pass

        predictions_from_all_estimators = pool.map(lambda predictor: get_predictions_for_one_estimator(predictor, X), self.ensemble_predictors)

        # Once we have gotten all we need from the pool, close it so it is not holding on to memory
        pool.close()
        try:
            pool.join()
        except AssertionError:
            pass

# The builtin map() on the serial branches is lazy, so materialize the predictions here
predictions_from_all_estimators = list(predictions_from_all_estimators)
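# A small sketch of why the final list() call matters, assuming Python 3: the builtin map()
# used on the serial paths is lazy, while pathos' pool.map() already returns a list, so
# materializing the result once at the end makes both branches behave the same.
from pathos.multiprocessing import ProcessPool

if __name__ == '__main__':
    data = [1, 2, 3]

    lazy = map(lambda x: x + 1, data)        # a lazy map object, nothing computed yet
    pool = ProcessPool(2)
    eager = pool.map(lambda x: x + 1, data)  # pathos maps eagerly and returns a list

    results = list(lazy)                     # forces evaluation of the lazy branch
    assert results == eager == [2, 3, 4]

    pool.close()
    pool.join()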
def parallel_num_iterator(self, num_proc=None):
    nerr_tups = list(self._nsub_ndel_nins_iterator())

    def dna_nums_given_nerr_tup(nerr_tup):
        nsub, ndel, nins = nerr_tup
        return [seqtools.dna2num(seq)
                for seq in self._freediv_subsphere_given_counts(nsub, ndel, nins)]

    pl = ProcessPool(num_proc)
    results = pl.map(dna_nums_given_nerr_tup, nerr_tups)
    for num in itertools.chain(*results):
        yield num
    pl._clear()
    del pl
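# As an aside: pathos appears to reuse pool state behind the scenes, and clear() is the public
# counterpart of the _clear() call above (it is also what the p_tqdm-style snippet further down
# uses). A brief sketch of draining a pool and dropping its cached workers afterwards:
from pathos.multiprocessing import ProcessPool

if __name__ == '__main__':
    pl = ProcessPool(2)
    results = pl.map(abs, [-1, -2, -3])
    print(results)  # [1, 2, 3]
    pl.clear()      # discard the cached worker processes once the results are in hand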
def upload_all():
    path = '/home/edward/waymo/training/'
    filenames = os.listdir(path)
    filenames.sort()

    pool = ProcessPool(16)
    data = pool.map(frames_tfrecord, map(lambda f: path + f, filenames))
    frames = sum(data, 0)
    print('Frames in files: {}, Total: {}'.format(data, frames))

    # One cumulative starting offset per tfrecord file
    start_frame = []
    for i in range(0, len(filenames)):
        start_frame.append(sum(data[:i], 0))

    dataset_type = 'training'
    version = 'v2'
    storage = S3(bucket='waymo-dataset-upload')
    labels_arr = hub.array(shape=(frames, 2, 6, 30, 7), chunk_size=(100, 2, 6, 30, 7), storage=storage, name='edward/{}-labels:{}'.format(dataset_type, version), backend='s3', dtype='float64')
    images_arr = hub.array(compression='jpeg', shape=(frames, 6, 1280, 1920, 3), storage=storage, name='edward/{}-camera-images:{}'.format(dataset_type, version), backend='s3', dtype='uint8', chunk_size=(1, 6, 1280, 1920, 3))

    def upload_record(i):
        upload_tfrecord(dataset_type, path + filenames[i], version, start_frame[i])
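# The running sum of per-file frame counts can also be computed in one pass; a brief sketch
# using only the standard library, with made-up counts standing in for pool.map's output.
import itertools

if __name__ == '__main__':
    data = [3, 5, 2]  # frames per tfrecord file
    start_frame = [0] + list(itertools.accumulate(data))[:-1]
    print(start_frame)  # [0, 3, 8]: the first frame index of each file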
# Extract num_cpus
num_cpus = kwargs.pop('num_cpus', None)

# Determine num_cpus
if num_cpus is None:
    num_cpus = cpu_count()
elif type(num_cpus) == float:
    num_cpus = int(round(num_cpus * cpu_count()))

# Determine length of tqdm (equal to length of shortest iterable)
length = min(len(iterable) for iterable in iterables if isinstance(iterable, Sized))

# Create parallel generator
map_type = 'imap' if ordered else 'uimap'
pool = Pool(num_cpus)
map_func = getattr(pool, map_type)

for item in tqdm(map_func(function, *iterables), total=length, **kwargs):
    yield item

pool.clear()
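# A standalone sketch of the imap/uimap distinction the generator above relies on: imap yields
# results in input order, uimap yields them as workers finish. tqdm needs `total` passed
# explicitly because both return lazy iterators with no length.
from pathos.multiprocessing import ProcessPool
from tqdm import tqdm


def slow_square(x):
    return x * x


if __name__ == '__main__':
    items = list(range(8))
    pool = ProcessPool(4)

    ordered = [y for y in tqdm(pool.imap(slow_square, items), total=len(items))]
    unordered = sorted(tqdm(pool.uimap(slow_square, items), total=len(items)))

    assert ordered == unordered == [x * x for x in items]
    pool.clear()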
if not silent:
    print('+ Started new fit.')
    print('    + {} analyses to run.'.format(len(self.hyperGridValues)))

# check if multiprocessing is available
if nJobs > 1:
    try:
        from pathos.multiprocessing import ProcessPool
    except ImportError:
        raise ImportError('No module named pathos.multiprocessing. This module represents an optional '
                          'dependency of bayesloop and is therefore not installed alongside bayesloop.')

    # prepare parallel execution
    if not silent:
        print('    + Creating {} processes.'.format(nJobs))
    pool = ProcessPool(nodes=nJobs)

    # use parallelFit method to create copies of this HyperStudy instance with only partial
    # hyper-grid values
    subStudies = pool.map(self._parallelFit,
                          range(nJobs),
                          [nJobs]*nJobs,
                          [forwardOnly]*nJobs,
                          [evidenceOnly]*nJobs,
                          [silent]*nJobs)

    # prevent memory pile-up in main process
    pool.close()
    pool.join()
    pool.terminate()
    pool.restart()
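# pool.map accepts several iterables and zips them, like the builtin map; a minimal sketch of
# the multi-argument call used for _parallelFit above, with a toy function standing in for it.
from pathos.multiprocessing import ProcessPool


def fit_slice(job_index, n_jobs, silent):
    return 'job {}/{} (silent={})'.format(job_index, n_jobs, silent)


if __name__ == '__main__':
    nJobs = 3
    pool = ProcessPool(nodes=nJobs)
    subStudies = pool.map(fit_slice,
                          range(nJobs),
                          [nJobs] * nJobs,
                          [True] * nJobs)
    print(subStudies)

    # same shutdown sequence as above: release the workers, then restart so the pool
    # can be reused by a later fit
    pool.close()
    pool.join()
    pool.terminate()
    pool.restart()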
def main():
    path = '/home/edward/waymo/training/'
    dataset_type = 'training'
    version = 'v2'
    filenames = os.listdir(path)
    filenames.sort()

    pool = ProcessPool(16)
    frame_count_arr = pool.map(frames_tfrecord, map(lambda f: path + f, filenames))
    frames = sum(frame_count_arr, 0)

    str_lasers_range_image = 'edward/{}-lasers-range-image:{}'.format(dataset_type, version)
    str_lasers_range_image_first = 'edward/{}-lasers-range-image-first:{}'.format(dataset_type, version)
    str_lasers_camera_proj = 'edward/{}-lasers-camera-proj:{}'.format(dataset_type, version)
    str_lasers_camera_proj_first = 'edward/{}-lasers-camera-proj-first:{}'.format(dataset_type, version)

    storage = S3(bucket='waymo-dataset-upload')
    hub.array(shape=(frames, 4, 2, 200, 600, 4), dtype='float32', backend='s3', storage=storage, name=str_lasers_range_image, chunk_size=(1, 4, 2, 200, 600, 4))
    hub.array(shape=(frames, 4, 2, 200, 600, 6), dtype='int32', backend='s3', storage=storage, name=str_lasers_camera_proj, chunk_size=(1, 4, 2, 200, 600, 6))
    hub.array(shape=(frames, 2, 64, 2650, 4), dtype='float32', backend='s3', storage=storage, name=str_lasers_range_image_first, chunk_size=(1, 2, 64, 2650, 4))
    hub.array(shape=(frames, 2, 64, 2650, 6), dtype='int32', backend='s3', storage=storage, name=str_lasers_camera_proj_first, chunk_size=(1, 2, 64, 2650, 6))

    start_frame_arr = []
    for i in range(0, len(filenames)):
        start_frame_arr.append(sum(frame_count_arr[:i], 0))