Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is the interior of a method whose `def` line is outside
# this view, and the original indentation has been lost -- nesting below is
# not visible in the text and must be restored from the upstream file.
#
# Populate the shot list from the given files, then take the sublist that
# `random_sublist(use_shots)` returns.
shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
shot_list_picked = shot_list.random_sublist(use_shots)
# Machines that still need statistics: everything in `all_machines` minus the
# ones reported as already saved (presumably both are sets -- confirm).
previously_saved, machines_saved = self.previously_saved_stats()
machines_to_compute = all_machines - machines_saved
# The config flag overrides any saved state: every machine is recomputed and
# the saved stats are treated as absent.
recompute = conf['data']['recompute_normalization']
if recompute:
machines_to_compute = all_machines
previously_saved = False
# Work is needed when nothing was saved or when some machines are missing.
if not previously_saved or len(machines_to_compute) > 0:
# Saved stats exist but are incomplete: load them first (presumably so the
# newly computed machines are merged into them -- confirm in load_stats).
if previously_saved:
self.load_stats(verbose=True)
print('computing normalization for machines {}'.format(
machines_to_compute))
# Use all but two CPUs, never fewer than one worker.
use_cores = max(1, mp.cpu_count()-2)
pool = mp.Pool(use_cores)
# NOTE(review): `_processes` is a private Pool attribute; `use_cores` holds
# the same requested value.
print('running in parallel on {} processes'.format(
pool._processes))
start_time = time.time()
# imap_unordered yields results in completion order, so `i` counts finished
# shots rather than positions in `shot_list_picked`.
for (i, stats) in enumerate(pool.imap_unordered(
self.train_on_single_shot, shot_list_picked)):
# for (i,stats) in
# enumerate(map(self.train_on_single_shot,shot_list_picked)):
# Only results for machines that still need statistics are folded in.
if stats.machine in machines_to_compute:
self.incorporate_stats(stats)
self.machines.add(stats.machine)
# '\r' rewrites the counter in place; `i` is zero-based, so the last value
# shown is len(shot_list_picked) - 1.
sys.stdout.write('\r'
+ '{}/{}'.format(i, len(shot_list_picked)))
# Normal shutdown: no more tasks, then wait for the workers to exit.
# NOTE(review): not reached if the loop raises -- the pool would be left open.
pool.close()
pool.join()
print('\nFinished Training Normalizer on ',
Statistical values of all combinations
list
Input parameters of all combinations
"""
# NOTE(review): interior of a method whose `def` line and the start of its
# docstring are outside this view; the original indentation has been lost.
#
# Fall back to the instance's parameter list when the caller passes nothing
# (or an empty collection).
if not tuned_params:
tuned_params = self.tuned_params
if self.multiproc:
# `multiproc is True` means one worker per CPU; any other truthy value is
# passed to Pool as the worker count (presumably an int -- confirm).
if self.multiproc is True:
n_cpu = multiprocessing.cpu_count()
else:
n_cpu = self.multiproc
pool = multiprocessing.Pool(n_cpu)
# map_async returns immediately, which lets the loop below report progress
# while the workers run.
result = pool.map_async(self.evaluate, tuned_params)
pool.close() # No more work
n = len(tuned_params)
# Poll every 0.6 s until all results are in, rewriting one stderr line.
while True:
if result.ready():
break
# NOTE(review): `_number_left` is a private attribute of the async result; in
# CPython it appears to count remaining chunks, not items, so this figure may
# overstate progress when the chunk size is above 1 -- confirm.
sys.stderr.write('\rEvaluated combinations: {0} / {1}'.format(n - result._number_left, n))
time.sleep(0.6)
# Rebinds `result` from the async handle to the list of return values;
# get() re-raises here if a worker call raised.
result = result.get()
sys.stderr.write('\rAll {0} combinations have been evaluated'.format(n))
# result = pool.map(self.evaluate, tuned_params)
# NOTE(review): the pool is closed but never joined in the lines visible here.
else:
# Serial path: evaluate each combination in this process, in order.
result = [self.evaluate(tp) for tp in tuned_params]
# Run the RMSE runner once per model listed in TO_RUN, fanning the jobs out
# over a process pool.
runner = './rmse.py'
output_dir = args.output_dir
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# One argument tuple per model; every job shares the same NUMA queue.
run_args = []
numa_queue = get_numa_queue(2)
for (model_dir, _, (training_file, test_file)) in TO_RUN:
    input_dir = os.path.join(MODEL_DIR_BASE, model_dir)
    # Flatten the model's relative path into a single file-name-safe token.
    base_name = model_dir.replace('/', '-')
    run_args.append((numa_queue, training_file, test_file, input_dir,
                     base_name, output_dir, runner))

# Pool size is twice NUM_NUMA_NODES (presumably matching the argument given
# to get_numa_queue above -- confirm).
pool = multiprocessing.Pool(NUM_NUMA_NODES * 2)
try:
    pool.map(run, run_args)
finally:
    # Always shut the workers down, even if a job raised, so no worker
    # processes are left behind.
    pool.close()
    pool.join()
# Build one job per (model, K, algorithm) combination and run them through a
# process pool.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

run_args = []
numa_queue = get_numa_queue()
for (model_dir, (num_factors, _, num_items, _, _), _) in TO_RUN:
    input_dir = os.path.join(MODEL_DIR_BASE, model_dir)
    # Flatten the model's relative path into a single file-name-safe token.
    base_name = model_dir.replace('/', '-')
    for K, alg in product(TOP_K, ALGS):
        run_args.append(
            (numa_queue, K, alg, scaling_value, sigma, num_factors,
             num_items, sample_size, args.run_fexipro, input_dir,
             base_name, output_dir, blocked_mm_runner, other_runner))

pool = multiprocessing.Pool(
    NUM_NUMA_NODES)  # Only run 4 jobs at once, since we have 4 NUMA nodes
try:
    pool.map(run, run_args)
finally:
    # Always shut the workers down, even if a job raised, so no worker
    # processes are left behind.
    pool.close()
    pool.join()
model_filename # save_prepath + model_filename
else:
model_path = custom_path
# NOTE(review): interior of a function whose `def` line (and the `if` that
# chooses `model_path`) are outside this view; indentation has been lost.
#
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted location.
model = joblib.load(model_path)
# shot_list = shot_list.random_sublist(10)
# Per-shot outputs, collected in the order results are received.
y_prime = []
y_gold = []
disruptive = []
pbar = Progbar(len(shot_list))
# Bind the model and feature extractor so each worker call only needs a shot.
fn = partial(
predict_single_shot,
model=model,
feature_extractor=feature_extractor)
# No argument: Pool picks its default worker count.
pool = mp.Pool()
# NOTE(review): `_processes` is a private Pool attribute.
print('predicting in parallel on {} processes'.format(pool._processes))
# for (y_p, y, disr) in map(fn, shot_list):
# imap (unlike imap_unordered) yields results in input order, so the three
# lists stay aligned with `shot_list`.
for (y_p, y, disr) in pool.imap(fn, shot_list):
# y_p, y, disr = predict_single_shot(model, feature_extractor,shot)
# expand_dims(..., axis=1) inserts a new axis at position 1 of each array.
y_prime += [np.expand_dims(y_p, axis=1)]
y_gold += [np.expand_dims(y, axis=1)]
disruptive += [disr]
pbar.add(1.0)
# Normal shutdown: no more tasks, then wait for the workers to exit.
pool.close()
pool.join()
# Three parallel lists with one entry per shot.
return y_prime, y_gold, disruptive
def preprocess_from_files(self, shot_files, use_shots):
# Preprocess a sublist of the shots found in `shot_files` in parallel and
# collect the ones that `append_if_valid` accepts.
#
# shot_files: passed to ShotList.load_from_shot_list_files_objects.
# use_shots:  passed to ShotList.random_sublist to choose the shots to process.
#
# NOTE(review): the original indentation has been lost and the end of this
# method (from the final print onward) is outside this view.
#
# all shots, including invalid ones
all_signals = self.conf['paths']['all_signals']
shot_list = ShotList()
shot_list.load_from_shot_list_files_objects(shot_files, all_signals)
shot_list_picked = shot_list.random_sublist(use_shots)
# empty
used_shots = ShotList()
# TODO(KGF): generalize the following line to perform well on
# architectures other than CPUs, e.g. KNLs
# min( , max(1,mp.cpu_count()-2) )
# Use all but two CPUs, never fewer than one worker.
use_cores = max(1, mp.cpu_count() - 2)
pool = mp.Pool(use_cores)
# NOTE(review): `_processes` is a private Pool attribute; `use_cores` holds
# the same requested value.
print('Running in parallel on {} processes'.format(pool._processes))
start_time = time.time()
# imap_unordered yields results in completion order, so `i` counts finished
# shots rather than positions in `shot_list_picked`.
for (i, shot) in enumerate(pool.imap_unordered(
self.preprocess_single_file, shot_list_picked)):
# for (i,shot) in
# enumerate(map(self.preprocess_single_file,shot_list_picked)):
# '\r' rewrites the counter in place; `i` is zero-based, so the last value
# shown is len(shot_list_picked) - 1.
sys.stdout.write('\r{}/{}'.format(i, len(shot_list_picked)))
used_shots.append_if_valid(shot)
# Normal shutdown: no more tasks, then wait for the workers to exit.
# NOTE(review): not reached if the loop raises -- the pool would be left open.
pool.close()
pool.join()
print('Finished Preprocessing {} files in {} seconds'.format(
len(shot_list_picked), time.time() - start_time))
# Omitted = picked shots that append_if_valid did not add to `used_shots`.
print('Omitted {} shots of {} total.'.format(
len(shot_list_picked) - len(used_shots), len(shot_list_picked)))
print('{}/{} disruptive shots'.format(used_shots.num_disruptive(),
def hardresetpool():
    """Discard the module-level worker pool and replace it with a fresh one.

    The current ``pool`` (if any) is terminated immediately -- outstanding
    work is abandoned, not drained -- and joined so its worker processes are
    reaped before the replacement is created.  The new pool uses the default
    worker count (``multiprocessing.Pool()`` with no arguments).

    Safe to call before any pool exists, or after ``pool`` has been set to
    ``None``: in both cases a new pool is simply created.
    """
    global pool
    try:
        stale = pool
    except NameError:
        # First call: no pool has been created yet, nothing to tear down.
        stale = None
    if stale is not None:
        stale.terminate()
        stale.join()
    pool = multiprocessing.Pool()
# Build one job per (model, K, thread count, user sample ratio) combination
# and run them through a process pool.
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

run_args = []
numa_queue = get_numa_queue()
for (model_dir, (num_factors, num_users, num_items, _, _), _) in TO_RUN:
    input_dir = os.path.join(MODEL_DIR_BASE, model_dir)
    # Flatten the model's relative path into a single file-name-safe token.
    base_name = model_dir.replace('/', '-')
    for K, num_threads, user_sample_ratio in product(
            TOP_K, NUM_THREADS, USER_SAMPLE_RATIOS):
        run_args.append((numa_queue, num_factors, num_users, num_items, K,
                         num_threads, args.sample, user_sample_ratio,
                         input_dir, base_name, output_dir, runner))

pool = multiprocessing.Pool(
    NUM_NUMA_NODES)  # Only run 4 jobs at once, since we have 4 NUMA nodes
try:
    pool.map(run, run_args)
finally:
    # Always shut the workers down, even if a job raised, so no worker
    # processes are left behind.
    pool.close()
    pool.join()