data_full = hfw.create_dataset("%s_data_full" % dset_type,
                               (0, nb_channels, size, size),
                               maxshape=(None, 3, size, size),
                               dtype=np.uint8)

data_sketch = hfw.create_dataset("%s_data_sketch" % dset_type,
                                 (0, nb_channels, size, size),
                                 maxshape=(None, 3, size, size),
                                 dtype=np.uint8)

num_files = len(list_img)
chunk_size = 100
num_chunks = num_files // chunk_size
arr_chunks = np.array_split(np.arange(num_files), num_chunks)

for chunk_idx in tqdm(arr_chunks):
    list_img_path = list_img[chunk_idx].tolist()
    output = parmap.map(format_image, list_img_path, size, nb_channels, pm_parallel=False)

    arr_img_full = np.concatenate([o[0] for o in output], axis=0)
    arr_img_sketch = np.concatenate([o[1] for o in output], axis=0)

    # Resize HDF5 dataset
    data_full.resize(data_full.shape[0] + arr_img_full.shape[0], axis=0)
    data_sketch.resize(data_sketch.shape[0] + arr_img_sketch.shape[0], axis=0)

    data_full[-arr_img_full.shape[0]:] = arr_img_full.astype(np.uint8)
    data_sketch[-arr_img_sketch.shape[0]:] = arr_img_sketch.astype(np.uint8)
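
# ---------------------------------------------------------------------------
# Minimal, self-contained sketch of the resizable-HDF5 append pattern used
# above: create a dataset with an unlimited first axis, then grow it chunk by
# chunk. The file name, dataset name, and shapes here are illustrative
# assumptions, not taken from the snippet.
import h5py
import numpy as np

size, nb_channels = 64, 3

with h5py.File("example_images.h5", "w") as hfw:
    dset = hfw.create_dataset("data",
                              (0, nb_channels, size, size),
                              maxshape=(None, nb_channels, size, size),
                              dtype=np.uint8)
    for _ in range(3):
        batch = np.random.randint(0, 256, (10, nb_channels, size, size), dtype=np.uint8)
        # Grow the first axis by the batch size, then fill the new slots.
        dset.resize(dset.shape[0] + batch.shape[0], axis=0)
        dset[-batch.shape[0]:] = batch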
list_img = np.array(list_img)
data_color = hfw.create_dataset("data",
                                (0, 3, size, size),
                                maxshape=(None, 3, size, size),
                                dtype=np.uint8)

num_files = len(list_img)
chunk_size = 2000
num_chunks = num_files // chunk_size
arr_chunks = np.array_split(np.arange(num_files), num_chunks)

for chunk_idx in tqdm(arr_chunks):
    list_img_path = list_img[chunk_idx].tolist()
    output = parmap.map(format_image, list_img_path, size, pm_parallel=True)

    arr_img_color = np.concatenate(output, axis=0)

    # Resize HDF5 dataset
    data_color.resize(data_color.shape[0] + arr_img_color.shape[0], axis=0)
    data_color[-arr_img_color.shape[0]:] = arr_img_color.astype(np.uint8)
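
# ---------------------------------------------------------------------------
# Minimal sketch of the parmap.map calling convention used in these snippets:
# extra positional arguments after the iterable are forwarded to every call,
# and pm_parallel toggles multiprocessing. resize_stub is a hypothetical
# stand-in for format_image.
import parmap

def resize_stub(path, size, nb_channels):
    return (path, size, nb_channels)

paths = ["img_0.jpg", "img_1.jpg", "img_2.jpg"]
# Equivalent to [resize_stub(p, 64, 3) for p in paths], run serially here;
# pm_parallel=True would fan the calls out over a process pool instead.
results = parmap.map(resize_stub, paths, 64, 3, pm_parallel=False)
print(results)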
data_lab = hfw.create_dataset("%s_lab_data" % dset_type,
                              (0, 3, size, size),
                              maxshape=(None, 3, size, size),
                              dtype=np.float64)

data_black = hfw.create_dataset("%s_black_data" % dset_type,
                                (0, 1, size, size),
                                maxshape=(None, 1, size, size),
                                dtype=np.uint8)

num_files = len(list_img)
chunk_size = 1000
num_chunks = num_files // chunk_size
arr_chunks = np.array_split(np.arange(num_files), num_chunks)

for chunk_idx in tqdm(arr_chunks):
    list_img_path = list_img[chunk_idx].tolist()
    output = parmap.map(format_image, list_img_path, size, pm_parallel=True)

    arr_img_color = np.vstack([o[0] for o in output if o[0].shape[0] > 0])
    arr_img_lab = np.vstack([o[1] for o in output if o[0].shape[0] > 0])
    arr_img_black = np.vstack([o[2] for o in output if o[0].shape[0] > 0])

    # Resize HDF5 dataset
    data_color.resize(data_color.shape[0] + arr_img_color.shape[0], axis=0)
    data_lab.resize(data_lab.shape[0] + arr_img_lab.shape[0], axis=0)
    data_black.resize(data_black.shape[0] + arr_img_black.shape[0], axis=0)

    data_color[-arr_img_color.shape[0]:] = arr_img_color.astype(np.uint8)
    data_lab[-arr_img_lab.shape[0]:] = arr_img_lab.astype(np.float64)
    data_black[-arr_img_black.shape[0]:] = arr_img_black.astype(np.uint8)
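
# ---------------------------------------------------------------------------
# Sketch of the chunking scheme shared by the loops above: split the file
# indices into roughly equal groups so that only about chunk_size images are
# held in memory at a time. The counts here are illustrative.
import numpy as np

num_files = 2500
chunk_size = 1000
num_chunks = max(1, num_files // chunk_size)   # integer division, as above
arr_chunks = np.array_split(np.arange(num_files), num_chunks)

for chunk_idx in arr_chunks:
    # Each chunk is an array of indices into the image list.
    print(chunk_idx[0], chunk_idx[-1], len(chunk_idx))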
component_resp_dict = component_resp_df.to_dict()
sys_output_dict = {k: {o: 0 for o in fc.network.out_node_list}
                   for k in sc.hazard_intensity_str}
ids_comp_vs_haz = {p: np.zeros((sc.num_samples, fc.num_elements))
                   for p in sc.hazard_intensity_str}

calculated_output_array = np.zeros((sc.num_samples, sc.num_hazard_pts))
economic_loss_array = np.zeros_like(calculated_output_array)
output_array_given_recovery = np.zeros(
    (sc.num_samples, sc.num_hazard_pts, sc.num_time_steps)
)

if parallel_proc:
    print('\nInitiating computation of loss arrays...')
    print(Fore.YELLOW + 'using parallel processing\n' + Fore.RESET)
    parallel_return = parmap.map(
        multiprocess_enabling_loop, range(len(sc.hazard_intensity_str)),
        sc.hazard_intensity_str, sc.num_hazard_pts, fc, sc
    )
    for idxPGA, _PGA in enumerate(sc.hazard_intensity_str):
        ids_comp_vs_haz[_PGA] = parallel_return[idxPGA][0]
        sys_output_dict[_PGA] = parallel_return[idxPGA][1]
        component_resp_dict[_PGA] = parallel_return[idxPGA][2]
        calculated_output_array[:, idxPGA] = parallel_return[idxPGA][3]
        economic_loss_array[:, idxPGA] = parallel_return[idxPGA][4]
        output_array_given_recovery[:, idxPGA, :] = \
            parallel_return[idxPGA][5]
else:
    print('\nInitiating computation of loss arrays...')
    print(Fore.RED + 'not using parallel processing\n' + Fore.RESET)
    for idxPGA, _PGA in enumerate(sc.hazard_intensity_str):
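
# ---------------------------------------------------------------------------
# Minimal sketch of the gather-then-unpack pattern in the parallel branch
# above: parmap.map returns one tuple per hazard index, which is then
# scattered into column-indexed arrays. simulate_hazard is a hypothetical
# stand-in for multiprocess_enabling_loop.
import numpy as np
import parmap

def simulate_hazard(idx, hazard_levels, num_samples):
    level = float(hazard_levels[idx])
    output = np.full(num_samples, level)   # e.g. system output per sample
    loss = output * 0.1                    # e.g. economic loss per sample
    return output, loss

hazard_levels = ["0.1", "0.2", "0.3"]
num_samples = 4

# One call per hazard index; the remaining arguments are shared by every call.
parallel_return = parmap.map(simulate_hazard, range(len(hazard_levels)),
                             hazard_levels, num_samples, pm_parallel=False)

calculated_output = np.zeros((num_samples, len(hazard_levels)))
economic_loss = np.zeros_like(calculated_output)
for idx, _ in enumerate(hazard_levels):
    calculated_output[:, idx] = parallel_return[idx][0]
    economic_loss[:, idx] = parallel_return[idx][1]
print(economic_loss)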
positive_data_train_list = parmap.map(extract_data_from_bed,
                                      zip(chip_bed_train_list, bigwig_files_list, meta_list),
                                      True, positive_label, gencode)
positive_data_train = list(itertools.chain(*positive_data_train_list))

#Validation
print 'Extracting data from positive validation BEDs'
positive_data_valid_list = parmap.map(extract_data_from_bed,
                                      zip(chip_bed_valid_list, valid_bigwig_files_list, valid_meta_list),
                                      False, positive_label, gencode)
positive_data_valid = list(itertools.chain(*positive_data_valid_list))

print 'Shuffling positive training windows in negative regions'
train_noOverlap = True
train_randomseeds = np.random.randint(-2147483648, 2147483647, num_cells)
positive_windows_train_list = parmap.map(data_to_bed, positive_data_train_list)
negative_windows_train_list = parmap.map(negative_shuffle_wrapper,
                                         zip(positive_windows_train_list, nonnegative_regions_bed_list,
                                             bigwig_files_list, train_randomseeds),
                                         genome_bed_train, negatives*epochs, train_noOverlap)

print 'Shuffling positive validation windows in negative regions'
valid_randomseeds = np.random.randint(-2147483648, 2147483647, num_cells)
positive_windows_valid_list = parmap.map(data_to_bed, positive_data_valid_list)
negative_windows_valid_list = parmap.map(negative_shuffle_wrapper,
                                         zip(positive_windows_valid_list, nonnegative_regions_bed_list,
                                             bigwig_files_list, valid_randomseeds),
                                         genome_bed_valid, negatives, True)
negative_label = [False]
#Train
print 'Extracting data from negative training BEDs'
negative_data_train_list = parmap.map(extract_data_from_bed,
                                      zip(negative_windows_train_list, bigwig_files_list, meta_list),
                                      False, negative_label, gencode)
negative_data_train = list(itertools.chain(*negative_data_train_list))

#Validation
print 'Extracting data from negative validation BEDs'
negative_data_valid_list = parmap.map(extract_data_from_bed,
                                      zip(negative_windows_valid_list, valid_bigwig_files_list, valid_meta_list),
                                      False, negative_label, gencode)
negative_data_valid = list(itertools.chain(*negative_data_valid_list))
data_valid = negative_data_valid + positive_data_valid
print 'Shuffling training data'
num_negatives_per_epoch = negatives*len(positive_data_train)
np.random.shuffle(negative_data_train)
data_train = []
for i in xrange(epochs):
    epoch_data = []
    epoch_data.extend(positive_data_train)
    epoch_data.extend(negative_data_train[i*num_negatives_per_epoch:(i+1)*num_negatives_per_epoch])
    np.random.shuffle(epoch_data)
    data_train.extend(epoch_data)
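
# ---------------------------------------------------------------------------
# Toy sketch of the epoch assembly above: positives are reused every epoch,
# while negatives are consumed in disjoint per-epoch slices from one large
# pre-shuffled pool. The lists and counts here are illustrative.
import numpy as np

positive_data_train = ["pos_a", "pos_b"]
negative_data_train = ["neg_%d" % i for i in range(12)]
negatives = 2                       # negatives per positive, per epoch
epochs = 3
num_negatives_per_epoch = negatives * len(positive_data_train)

np.random.shuffle(negative_data_train)
data_train = []
for i in range(epochs):
    epoch_data = list(positive_data_train)
    epoch_data.extend(
        negative_data_train[i * num_negatives_per_epoch:(i + 1) * num_negatives_per_epoch])
    np.random.shuffle(epoch_data)
    data_train.extend(epoch_data)

print(len(data_train))  # epochs * (positives + negatives per epoch) = 3 * 6 = 18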
component_resp_dict = dict()
sys_output_dict = {k: {o: 0 for o in fc.network.out_node_list}
                   for k in sc.hazard_intensity_str}
ids_comp_vs_haz = {p: np.zeros((sc.num_samples, fc.num_elements))
                   for p in sc.hazard_intensity_str}

calculated_output_array = np.zeros((sc.num_samples, sc.num_hazard_pts))
economic_loss_array = np.zeros_like(calculated_output_array)
output_array_given_recovery = np.zeros(
    (sc.num_samples, sc.num_hazard_pts, sc.num_time_steps)
)

if parallel_proc:
    logging.info('\nInitiating computation of loss arrays...')
    logging.info(Fore.YELLOW + 'using parallel processing\n' + Fore.RESET)
    parallel_return = parmap.map(
        multiprocess_enabling_loop, range(len(sc.hazard_intensity_str)),
        sc.hazard_intensity_str, sc.num_hazard_pts, fc, sc
    )
    for idxPGA, _PGA in enumerate(sc.hazard_intensity_str):
        ids_comp_vs_haz[_PGA] = parallel_return[idxPGA][0]
        sys_output_dict[_PGA] = parallel_return[idxPGA][1]
        component_resp_dict[_PGA] = parallel_return[idxPGA][2]
        calculated_output_array[:, idxPGA] = parallel_return[idxPGA][3]
        economic_loss_array[:, idxPGA] = parallel_return[idxPGA][4]
        output_array_given_recovery[:, idxPGA, :] = \
            parallel_return[idxPGA][5]
else:
    logging.info('\nInitiating computation of loss arrays...')
    logging.info(Fore.RED + 'not using parallel processing\n' + Fore.RESET)
    for idxPGA, _PGA in enumerate(sc.hazard_intensity_str):
print 'Splitting ChIP peaks into training, validation, and testing BEDs'
chip_bed_split_list = parmap.map(valid_test_split_wrapper, chip_bed_list, valid_chroms, test_chroms)
chip_bed_train_list, chip_bed_valid_list, chip_bed_test_list = zip(*chip_bed_split_list)
if valid_chip_bed_list: # the user specified a validation directory, must adjust validation data
    valid_chip_bed_split_list = parmap.map(valid_test_split_wrapper, valid_chip_bed_list, valid_chroms, test_chroms)
    _, chip_bed_valid_list, _ = zip(*valid_chip_bed_split_list)
else:
    valid_nonnegative_regions_bed_list = nonnegative_regions_bed_list
    valid_bigwig_files_list = bigwig_files_list
    valid_meta_list = meta_list
positive_label = [True]
#Train
print 'Extracting data from positive training BEDs'
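
# ---------------------------------------------------------------------------
# Hypothetical sketch of a chromosome-based train/valid/test split in the
# spirit of the valid_test_split_wrapper call above; plain (chrom, start, end)
# tuples stand in for BED intervals, and the helper name is illustrative.
def split_by_chrom(intervals, valid_chroms, test_chroms):
    train, valid, test = [], [], []
    for chrom, start, end in intervals:
        if chrom in test_chroms:
            test.append((chrom, start, end))
        elif chrom in valid_chroms:
            valid.append((chrom, start, end))
        else:
            train.append((chrom, start, end))
    return train, valid, test

peaks = [("chr1", 100, 300), ("chr8", 50, 250), ("chr21", 10, 210)]
train, valid, test = split_by_chrom(peaks, valid_chroms=["chr8"], test_chroms=["chr21"])
print(len(train), len(valid), len(test))  # 1 1 1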