# Excluding samples that failed on label validation
counter['invalid_label'] += 1
elif int(frames/SAMPLE_RATE*1000/10/2) < len(str(label)):
# Excluding samples that are too short to fit the transcript
counter['too_short'] += 1
elif frames/SAMPLE_RATE > MAX_SECS:
# Excluding very long samples to keep a reasonable batch-size
counter['too_long'] += 1
else:
# This one is good - keep it for the target CSV
rows.append((os.path.split(wav_filename)[-1], file_size, label))
counter['all'] += 1
counter['total_time'] += frames
print("Importing mp3 files...")
pool = Pool(cpu_count())
bar = progressbar.ProgressBar(max_value=num_samples, widgets=SIMPLE_BAR)
for i, _ in enumerate(pool.imap_unordered(one_sample, samples), start=1):
bar.update(i)
bar.update(num_samples)
pool.close()
pool.join()
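# Note: Pool.imap_unordered yields each sample's result as soon as a worker
# finishes it, regardless of submission order, which is what lets the progress
# bar above advance per completed file; close()/join() then make sure every
# worker is done before the CSV is written below.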
with open(output_csv, 'w', encoding='utf-8') as output_csv_file:
print('Writing CSV file for DeepSpeech.py as: ', output_csv)
writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
writer.writeheader()
bar = progressbar.ProgressBar(max_value=len(rows), widgets=SIMPLE_BAR)
for filename, file_size, transcript in bar(rows):
if space_after_every_character:
writer.writerow({'wav_filename': filename, 'wav_filesize': file_size, 'transcript': ' '.join(transcript)})
else:
    writer.writerow({'wav_filename': filename, 'wav_filesize': file_size, 'transcript': transcript})
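# Illustrative, self-contained sketch of the same import pattern (hypothetical
# names, not part of the project above): filter samples in worker processes via
# Pool.imap_unordered, then write the surviving rows with csv.DictWriter.
import csv
from multiprocessing import Pool, cpu_count

FIELDNAMES = ['wav_filename', 'wav_filesize', 'transcript']

def check_sample(sample):
    # sample is a (filename, filesize, transcript) tuple; drop empty transcripts
    return sample if sample[2] else None

def write_clean_csv(samples, output_csv):
    with Pool(cpu_count()) as pool:
        rows = [r for r in pool.imap_unordered(check_sample, samples) if r is not None]
    with open(output_csv, 'w', encoding='utf-8', newline='') as output_csv_file:
        writer = csv.DictWriter(output_csv_file, fieldnames=FIELDNAMES)
        writer.writeheader()
        for filename, filesize, transcript in rows:
            writer.writerow({'wav_filename': filename, 'wav_filesize': filesize, 'transcript': transcript})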
self.freq = None
self.group = None
self.subp = None
self.accession = None
self.neighbour_seqs = None
self.ec = None #added by Le Feb 19, 2020
def callCustom(args):
return call(args, shell=True)
print ("Assigning proteins to groups")
args_array = []
var1 = 1
varlist = " ".join(str(x) for x in variables)
pool = ThreadPool(threads)
while var1 <= threads:
args_array.append(("bact_group_many_proteins_many_patterns.py "+ str(var1) + " " + varlist))
var1 += 1
pool.map(callCustom, args_array)
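# Note: callCustom shells out (presumably via subprocess.call) once per worker
# command, so a ThreadPool is sufficient here: each thread merely blocks on an
# external bact_group_many_proteins_many_patterns.py process rather than running
# Python-level work, and the GIL is not a bottleneck.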
print("Collecting Results")
pep_list_array = []
try:
    f = open(peptide_dir_name + "/large_fams.txt", 'r')
except IOError:
    # Fall back to the full family list if the large-family file is absent
    f = open(peptide_dir_name + "/fam_list.txt", 'r')
for line in f:
    pep_list_array.append(line.rstrip())
f.close()
pep_list_hash = {}
if book_id_list:
    self.book_spider_log.info(
        'Tag ' + str(tag) + ': fetching book IDs at offset ' + str(start) + ' failed, retry ' + str(i) + ' succeeded')
    break
else:
    self.book_spider_log.info(
        'Tag ' + str(tag) + ': fetching book IDs at offset ' + str(start) + ' failed, retry ' + str(i) + ' failed as well')
    time.sleep(10)
if not book_id_list:
start += 20
continue
elif not book_id_list:
break
# Fetch book info with multiple threads
movie_pool = ThreadPool(12)
movie_pool.map(self.get_book_info, book_id_list)
movie_pool.close()
movie_pool.join()
# Fetch author info with multiple threads
person_id_list = []
while self.redis_con.llen('author_queue'):
# Pop author IDs off the queue
person_id_list.append(str(self.redis_con.rpop('author_queue').decode('utf-8')))
author_poll = ThreadPool(12)
author_poll.map(self.get_person_info, person_id_list)
author_poll.close()
author_poll.join()
# Proceed to the next iteration
start += 20
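# Note: the crawler above uses two ThreadPools of 12 workers each: one maps
# get_book_info over the freshly fetched book IDs, the other maps
# get_person_info over author IDs drained from the Redis list 'author_queue';
# threads fit these network-bound requests well.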
def undeploy(self, lab_hash, selected_machines=None):
machines = self.get_machines_by_filters(lab_hash=lab_hash)
cpus = cpu_count()
machines_pool = Pool(cpus)
items = [machines] if len(machines) < cpus else \
utils.list_chunks(machines, cpus)
for chunk in items:
machines_pool.map(func=partial(self._undeploy_machine, selected_machines, True), iterable=chunk)
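# Note: undeploy() sizes the Pool to cpu_count() and, when there are more
# machines than CPUs, splits the machine list into chunks via utils.list_chunks,
# issuing one pool.map per chunk; functools.partial pre-binds selected_machines
# (and a flag) so _undeploy_machine only receives the machine itself from map.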
for f in os.listdir(path+'/'+g):
process_disease_file(path,g,db.tweets,f)
except:
continue
###################################################################################
if __name__ == '__main__':
path = sys.argv[1]
# Multiprocessing
pool = ThreadPool(6)  # Adjust the thread count to match the number of cores on your machine
#Database
client = MongoClient()
db = client['HealthCare_Twitter_Analysis']
#collection = db.tweets
#Use test database for debugging
#db = client['test']
#Navigate directory structure
for g in os.listdir(path):
    try:
        files = os.listdir(path + '/' + g)
        partial_process_disease_file = partial(process_disease_file, path, g, db.tweets)
        pool.map(partial_process_disease_file, files)
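# Note: functools.partial pre-binds the fixed arguments of process_disease_file
# (the base path, the disease directory g and the MongoDB collection) so that
# ThreadPool.map only has to supply the varying filename; the enclosing try lets
# the loop skip directories that fail (its except/continue handler appears
# earlier in this excerpt).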
calculator = TimeSeriesCalculator()
for dayinseconds in daysinrange:
result = calculator.calc_average_on_day(min_lat, max_lat, min_lon, max_lon, ds, dayinseconds)
results.append(result)
else:
# Create a task to calc average difference for each day
manager = Manager()
work_queue = manager.Queue()
done_queue = manager.Queue()
for dayinseconds in daysinrange:
work_queue.put(
('calc_average_on_day', min_lat, max_lat, min_lon, max_lon, ds, dayinseconds))
[work_queue.put(SENTINEL) for _ in xrange(0, maxprocesses)]
# Start new processes to handle the work
pool = Pool(maxprocesses)
[pool.apply_async(pool_worker, (work_queue, done_queue)) for _ in xrange(0, maxprocesses)]
pool.close()
# Collect the results as [(day (in ms), average difference for that day)]
for i in xrange(0, len(daysinrange)):
result = done_queue.get()
try:
error_str = result['error']
self.log.error(error_str)
raise NexusProcessingException(reason="Error calculating average by day.")
except KeyError:
pass
results.append(result)
pool.terminate()
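# Note: this branch builds explicit work/done queues with multiprocessing.Manager,
# enqueues one SENTINEL per worker so each pool_worker knows when to stop,
# launches the workers with apply_async, reads exactly len(daysinrange) results
# back from done_queue (raising a NexusProcessingException if a result dict
# carries an 'error' key), and finally terminates the pool.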
card['hpwn'] = hpid
except Exception as e:
urlName = getHearthHeadId(name)
url = 'https://www.hearthstonetopdecks.com/cards/{}/'.format(urlName)
_, cardHTD = parseHTD(url, session)
card['cdn'] = cardHTD['cdn']
card['hpwn'] = 12288
card['head'] = getHearthHeadId(name)
currentSet[card['name']] = card
print('.', end='')
saveCardsAsJson(filename, currentSet)
resultCards.update(currentSet)
with Pool(4) as p:
p.map(doSet, sets)
return resultCards
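# Note: using the pool as a context manager ("with Pool(4) as p:") terminates
# the pool automatically when the block exits, so no explicit close()/join() is
# needed around p.map(doSet, sets).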
def ssh_exec(self, command):
self._log("Executing command on all nodes: {}".format(command))
args = [(self._single_ssh_exec, instance, command) for instance in self._get_running_instances()]
pool = Pool(ChainspaceNetwork.threads)
result = pool.map(_multi_args_wrapper, args)
pool.close()
pool.join()
self._log("Executed command on all nodes: {}".format(command))
return result
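# Note: Pool.map passes a single argument to its worker, so ssh_exec packs
# (callable, instance, command) tuples and leaves it to _multi_args_wrapper
# (presumably calling args[0](*args[1:])) to unpack them; on Python 3.3+,
# pool.starmap offers the same effect without a wrapper.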
def recursive(self, my_dir):
self.print_info("Recursive mode")
files_suid = []
files_sgid = []
files_list = []
for cpwd, dirs, files in walk(my_dir):
if cpwd.endswith("/"):
cwd = cpwd
else:
cwd = cpwd + "/"
for f in files:
files_list.append(cwd + f)
pool = Pool(8)
results = pool.map(self.is_suid_sgid, files_list)
pool.close()
pool.join()
for result in results:
if result[0]:
files_suid.append(result[0])
if result[1]:
files_sgid.append(result[1])
return [files_suid, files_sgid]
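# Minimal, self-contained sketch of the same scan pattern (hypothetical names,
# not from the project above): map a check function over the collected paths
# with a Pool of 8 workers and split the paired results afterwards.
import os
from multiprocessing import Pool

def check_file(path):
    # Return (path-if-setuid, path-if-setgid); None keeps the pair shape intact
    mode = os.stat(path).st_mode
    return (path if mode & 0o4000 else None,
            path if mode & 0o2000 else None)

def scan(paths):
    with Pool(8) as pool:
        results = pool.map(check_file, paths)
    files_suid = [s for s, _ in results if s]
    files_sgid = [g for _, g in results if g]
    return files_suid, files_sgid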
def clone_repos(self, inputs, output, ignore):
with Pool(self._num_threads) as pool:
pool.map(partial(self.process_repo, ignore=ignore, target_dir=output),
self.generate_repo_urls(inputs))