Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import time
import xml.etree.ElementTree as ET
import numpy as np
from optparse import OptionParser
from gensim.models import word2vec
from nltools import misc, tokenizer
from align_model import AlignModel
# Name of the output directory used by this script.
OUTPUT_DIR = 'out'

#
# init, cmdline
#

misc.init_app('csv_align')

# The usage string advertises one positional argument (a CSV file) plus an
# optional -v/--verbose switch.
parser = OptionParser("usage: %prog [options] foo.csv")

parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                  help="verbose output")

(options, args) = parser.parse_args()

# -v raises the log level from INFO to DEBUG.
if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)

# Anything other than exactly one positional argument is a usage error:
# print the usage line and terminate with exit status 1.
if len(args) != 1:
    parser.print_usage()
    sys.exit(1)
# Copy the transcript file of the current subset/speaker/book verbatim into
# the destination prompts file and symlink every FLAC file it references.
# NOTE(review): srcdir, dstdir, subset, speaker and book_id are supplied by
# surrounding code that is not part of this excerpt.
promptsfn = '%s/etc/prompts-original' % dstdir
transfn = '%s/%s/%s/%s/%s-%s.trans.txt' % (srcdir, subset, speaker, book_id, speaker, book_id)

with codecs.open(promptsfn, 'w', 'utf8') as promptsf:
    with codecs.open(transfn, 'r', 'utf8') as transf:
        for line in transf:

            # every transcript line is copied unchanged
            promptsf.write(line)

            parts = line.split()
            if not parts:
                # blank line: there is no first token to name a FLAC file,
                # so there is nothing to link (avoids IndexError below)
                continue

            # the first whitespace-separated token is the FLAC base name
            flac_src = '%s/%s/%s/%s/%s.flac' % (srcdir, subset, speaker, book_id, parts[0])
            flac_dst = '%s/flac/%s.flac' % (dstdir, parts[0])

            logging.debug(' %s -> %s' % (flac_src, flac_dst))
            misc.symlink(flac_src, flac_dst)

# both files are closed at this point
logging.debug('%s written.' % promptsfn)
nwords = 0
for word in words:
if word in index2word_set:
nwords = nwords+1
featureVec = np.add(featureVec, model[word])
if nwords>0:
featureVec = np.divide(featureVec, nwords)
return featureVec
#
# init, cmdline
#
misc.init_app('aim2topics')

# The usage string advertises one positional argument (an AIML file); the
# options are -l/--lang (string, defaults to 'en') and -v/--verbose.
parser = OptionParser("usage: %prog [options] foo.aiml")

parser.add_option("-l", "--lang", dest="lang", type="string", default='en',
                  help="language, default: en")
parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                  help="verbose output")

(options, args) = parser.parse_args()

# -v raises the log level from INFO to DEBUG.
if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)
if len(args) != 1:
# Path template with a single %s placeholder.
# NOTE(review): presumably filled with a voice name such as DEFAULT_VOICE --
# confirm against the code that uses it.
VOICE_PATH = 'data/model/%s'
# Alternative voice, currently disabled:
# VOICE = 'voice-karlsson-latest'
DEFAULT_VOICE = 'voice-linda-latest'
BATCH_SIZE = 32
# Sample input file and its text:
# x_13036.npy (1, 240) these figures suggest that the agents of the secret service are substantially overworked .
# NOTE(review): "(1, 240)" presumably is the shape of the stored array -- confirm.
# NPDIR is defined outside this excerpt; it is concatenated without a path
# separator, so it has to end with one.
X_FN = NPDIR + 'x_13036.npy'
X_TXT = 'These figures suggest that the agents of the secret service are substantially overworked.'
# Number of characters in X_TXT.
X_L = len(X_TXT)
#
# init
#
# PROC_TITLE is defined outside this excerpt.
misc.init_app(PROC_TITLE)
#
# command line
#
parser = OptionParser("usage: %prog [options] [
# Create the validation and training directories below data_dir.
for subdir_pattern in ('%s/valid', '%s/train'):
    misc.mkdirs(subdir_pattern % data_dir)

#
# load dict
#

logging.info("loading lexicon...")
lex = Lexicon(file_name=dictionary)
logging.info("loading lexicon...done.")

#
# language model
#

lm_src = '%s/lm.arpa' % language_model_dir
lm_dst = '%s/lm.arpa' % data_dir
misc.copy_file(lm_src, lm_dst)

#
# scripts
#

# Both run scripts are rendered with the same template variables.
# NOTE(review): the decode script also receives w2l_train -- confirm that
# this is what the decode template expects.
script_vars = dict(w2l_env_activate=w2l_env_activate, w2l_train=w2l_train)
for template_fn, script_pattern in (
        ('data/src/speech/w2l_run_train.sh.template', '%s/run_train.sh'),
        ('data/src/speech/w2l_run_decode.sh.template', '%s/run_decode.sh'),
):
    misc.render_template(template_fn, script_pattern % work_dir, **script_vars)

# conv_glu configuration: directory, rendered training config, network
# architecture copied as-is.
conv_glu_dir = '%s/config/conv_glu' % work_dir
misc.mkdirs(conv_glu_dir)
misc.render_template('data/src/speech/w2l_config_conv_glu_train.cfg.template',
                     '%s/config/conv_glu/train.cfg' % work_dir,
                     runname=model_name)
misc.copy_file('data/src/speech/w2l_config_conv_glu_network.arch',
               '%s/config/conv_glu/network.arch' % work_dir)
#
# export audio
#
# Add p to the set stored under key pws; setdefault creates an empty set the
# first time a key is seen, so no explicit membership test is needed.
ps.setdefault(pws, set()).add(p)
logging.info("%s written." % dictfn2)

logging.info("Exporting dictionary ... done.")

#
# copy phoneme sets from original model
#

# Each file keeps its relative path, so one template serves both the source
# (below src_model) and the destination (below dst_dir).
for phones_pattern in (
        '%s/data/local/dict/nonsilence_phones.txt',
        '%s/data/local/dict/silence_phones.txt',
        '%s/data/local/dict/optional_silence.txt',
        '%s/data/local/dict/extra_questions.txt',
):
    misc.copy_file(phones_pattern % src_model, phones_pattern % dst_dir)

#
# language model / grammar
#

# The destination file name is chosen from the suffix of lm_name; anything
# that ends in neither 'arpa' nor 'jsgf' is copied to G.src.fst.
if lm_name.endswith('arpa'):
    misc.copy_file(lm_name, '%s/lm.arpa' % dst_dir)
elif lm_name.endswith('jsgf'):
    misc.copy_file(lm_name, '%s/G.jsgf' % dst_dir)
else:
    misc.copy_file(lm_name, '%s/G.src.fst' % dst_dir)
#
def main(verbose=False):
"""Convert gspv2 corpus to the VoxForge corpus format
The variable `speech_arc` in ~/.speechrc must point to a folder
gspv2 which is used as the source containing the original gspv2 corpus,
i.e. containing the subfolders dev, test, and train.
The variable `speech_corpora` in ~/.speechrc must point to a folder
where the resulting corpus should be written. The script will create
a subfolder gspv2 here for the resulting voxforge-formatted data.
"""
misc.init_app('speech_audio_scan')
config = misc.load_config('.speechrc')
if verbose:
logging.basicConfig(level=logging.DEBUG)
else:
logging.basicConfig(level=logging.INFO)
speech_arc_dir = Path(config.get("speech", "speech_arc"))
speech_corpora_dir = Path(config.get("speech", "speech_corpora"))
src_root_dir = speech_arc_dir / "gspv2"
dst_root_dir = speech_corpora_dir / "gspv2"
exit_if_dst_root_dir_exists(dst_root_dir)
speakers = set()
speaker_gender = {}