How to use the nltools.misc module in nltools

To help you get started, we’ve selected a few nltools.misc examples based on common ways it is used in public projects.


github gooofy / zamia-ai / data-tools / csv / csv_align.py
import sys
import time
import logging
import xml.etree.ElementTree as ET
import numpy as np

from optparse               import OptionParser
from gensim.models          import word2vec
from nltools                import misc, tokenizer
from align_model            import AlignModel

OUTPUT_DIR  = 'out'

#
# init, cmdline
#

misc.init_app('csv_align')

parser = OptionParser("usage: %prog [options] foo.csv")

parser.add_option ("-v", "--verbose", action="store_true", dest="verbose",
                   help="verbose output")

(options, args) = parser.parse_args()

if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)

if len(args) != 1:
    parser.print_usage()
    sys.exit(1)
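
The snippet above shows the initialisation pattern that recurs in almost every script in this collection: misc.init_app() is called first with the program name (it appears to set the process title), an OptionParser handles the command line, and the logging level is derived from the --verbose flag. A stripped-down, self-contained sketch of that boilerplate; the program name 'my_tool' and the usage string are illustrative, not taken from the project:

import sys
import logging

from optparse import OptionParser
from nltools  import misc

misc.init_app('my_tool')   # register the application / process name

parser = OptionParser("usage: %prog [options] input.csv")
parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                  help="verbose output")

(options, args) = parser.parse_args()

logging.basicConfig(level=logging.DEBUG if options.verbose else logging.INFO)

if len(args) != 1:
    parser.print_usage()
    sys.exit(1)

logging.info('processing %s ...' % args[0])
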
github gooofy / zamia-speech / import_librispeech.py
            promptsfn = '%s/etc/prompts-original' % dstdir
            transfn = '%s/%s/%s/%s/%s-%s.trans.txt' % (srcdir, subset, speaker, book_id, speaker, book_id)

            with codecs.open (promptsfn, 'w', 'utf8') as promptsf:
                with codecs.open(transfn, 'r', 'utf8') as transf:
                    for line in transf:
                        parts = line.split()
                        promptsf.write(line)

                        flac_src = '%s/%s/%s/%s/%s.flac' % (srcdir, subset, speaker, book_id, parts[0])
                        flac_dst = '%s/flac/%s.flac' % (dstdir, parts[0])

                        logging.debug (' %s -> %s' % (flac_src, flac_dst))

                        misc.symlink(flac_src, flac_dst)


            logging.debug ('%s written.' % promptsfn)
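
Here the LibriSpeech importer uses misc.symlink() to link each source FLAC file into the destination corpus rather than copying the audio. A minimal sketch of that call in isolation; the paths are made up, and the exact behaviour of misc.symlink (e.g. whether it tolerates an already-existing link) is an assumption based on its use above:

import logging
from nltools import misc

logging.basicConfig(level=logging.DEBUG)

# illustrative paths only
flac_src = '/data/archive/LibriSpeech/dev-clean/84/121123/84-121123-0000.flac'
flac_dst = '/data/corpora/librispeech/flac/84-121123-0000.flac'

logging.debug(' %s -> %s' % (flac_src, flac_dst))
misc.symlink(flac_src, flac_dst)   # create a symlink instead of copying the file
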
github gooofy / zamia-ai / data-tools / aiml / aiml_align.py
        nwords = 0

        for word in words:
            if word in index2word_set:
                nwords = nwords+1
                featureVec = np.add(featureVec, model[word])

        if nwords>0:
            featureVec = np.divide(featureVec, nwords)
        return featureVec

#
# init, cmdline
#

misc.init_app('aim2topics')

parser = OptionParser("usage: %prog [options] foo.aiml")

parser.add_option ("-l", "--lang", dest="lang", type = "string", default='en',
                   help="language, default: en")
parser.add_option ("-v", "--verbose", action="store_true", dest="verbose",
                   help="verbose output")

(options, args) = parser.parse_args()

if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)

if len(args) != 1:
    parser.print_usage()
    sys.exit(1)
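
The first half of this snippet is the tail of a feature-vector helper: it averages the word2vec vectors of every word the model knows about. A self-contained sketch of that averaging logic, written against a plain word-to-vector mapping so it does not depend on a particular gensim version; the function and variable names are illustrative:

import numpy as np

def make_feature_vec(words, word_vectors, num_features):
    # word_vectors is assumed to be a mapping word -> 1-D numpy array
    feature_vec = np.zeros((num_features,), dtype='float32')
    nwords = 0

    for word in words:
        if word in word_vectors:
            nwords += 1
            feature_vec = np.add(feature_vec, word_vectors[word])

    # avoid division by zero when none of the words are in the vocabulary
    if nwords > 0:
        feature_vec = np.divide(feature_vec, nwords)
    return feature_vec

# toy usage
vecs = {'hello': np.ones(3, dtype='float32'),
        'world': np.full(3, 3.0, dtype='float32')}
print(make_feature_vec(['hello', 'world', 'unknown'], vecs, 3))   # -> [2. 2. 2.]
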
github gooofy / zamia-tts / eval.py
VOICE_PATH      = 'data/model/%s'
# VOICE           = 'voice-karlsson-latest'
DEFAULT_VOICE   = 'voice-linda-latest'
BATCH_SIZE      = 32

# x_13036.npy (1, 240) these figures suggest that the agents of the secret service are substantially overworked .

X_FN  = NPDIR + 'x_13036.npy'
X_TXT = 'These figures suggest that the agents of the secret service are substantially overworked.'
X_L   = len(X_TXT)

#
# init
#

misc.init_app(PROC_TITLE)

#
# command line
#

parser = OptionParser("usage: %prog [options] [
github gooofy / zamia-speech / wav2letter_export.py
misc.mkdirs('%s/valid' % data_dir)
misc.mkdirs('%s/train' % data_dir)

#
# load dict
#

logging.info("loading lexicon...")
lex = Lexicon(file_name=dictionary)
logging.info("loading lexicon...done.")

#
# language model
#

misc.copy_file('%s/lm.arpa' % language_model_dir, '%s/lm.arpa' % data_dir)


#
# scripts
#

misc.render_template('data/src/speech/w2l_run_train.sh.template', '%s/run_train.sh' % work_dir, w2l_env_activate=w2l_env_activate, w2l_train=w2l_train)
misc.render_template('data/src/speech/w2l_run_decode.sh.template', '%s/run_decode.sh' % work_dir, w2l_env_activate=w2l_env_activate, w2l_train=w2l_train)
misc.mkdirs('%s/config/conv_glu' % work_dir)
misc.render_template('data/src/speech/w2l_config_conv_glu_train.cfg.template', '%s/config/conv_glu/train.cfg' % work_dir, runname=model_name)
misc.copy_file('data/src/speech/w2l_config_conv_glu_network.arch', '%s/config/conv_glu/network.arch' % work_dir)

#
# export audio
#
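
This export script demonstrates three more misc helpers: misc.mkdirs() creates (nested) directories, misc.copy_file() copies a single file, and misc.render_template() fills a template with the given keyword arguments and writes the result. A condensed sketch with made-up paths and values; the template syntax expected by render_template is an assumption here:

from nltools import misc

work_dir = '/tmp/w2l_work'    # illustrative paths
data_dir = '/tmp/w2l_data'

# create the directory layout, including intermediate directories
misc.mkdirs('%s/train' % data_dir)
misc.mkdirs('%s/valid' % data_dir)
misc.mkdirs('%s/config/conv_glu' % work_dir)

# copy a static file into place
misc.copy_file('data/lm.arpa', '%s/lm.arpa' % data_dir)

# render a shell-script template, substituting the keyword arguments
misc.render_template('templates/run_train.sh.template',
                     '%s/run_train.sh' % work_dir,
                     w2l_env_activate='/opt/wav2letter/env/bin/activate',
                     w2l_train='/opt/wav2letter/build/Train')
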
github gooofy / zamia-speech / speech_kaldi_adapt.py
                if pws not in ps:
                    ps[pws] = set([p])
                else:
                    ps[pws].add(p)

logging.info ( "%s written." % dictfn2 )

logging.info ( "Exporting dictionary ... done." )

#
# copy phoneme sets from original model
#

misc.copy_file ('%s/data/local/dict/nonsilence_phones.txt' % src_model, '%s/data/local/dict/nonsilence_phones.txt' % dst_dir)
misc.copy_file ('%s/data/local/dict/silence_phones.txt' % src_model,    '%s/data/local/dict/silence_phones.txt' % dst_dir)
misc.copy_file ('%s/data/local/dict/optional_silence.txt' % src_model,  '%s/data/local/dict/optional_silence.txt' % dst_dir)
misc.copy_file ('%s/data/local/dict/extra_questions.txt' % src_model,   '%s/data/local/dict/extra_questions.txt' % dst_dir)

#
# language model / grammar
#

if lm_name.endswith('arpa'):
    misc.copy_file (lm_name, '%s/lm.arpa' % dst_dir)
elif lm_name.endswith('jsgf'):
    misc.copy_file (lm_name, '%s/G.jsgf' % dst_dir)
else:
    misc.copy_file (lm_name, '%s/G.src.fst' % dst_dir)


#
github gooofy / zamia-speech / import_gspv2.py
def main(verbose=False):
    """Convert gspv2 corpus to the VoxForge corpus format

    The variable `speech_arc` in ~/.speechrc must point to a folder that
    contains a subfolder gspv2 holding the original gspv2 corpus, i.e. the
    subfolders dev, test, and train.

    The variable `speech_corpora` in ~/.speechrc must point to a folder
    where the resulting corpus should be written. The script will create
    a subfolder gspv2 here for the resulting voxforge-formatted data.
    """
    misc.init_app('speech_audio_scan')
    config = misc.load_config('.speechrc')

    if verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    speech_arc_dir     = Path(config.get("speech", "speech_arc"))
    speech_corpora_dir = Path(config.get("speech", "speech_corpora"))
    src_root_dir = speech_arc_dir / "gspv2"
    dst_root_dir = speech_corpora_dir / "gspv2"

    exit_if_dst_root_dir_exists(dst_root_dir)

    speakers = set()
    speaker_gender = {}
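
Besides init_app, this importer uses misc.load_config('.speechrc'), which, as the snippet suggests, returns a ConfigParser-style object whose values are read with config.get(section, key). A minimal sketch; the [speech] section and key names are taken from the snippet above, the rest is illustrative:

from pathlib import Path
from nltools import misc

misc.init_app('speech_audio_scan')

# ~/.speechrc is expected to contain a [speech] section with these keys
config = misc.load_config('.speechrc')

speech_arc_dir     = Path(config.get("speech", "speech_arc"))
speech_corpora_dir = Path(config.get("speech", "speech_corpora"))

print('archive dir: %s' % speech_arc_dir)
print('corpora dir: %s' % speech_corpora_dir)
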