How to use the nltools.misc.mkdirs function

To help you get started, we’ve selected a few nltools examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github gooofy / zamia-speech / import_ljspeech.py View on Github external
# NOTE(review): fragment of a Python 2 import script; the trailing part of the
# for-loop body is not visible in this chunk.

# Target directory for the imported LJSpeech corpus.
destdir = '%s/ljspeech' % speech_corpora

#
# audio, prompts
#

# Utterance ids collected so far (populated later, outside this chunk).
all_utts = set()

cnt = 0
# Conversion commands are written into a shell script for parallel execution.
with open('tmp/run_parallel.sh', 'w') as scriptf:

    # LJSpeech is imported under a single fixed speaker folder.
    folder = 'lindajohnson-11'
    dstdir = '%s/%s' % (destdir, folder)

    # Create the voxforge-style layout: wav/ for audio, etc/ for prompts.
    misc.mkdirs('%s/wav' % dstdir)
    misc.mkdirs('%s/etc' % dstdir)

    promptsfn = '%s/etc/prompts-original' % dstdir
    logging.debug ('dstdir: %s, promptsfn: %s' % (dstdir, promptsfn))

    # metadata.csv: one utterance per line with 3 '|'-separated fields.
    with codecs.open ('%s/metadata.csv' % srcdir, 'r', 'utf8') as metaf:

        with codecs.open (promptsfn, 'w', 'utf8') as promptsf:

            for line in metaf:

                logging.debug(line)

                parts = line.strip().split('|')
                # Skip malformed rows rather than aborting the whole import.
                if len(parts) != 3:
                    logging.error('malformed line: %s' % line)
                    continue
github gooofy / zamia-ai / data-tools / csv / align_model.py View on Github external
# NOTE(review): fragment starts mid-method (the enclosing def is not visible);
# the indentation of these first two lines looks mangled by extraction.
if DEBUG_LIMIT and len(tds)>DEBUG_LIMIT:
                break

        # randomize training sample order before training
        shuffle (self.training_data)

        #
        # set up model dir
        #

        # for a fresh (non-incremental) run, wipe any previous model directory
        if not incremental:
            try:
                shutil.rmtree(MODEL_DIR)
            except:
                # best-effort cleanup: the directory may simply not exist yet
                pass

            misc.mkdirs(MODEL_DIR)

        #
        # load or create input/output dicts
        #

        if incremental:
            logging.info("loading input and output dicts...")
            self.load_dicts()

        else:
            logging.info("computing input and output dicts...")

            self.compute_dicts()
            self.save_dicts()

        #
github gooofy / zamia-speech / speech_kaldi_export.py View on Github external
def export_kaldi_data (wav16_dir, audio_corpora, destdirfn, tsdict):
    """Write the kaldi data files (wav.scp, utt2spk, text) for a transcript set.

    wav16_dir     -- base directory containing per-corpus 16 kHz wav files
    audio_corpora -- not referenced in this function body (kept for interface)
    destdirfn     -- output path prefix (expected to already end in '/')
    tsdict        -- maps utt_id -> {'ts': ..., 'corpus_name': ..., 'spk': ...}
    """
    logging.info ( "Exporting kaldi data to %s..." % destdirfn)

    # make sure the output directory exists
    misc.mkdirs(destdirfn)

    with open(destdirfn+'wav.scp','w') as wavscpf,  \
         open(destdirfn+'utt2spk','w') as utt2spkf, \
         open(destdirfn+'text','w') as textf:

        # emit one line per utterance to each file, sorted by utterance id
        for utt_id in sorted(tsdict):
            entry = tsdict[utt_id]

            # transcript (utf8-encoded, file was opened in byte mode)
            textf.write((u'%s %s\n' % (utt_id, entry['ts'])).encode('utf8'))

            # location of this utterance's wav file
            wavscpf.write('%s %s/%s/%s.wav\n' % (utt_id, wav16_dir,
                                                 entry['corpus_name'], utt_id))

            # utterance -> speaker mapping
            utt2spkf.write('%s %s\n' % (utt_id, entry['spk']))
github gooofy / zamia-speech / speech_kaldi_export.py View on Github external
# config
#

# load the user's .speechrc configuration
config = misc.load_config ('.speechrc')

kaldi_root = config.get("speech", "kaldi_root")
wav16_dir  = config.get("speech", "wav16")

#
# create basic work dir structure
#

# FIXME: unused, remove misc.mkdirs('%s/lexicon' % data_dir)
misc.mkdirs('%s/local/dict' % data_dir)
misc.mkdirs(wav16_dir)
misc.mkdirs(mfcc_dir)
# link the language model and kaldi's shared wsj helper scripts into the work dir
misc.symlink('../../../../../%s' % language_model_dir, '%s/lm' % work_dir)
misc.symlink('%s/egs/wsj/s5/steps' % kaldi_root, '%s/steps' % work_dir)
misc.symlink('%s/egs/wsj/s5/utils' % kaldi_root, '%s/utils' % work_dir)

#
# generate speech and text corpora
#

logging.info("loading lexicon...")
lex = Lexicon(file_name=dictionary)
logging.info("loading lexicon...done.")

# NOTE(review): presumably add_all lets out-of-lexicon words be added via the
# sequitur g2p model when one is configured — confirm against downstream use
if sequitur_model_path:
    add_all = True
else:
    add_all = False
github gooofy / zamia-speech / import_mozcv1.py View on Github external
# NOTE(review): Python 2 fragment (print statements); converts the Mozilla
# Common Voice v1 csv indexes into a voxforge-style corpus layout.
cnt = 0
# ffmpeg conversion commands are emitted into a script for parallel execution
with open('tmp/run_parallel.sh', 'w') as scriptf:
    for csvfn in ['cv-valid-test.csv', 'cv-valid-train.csv', 'cv-valid-dev.csv']:
        with codecs.open('%s/cv_corpus_v1/%s' % (speech_arc, csvfn), 'r', 'utf8') as csvfile:
            r = csv.reader(csvfile, delimiter=',', quotechar='|')
            first = True
            for row in r:
                # skip each csv file's header row
                if first:
                    first = False
                    continue
                print ', '.join(row)
             
                # derive a filesystem-safe utterance id from the mp3 path (row[0])
                uttid = wavfn = row[0].replace('/', '_').replace('.mp3', '').replace('-', '_')
                # NOTE(review): speaker id equals utterance id here, i.e. one
                # "speaker" per utterance — verify this is intended
                spk = uttid

                misc.mkdirs('%s/cv_corpus_v1/%s-v1/etc' % (speech_corpora, spk))
                misc.mkdirs('%s/cv_corpus_v1/%s-v1/wav' % (speech_corpora, spk))

                # append the prompt text (row[1]) to the prompts file
                with codecs.open ('%s/cv_corpus_v1/%s-v1/etc/prompts-original' % (speech_corpora, spk), 'a', 'utf8') as promptsf:
                    promptsf.write('%s %s\n' % (uttid, row[1]))

                # mp3 -> wav conversion, run as a background job from the script
                wavfn = '%s/cv_corpus_v1/%s-v1/wav/%s.wav' % (speech_corpora, spk, uttid)
                cmd = 'ffmpeg -i %s/cv_corpus_v1/%s %s' % (speech_arc, row[0], wavfn)
                print cnt, wavfn
                scriptf.write('echo %6d %s &\n' % (cnt, wavfn))
                scriptf.write('%s &\n' % cmd)

                cnt += 1
                # throttle parallelism: wait after every num_cpus jobs
                if (cnt % options.num_cpus) == 0:
                    scriptf.write('wait\n')

cmd = "bash tmp/run_parallel.sh"
github gooofy / zamia-speech / abook-librivox.py View on Github external
# extract wav audios of book(s), if requested
#

# remaining command line arguments are librivox book ids
for book_id in args[1:]:

    # linear scan of the book catalog for the requested id
    for book in books:
        if book['id'] != book_id:
            continue

        title = book['id'] + '-' + mangle_title(book['title'])

        print book['id'], title, book['totaltime']
        print "    ", book['url_project']

        # target directory for the extracted audio
        book_dir = 'abook/in/librivox/%s' % title
        misc.mkdirs(book_dir)

        print "%s created." % book_dir

        # local zip file name is the basename of the book's download url
        url = book['url_zip_file']
        zipfilefn = '%s/%s' % (librivox_zipdir, url[url.rfind("/")+1:])

        print "Extracting audio from zip file %s ..." % zipfilefn

        with ZipFile(zipfilefn, 'r') as zipfile:
      
            # collect the archive's mp3 entries in sorted order
            # (processing of mp3s continues beyond this chunk)
            mp3s = []
            for mp3fn in sorted (zipfile.namelist()):
                if not mp3fn.endswith('.mp3'):
                    continue
                mp3s.append(mp3fn)
github gooofy / zamia-speech / speech_gen_noisy.py View on Github external
# NOTE(review): heavily truncated fragment — the matching 'if' branch above and
# the body of the final condition below are not visible in this chunk, and the
# last line is syntactically incomplete (missing ':').
else:
        cfn   = transcripts[ts]['cfn']

    entry = transcripts[cfn]

    if entry['quality']
github adamcsvarga / speaker-clustering / run.py View on Github external
# NOTE(review): fragment starts mid-loop; the enclosing with/for statements for
# the first few lines are not visible in this chunk.
cmd = './cluster_individual.sh wav/%s' % fn
                print "%6d/%6d %s" % (cnt, total, cmd)
                scriptf.write('echo %s\n' %fn)
                scriptf.write('%s &\n' % cmd)

        # wait for the final batch of background jobs
        scriptf.write('wait\n')

    os.system('bash run_parallel.sh')

########################################################
# Get a sample from each file for each cluster         #
########################################################

if stage <= 2:

    misc.mkdirs('sample')

    # # save all clusters appearing in each file
    # for f in data/*; do
    #   fname=`echo "$f" | rev | cut -f1 -d'/' | rev`
    #   echo python get_clust.py ${f}/${fname}.c.3.seg
    #   python get_clust.py ${f}/${fname}.c.3.seg
    # done

    with open ('run_parallel.sh', 'w') as scriptf:

        cnt = 0
        for fn in os.listdir('data'):
            cnt += 1

            # throttle parallelism: insert a wait after every nj jobs
            if (cnt % nj) == 0:
                scriptf.write('wait\n')
github gooofy / zamia-speech / abook-kaldi-retrieve.py View on Github external
# NOTE(review): fragment starts mid-loop; 'parts' comes from a line split that
# is not visible in this chunk (fields: seg id, wav path, start, end).
seg_id     = parts[0]
        wavfn      = parts[1]
        wav_id     = os.path.basename(wavfn)
        seg_start  = float(parts[2])
        seg_end    = float(parts[3])

        #
        # create output dir structure if it doesn't exist
        #

        outdirfn = 'abook/out/%s' % os.path.basename(wav_id)

        if not os.path.exists(outdirfn):
            logging.info ('creating %s ...' % outdirfn)
            # voxforge-style layout: etc/ for prompts, wav/ for audio
            misc.mkdirs(outdirfn)
            misc.mkdirs('%s/etc' % outdirfn)
            misc.mkdirs('%s/wav' % outdirfn)

        #
        # prompt
        #

        # sequential utterance ids of the form 'de5-000042'
        uid = 'de5-%06d' % segcnt
        segcnt += 1

        # append this segment's prompt to the prompts file
        prompt    = promptsdict[seg_id]
        promptsfn = '%s/etc/prompts-original' % outdirfn
        with codecs.open (promptsfn, 'a', 'utf8') as promptsf:
            promptsf.write(u'%s %s\n' % (uid, prompt))