Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
tmpOutputPath, sampleStep=0.01,
silenceThreshold=0.03, pitchUnit="Hertz",
forceRegenerate=True, undefinedValue=None,
medianFilterWindowSize=0, pitchQuadInterp=False):
'''
Extracts pitch and intensity from each labeled interval in a textgrid
This has the benefit of being faster than using _extractPIFile if only
labeled regions need to have their pitch values sampled, particularly
for longer files.
Returns the result as a list. Will load the serialized result
if this has already been called on the appropriate files before
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
windowSize = medianFilterWindowSize
assert(os.path.exists(inputFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
utils.makeDir(tmpOutputPath)
splitAudioList = praatio_scripts.splitAudioOnTier(inputFN,
tgFN,
tierName,
tmpOutputPath,
False)
allPIList = []
for start, _, fn in splitAudioList:
tmpTrackName = os.path.splitext(fn)[0] + ".txt"
def deleteVowels(inputTGFN, inputWavFN, outputPath, doShrink,
atZeroCrossing=True):
utils.makeDir(outputPath)
wavFN = os.path.split(inputWavFN)[1]
tgFN = os.path.split(inputTGFN)[1]
outputWavFN = join(outputPath, wavFN)
outputTGFN = join(outputPath, tgFN)
if atZeroCrossing is True:
zeroCrossingTGPath = join(outputPath, "zero_crossing_tgs")
zeroCrossingTGFN = join(zeroCrossingTGPath, tgFN)
utils.makeDir(zeroCrossingTGPath)
tg = tgio.openTextgrid(inputTGFN)
wavObj = audioio.WavQueryObj(inputWavFN)
praatio_scripts.tgBoundariesToZeroCrossings(tg,
wavObj,
def deleteVowels(inputTGFN, inputWavFN, outputPath, doShrink,
atZeroCrossing=True):
utils.makeDir(outputPath)
wavFN = os.path.split(inputWavFN)[1]
tgFN = os.path.split(inputTGFN)[1]
outputWavFN = join(outputPath, wavFN)
outputTGFN = join(outputPath, tgFN)
if atZeroCrossing is True:
zeroCrossingTGPath = join(outputPath, "zero_crossing_tgs")
zeroCrossingTGFN = join(zeroCrossingTGPath, tgFN)
utils.makeDir(zeroCrossingTGPath)
tg = tgio.openTextgrid(inputTGFN)
wavObj = audioio.WavQueryObj(inputWavFN)
praatio_scripts.tgBoundariesToZeroCrossings(tg,
wavObj,
zeroCrossingTGFN)
else:
tg = tgio.openTextgrid(inputTGFN)
keepList = tg.tierDict["phone"].entryList
keepList = [entry for entry in keepList
if not isVowel(entry[2])]
deleteList = utils.invertIntervalList(keepList, tg.maxTimestamp)
'''
Extract pitch at regular intervals from the input wav file
Data is output to a text file and then returned in a list in the form
[(timeV1, pitchV1), (timeV2, pitchV2), ...]
sampleStep - the frequency to sample pitch at
silenceThreshold - segments with lower intensity won't be analyzed
for pitch
forceRegenerate - if running this function for the same file, if False
just read in the existing pitch file
pitchQuadInterp - if True, quadratically interpolate pitch
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
if pitchQuadInterp is True:
doInterpolation = 1
else:
doInterpolation = 0
assert(os.path.exists(wavFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
if os.path.exists(outputFN):
os.remove(outputFN)
argList = [wavFN, outputFN, sampleStep,
minPitch, maxPitch, silenceThreshold,
medianFilterWindowSize, doInterpolation]
def _extractPIFile(inputFN, outputFN, praatEXE,
minPitch, maxPitch, sampleStep=0.01, silenceThreshold=0.03,
pitchUnit="Hertz", forceRegenerate=True,
undefinedValue=None, medianFilterWindowSize=0,
pitchQuadInterp=False):
'''
Extracts pitch and intensity values from an audio file
Returns the result as a list. Will load the serialized result
if this has already been called on the appropriate files before
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
assert(os.path.exists(inputFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
# The praat script uses append mode, so we need to clear any prior
# result
if os.path.exists(outputFN):
os.remove(outputFN)
if pitchQuadInterp is True:
doInterpolation = 1
else:
doInterpolation = 0
argList = [inputFN, outputFN, sampleStep,
labeled regions need to have their pitch values sampled, particularly
for longer files.
Returns the result as a list. Will load the serialized result
if this has already been called on the appropriate files before
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
windowSize = medianFilterWindowSize
assert(os.path.exists(inputFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
utils.makeDir(tmpOutputPath)
splitAudioList = praatio_scripts.splitAudioOnTier(inputFN,
tgFN,
tierName,
tmpOutputPath,
False)
allPIList = []
for start, _, fn in splitAudioList:
tmpTrackName = os.path.splitext(fn)[0] + ".txt"
piList = _extractPIFile(join(tmpOutputPath, fn),
join(tmpOutputPath, tmpTrackName),
praatEXE, minPitch, maxPitch,
sampleStep, silenceThreshold,
pitchUnit, forceRegenerate=True,
medianFilterWindowSize=windowSize,
pitchQuadInterp=pitchQuadInterp)
piList = [("%0.3f" % (float(time) + start), str(pV), str(iV))
# Input locations: the example wav files and textgrids share one folder.
wavPath = os.path.abspath(join(".", "files"))
tgPath = os.path.abspath(join(".", "files"))

# Every result of this script is written somewhere beneath this folder.
rootOutputFolder = os.path.abspath(join(".", "files", "pitch_extraction"))
pitchPath = join(rootOutputFolder, "pitch")
formantsPath = join(rootOutputFolder, "formants")
pitchMeasuresPath = join(rootOutputFolder, "pitch_measures")
rmsIntensityPath = join(rootOutputFolder, "rms_intensity")

# Location of the praat executable (Windows default shown here).
# On macOS use "/Applications/Praat.app/Contents/MacOS/Praat" instead.
praatEXE = r"C:\Praat.exe"

# Create the output folders up front, root folder first.
for outputFolder in (rootOutputFolder,
                     pitchPath,
                     pitchMeasuresPath,
                     rmsIntensityPath,
                     formantsPath):
    utils.makeDir(outputFolder)

# Run the extraction on bobby.wav, writing the result to bobby.txt.
# NOTE(review): 50 and 350 are presumably the min/max pitch passed on to
# praat -- confirm against the signature of extractPI.
# forceRegenerate=False asks extractPI not to force a regeneration of
# the output.
bobbyPitchData = pitch_and_intensity.extractPI(
    join(wavPath, "bobby.wav"),
    join(pitchPath, "bobby.txt"),
    praatEXE, 50, 350,
    forceRegenerate=False)
# Here are two examples of the new functionality of extracting pitch
# from only labeled intervals in a textgrid. However, the example files
# I have provided are too short and praat will not process them.
# Extracts each labeled interval as a separate wave file, extracts the
# pitch track from each of those, and then aggregates the result.
# pitch_and_intensity.extractPI(join(wavPath, "bobby.wav"),
# join(pitchPath, "bobby_segments.txt"),
# praatEXE, 50, 350,
# forceRegenerate=True,
def autoSegmentSpeech(praatEXE, inputWavPath, rawTGPath, finalTGPath):
    '''
    Runs silence annotation and then post-processes each resulting textgrid

    praatEXE - passed through to praat_scripts.annotateSilences
    inputWavPath - passed through to praat_scripts.annotateSilences
    rawTGPath - passed to annotateSilences and afterwards searched for
                .TextGrid files
    finalTGPath - folder (created if needed) that receives one processed
                  file per textgrid found in rawTGPath, under the same
                  file name
    '''
    utils.makeDir(finalTGPath)

    # NOTE(review): presumably this writes one textgrid per wav file into
    # rawTGPath -- confirm against praat_scripts.annotateSilences
    praat_scripts.annotateSilences(praatEXE, inputWavPath, rawTGPath)

    textgridNames = utils.findFiles(rawTGPath, filterExt=".TextGrid")
    for textgridName in textgridNames:
        rawFN = join(rawTGPath, textgridName)
        finalFN = join(finalTGPath, textgridName)
        markTranscriptForAnnotations(rawFN, "silences", finalFN)