Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def deleteVowels(inputTGFN, inputWavFN, outputPath, doShrink,
atZeroCrossing=True):
utils.makeDir(outputPath)
wavFN = os.path.split(inputWavFN)[1]
tgFN = os.path.split(inputTGFN)[1]
outputWavFN = join(outputPath, wavFN)
outputTGFN = join(outputPath, tgFN)
if atZeroCrossing is True:
zeroCrossingTGPath = join(outputPath, "zero_crossing_tgs")
zeroCrossingTGFN = join(zeroCrossingTGPath, tgFN)
utils.makeDir(zeroCrossingTGPath)
tg = tgio.openTextgrid(inputTGFN)
wavObj = audioio.WavQueryObj(inputWavFN)
praatio_scripts.tgBoundariesToZeroCrossings(tg,
wavObj,
zeroCrossingTGFN)
else:
tg = tgio.openTextgrid(inputTGFN)
keepList = tg.tierDict["phone"].entryList
keepList = [entry for entry in keepList
if not isVowel(entry[2])]
deleteList = utils.invertIntervalList(keepList, tg.maxTimestamp)
wavObj = audioio.openAudioFile(inputWavFN,
keepList=keepList,
def _parseShortTextgrid(data):
'''
Reads a short textgrid file
'''
newTG = Textgrid()
intervalIndicies = [(i, True)
for i in utils.findAll(data, '"IntervalTier"')]
pointIndicies = [(i, False) for i in utils.findAll(data, '"TextTier"')]
indexList = intervalIndicies + pointIndicies
indexList.append((len(data), None)) # The 'end' of the file
indexList.sort()
tupleList = [(indexList[i][0], indexList[i + 1][0], indexList[i][1])
for i in range(len(indexList) - 1)]
# Set the textgrid's min and max times
header = data[:tupleList[0][0]]
headerList = header.split("\n")
tgMin = float(headerList[3].strip())
tgMax = float(headerList[4].strip())
newTG.minTimestamp = tgMin
tmpOutputPath, sampleStep=0.01,
silenceThreshold=0.03, pitchUnit="Hertz",
forceRegenerate=True, undefinedValue=None,
medianFilterWindowSize=0, pitchQuadInterp=False):
'''
Extracts pitch and int from each labeled interval in a textgrid
This has the benefit of being faster than using _extractPIFile if only
labeled regions need to have their pitch values sampled, particularly
for longer files.
Returns the result as a list. Will load the serialized result
if this has already been called on the appropriate files before
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
windowSize = medianFilterWindowSize
assert(os.path.exists(inputFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
utils.makeDir(tmpOutputPath)
splitAudioList = praatio_scripts.splitAudioOnTier(inputFN,
tgFN,
tierName,
tmpOutputPath,
False)
allPIList = []
for start, _, fn in splitAudioList:
tmpTrackName = os.path.splitext(fn)[0] + ".txt"
def deleteVowels(inputTGFN, inputWavFN, outputPath, doShrink,
atZeroCrossing=True):
utils.makeDir(outputPath)
wavFN = os.path.split(inputWavFN)[1]
tgFN = os.path.split(inputTGFN)[1]
outputWavFN = join(outputPath, wavFN)
outputTGFN = join(outputPath, tgFN)
if atZeroCrossing is True:
zeroCrossingTGPath = join(outputPath, "zero_crossing_tgs")
zeroCrossingTGFN = join(zeroCrossingTGPath, tgFN)
utils.makeDir(zeroCrossingTGPath)
tg = tgio.openTextgrid(inputTGFN)
wavObj = audioio.WavQueryObj(inputWavFN)
praatio_scripts.tgBoundariesToZeroCrossings(tg,
wavObj,
def deleteVowels(inputTGFN, inputWavFN, outputPath, doShrink,
atZeroCrossing=True):
utils.makeDir(outputPath)
wavFN = os.path.split(inputWavFN)[1]
tgFN = os.path.split(inputTGFN)[1]
outputWavFN = join(outputPath, wavFN)
outputTGFN = join(outputPath, tgFN)
if atZeroCrossing is True:
zeroCrossingTGPath = join(outputPath, "zero_crossing_tgs")
zeroCrossingTGFN = join(zeroCrossingTGPath, tgFN)
utils.makeDir(zeroCrossingTGPath)
tg = tgio.openTextgrid(inputTGFN)
wavObj = audioio.WavQueryObj(inputWavFN)
praatio_scripts.tgBoundariesToZeroCrossings(tg,
wavObj,
zeroCrossingTGFN)
else:
tg = tgio.openTextgrid(inputTGFN)
keepList = tg.tierDict["phone"].entryList
keepList = [entry for entry in keepList
if not isVowel(entry[2])]
deleteList = utils.invertIntervalList(keepList, tg.maxTimestamp)
'''
Extract pitch at regular intervals from the input wav file
Data is output to a text file and then returned in a list in the form
[(timeV1, pitchV1), (timeV2, pitchV2), ...]
sampleStep - the frequency to sample pitch at
silenceThreshold - segments with lower intensity won't be analyzed
for pitch
forceRegenerate - if running this function for the same file, if False
just read in the existing pitch file
pitchQuadInterp - if True, quadratically interpolate pitch
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
if pitchQuadInterp is True:
doInterpolation = 1
else:
doInterpolation = 0
assert(os.path.exists(wavFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
if os.path.exists(outputFN):
os.remove(outputFN)
argList = [wavFN, outputFN, sampleStep,
minPitch, maxPitch, silenceThreshold,
medianFilterWindowSize, doInterpolation]
def _extractPIFile(inputFN, outputFN, praatEXE,
minPitch, maxPitch, sampleStep=0.01, silenceThreshold=0.03,
pitchUnit="Hertz", forceRegenerate=True,
undefinedValue=None, medianFilterWindowSize=0,
pitchQuadInterp=False):
'''
Extracts pitch and intensity values from an audio file
Returns the result as a list. Will load the serialized result
if this has already been called on the appropriate files before
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
assert(os.path.exists(inputFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
# The praat script uses append mode, so we need to clear any prior
# result
if os.path.exists(outputFN):
os.remove(outputFN)
if pitchQuadInterp is True:
doInterpolation = 1
else:
doInterpolation = 0
argList = [inputFN, outputFN, sampleStep,
labeled regions need to have their pitch values sampled, particularly
for longer files.
Returns the result as a list. Will load the serialized result
if this has already been called on the appropriate files before
'''
outputPath = os.path.split(outputFN)[0]
utils.makeDir(outputPath)
windowSize = medianFilterWindowSize
assert(os.path.exists(inputFN))
firstTime = not os.path.exists(outputFN)
if firstTime or forceRegenerate is True:
utils.makeDir(tmpOutputPath)
splitAudioList = praatio_scripts.splitAudioOnTier(inputFN,
tgFN,
tierName,
tmpOutputPath,
False)
allPIList = []
for start, _, fn in splitAudioList:
tmpTrackName = os.path.splitext(fn)[0] + ".txt"
piList = _extractPIFile(join(tmpOutputPath, fn),
join(tmpOutputPath, tmpTrackName),
praatEXE, minPitch, maxPitch,
sampleStep, silenceThreshold,
pitchUnit, forceRegenerate=True,
medianFilterWindowSize=windowSize,
pitchQuadInterp=pitchQuadInterp)
piList = [("%0.3f" % (float(time) + start), str(pV), str(iV))
def getFormants(praatEXE, inputWavFN, outputTxtFN, maxFormant,
stepSize=0.01, window_length=0.025, preemphasis=50,
scriptFN=None, undefinedValue=None):
'''
Get F1, F2, and F3 for the audio file
maxFormant = 5500 for females, 5000 for males, <8000 for children
'''
if scriptFN is None:
scriptFN = join(utils.scriptsPath, "get_formants.praat")
argList = [inputWavFN, outputTxtFN, stepSize, maxFormant, window_length,
preemphasis, -1, -1]
utils.runPraatScript(praatEXE, scriptFN, argList)
# Load the output
path, fn = os.path.split(outputTxtFN)
dataList = utils.openCSV(path, fn)
# The new praat script includes a header
if dataList[0][0] == "time":
dataList = dataList[1:]
# Handle undefined values, convert values to float
returnList = []
for row in dataList:
keep = True
for i in range(1, 4):
if '--' in row[i]:
if undefinedValue is not None:
def getSpectralInfo(praatEXE, inputWavFN, inputTGFN, outputCSVFN, tierName,
                    spectralPower=2, spectralMoment=3, scriptFN=None):
    '''
    Runs a praat script over an audio file and loads the csv it writes

    See http://www.fon.hum.uva.nl/praat/manual/Spectrum.html

    The measures named by the original author are: center of gravity,
    standard deviation, skewness, kurtosis, and central moment.  The
    actual column names are whatever the script writes in its first row.

    praatEXE - passed through to utils.runPraatScript
    inputWavFN, inputTGFN, outputCSVFN, tierName, spectralPower,
        spectralMoment - handed to the praat script, in that order
    scriptFN - the praat script to run; if None, "get_spectral_info.praat"
        in utils.scriptsPath is used

    Returns (titleRow, dataList): the first line of the csv split on
    commas, and a list of the remaining lines, each split on commas.
    All values are returned as strings.
    '''
    # Fall back to the script stored under utils.scriptsPath
    if scriptFN is None:
        scriptFN = join(utils.scriptsPath, "get_spectral_info.praat")

    utils.runPraatScript(praatEXE, scriptFN,
                         [inputWavFN, inputTGFN, outputCSVFN, tierName,
                          spectralPower, spectralMoment])

    # Read back what the script wrote; trailing whitespace is dropped so
    # that a final newline does not produce an empty last row
    with io.open(outputCSVFN, "r", encoding="utf-8") as fd:
        lines = fd.read().rstrip().split("\n")

    rows = [line.split(",") for line in lines]

    return rows[0], rows[1:]