Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def jyutping(jpString):
    """
    Parse a string of Cantonese jyutping romanization into its components.

    jpString is treated as one or more jyutping syllables written back to
    back, each syllable ending in a single tone digit (e.g. 'gwong2dung1').
    The string is lower-cased before parsing.

    Returns a list of 4-tuples, one per syllable, each as
    (onset, nucleus, coda, tone), where every element is a string.
    Syllabic nasals and bare vowels ('m', 'ng', 'i', 'e', 'aa', 'o', 'u')
    are returned with the whole segment as the nucleus and empty onset/coda.

    Raises JyutpingError if jpString is not a string, if it is empty or does
    not end in a tone digit, or if any syllable has an invalid tone, coda,
    nucleus or onset.

    NOTE: valid tones and onsets are looked up in the module-level TONE and
    ONSET collections, which are defined outside this function.
    """
    ## check jpString as a valid argument string
    if not isinstance(jpString, str):
        raise JyutpingError('argument needs to be a string -- ' +
                            repr(jpString))
    jpString = jpString.lower()

    # Slice instead of indexing so that an empty string is reported as a
    # tone error rather than escaping as an IndexError.
    if not jpString[-1:].isdigit():
        raise JyutpingError('tone error -- ' + repr(jpString))

    ## parse jpString as multiple jp strings
    # Every digit closes the syllable accumulated so far.
    jpList = list()
    jpCurrent = ''
    for c in jpString:
        jpCurrent = jpCurrent + c
        if c.isdigit():
            jpList.append(jpCurrent)
            jpCurrent = ''

    jpParsedList = list()

    for jp in jpList:
        # A lone digit (e.g. the second '1' in 'aa11') has no segments.
        if len(jp) < 2:
            raise JyutpingError('argument string needs to contain '
                                'at least 2 characters -- ' + repr(jp))

        ## tone
        # jp always ends in a digit by construction; it still has to be
        # one of the tones listed in TONE.
        tone = jp[-1]
        if tone not in TONE:
            raise JyutpingError('tone error -- ' + repr(jp))
        cvc = jp[:-1]

        ## coda
        if not (cvc[-1] in 'ieaouptkmng'):
            raise JyutpingError('coda error -- ' + repr(jp))

        if cvc in ['m', 'ng', 'i', 'e', 'aa', 'o', 'u']:
            # syllabic nasal or bare vowel: the whole segment is the nucleus
            jpParsedList.append(('', cvc, '', tone))
            continue
        elif cvc[-2:] == 'ng':
            coda = 'ng'
            cv = cvc[:-2]
        elif (cvc[-1] in 'ptkmn') or \
                ((cvc[-1] == 'i') and (cvc[-2] in 'eaou')) or \
                ((cvc[-1] == 'u') and (cvc[-2] in 'ieao')):
            # stop/nasal coda, or the off-glide of a diphthong
            coda = cvc[-1]
            cv = cvc[:-1]
        else:
            coda = ''
            cv = cvc

        ## nucleus
        # Peel vowel letters off the right end of what remains; whatever is
        # left over is the onset.  The `cv and` guard covers syllables that
        # consist of a coda consonant only (e.g. 'p1'), which must surface
        # as a nucleus error rather than an IndexError.
        nucleus = ''
        while cv and cv[-1] in 'ieaouy':
            nucleus = cv[-1] + nucleus
            cv = cv[:-1]

        if not nucleus:
            raise JyutpingError('nucleus error -- ' + repr(jp))

        ## onset
        onset = cv
        if onset not in ONSET:
            raise JyutpingError('onset error -- ' + repr(jp))

        jpParsedList.append((onset, nucleus, coda, tone))

    return jpParsedList