How to use the pycantonese.JyutpingError exception in pycantonese

To help you get started, we’ve selected a few pycantonese examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github jacksonllee / pycantonese / pycantonese.py View on Github external
def jyutping(jpString):
    """
    parses jpString as a list of Cantonese romanization jyutping strings and
    outputs a list of 4-tuples, each as (onset, nucleus, coda, tone)

    raises JyutpingError if jpString is not a string, is empty, or does not
    end with a digit
    """

    ## check jpString as a valid argument string
    ## (report jpString itself: no per-syllable ``jp`` is bound yet here)
    if not isinstance(jpString, str):
        raise JyutpingError('argument needs to be a string -- ' +
                            repr(jpString))
    jpString = jpString.lower()

    ## parse jpString as multiple jp strings; each digit closes one jp string
    jpList = list()
    jpCurrent = ''
    for c in jpString:
        jpCurrent = jpCurrent + c
        if c.isdigit():
            jpList.append(jpCurrent)
            jpCurrent = ''

    ## an empty string has no last character to inspect, and trailing
    ## characters after the final digit would otherwise be silently dropped
    if not jpString or not jpString[-1].isdigit():
        raise JyutpingError('tone error -- ' + repr(jpString))

    jpParsedList = list()
github jacksonllee / pycantonese / pycantonese.py View on Github external
## check jpString as a valid argument string
    if type(jpString) is not str:
        raise JyutpingError('argument needs to be a string -- ' + repr(jp))
    jpString = jpString.lower()

    ## parse jpString as multiple jp strings
    jpList = list()
    jpCurrent = ''
    for c in jpString:
        jpCurrent = jpCurrent + c
        if c.isdigit():
            jpList.append(jpCurrent)
            jpCurrent = ''

    if not jpString[-1].isdigit():
        raise JyutpingError('tone error -- ' + repr(jp))

    jpParsedList = list()

    for jp in jpList:

        if len(jp) < 2:
            raise JyutpingError('argument string needs to contain '
                                'at least 2 characters -- ' + repr(jp))

        ## tone
        if (not jp[-1].isdigit()) or (jp[-1] not in TONE):
            raise JyutpingError('tone error -- ' + repr(jp))

        tone = jp[-1]
        cvc = jp[:-1]
github jacksonllee / pycantonese / pycantonese.py View on Github external
jpCurrent = ''

    if not jpString[-1].isdigit():
        raise JyutpingError('tone error -- ' + repr(jp))

    jpParsedList = list()

    for jp in jpList:

        if len(jp) < 2:
            raise JyutpingError('argument string needs to contain '
                                'at least 2 characters -- ' + repr(jp))

        ## tone
        if (not jp[-1].isdigit()) or (jp[-1] not in TONE):
            raise JyutpingError('tone error -- ' + repr(jp))

        tone = jp[-1]
        cvc = jp[:-1]

        ## coda
        if not (cvc[-1] in 'ieaouptkmng'):
            raise JyutpingError('coda error -- ' + repr(jp))

        if cvc in ['m', 'ng', 'i', 'e', 'aa', 'o', 'u']:
            jpParsedList.append(('', cvc, '', tone))
            continue
        elif cvc[-2:] == 'ng':
            coda = 'ng'
            cv = cvc[:-2]
        elif (cvc[-1] in 'ptkmn') or \
             ((cvc[-1] == 'i') and (cvc[-2] in 'eaou')) or \
github jacksonllee / pycantonese / pycantonese.py View on Github external
for jp in jpList:

        if len(jp) < 2:
            raise JyutpingError('argument string needs to contain '
                                'at least 2 characters -- ' + repr(jp))

        ## tone
        if (not jp[-1].isdigit()) or (jp[-1] not in TONE):
            raise JyutpingError('tone error -- ' + repr(jp))

        tone = jp[-1]
        cvc = jp[:-1]

        ## coda
        if not (cvc[-1] in 'ieaouptkmng'):
            raise JyutpingError('coda error -- ' + repr(jp))

        if cvc in ['m', 'ng', 'i', 'e', 'aa', 'o', 'u']:
            jpParsedList.append(('', cvc, '', tone))
            continue
        elif cvc[-2:] == 'ng':
            coda = 'ng'
            cv = cvc[:-2]
        elif (cvc[-1] in 'ptkmn') or \
             ((cvc[-1] == 'i') and (cvc[-2] in 'eaou')) or \
             ((cvc[-1] == 'u') and (cvc[-2] in 'ieao')):
            coda = cvc[-1]
            cv = cvc[:-1]
        else:
            coda = ''
            cv = cvc
github jacksonllee / pycantonese / pycantonese.py View on Github external
validFinal = False

    for i in range(1, len(what_final)+1):
        possibleNucleus = what_final[: i]
        possibleCoda = what_final[i :]

        if (possibleNucleus in NUCLEUS) and (possibleCoda in CODA):
            validFinal = True
            what_nucleus = possibleNucleus
            what_coda = possibleCoda
            break

    if validFinal:
        return search_jp(corpus, [(what_nucleus, 1), (what_coda, 2)], output)
    else:
        raise JyutpingError('final error -- ' + repr(what_final))
github jacksonllee / pycantonese / pycantonese.py View on Github external
while cv[-1] in 'ieaouy':
            nucleus = cv[-1] + nucleus
            cv = cv[:-1]
            if not cv:
                break

        if not nucleus:
            raise JyutpingError('nucleus error -- ' + repr(jp))

        onset = cv



        if onset not in ONSET:
            raise JyutpingError('onset error -- ' + repr(jp))

        jpParsedList.append((onset, nucleus, coda, tone))

    return jpParsedList