How to use the epitran.puncnorm.PuncNorm function in epitran

To help you get started, we’ve selected a few epitran examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dmort27 / epitran / epitran / flite.py View on Github external
def __init__(self, arpabet='arpabet', ligatures=False, cedict_file=None):
        """Construct a Flite "wrapper"

        Args:
            arpabet (str): file containing ARPAbet to IPA mapping
            ligatures (bool): if True, use non-standard ligatures instead of
                              standard IPA
            cedict_filename (str): path to CC-CEDict dictionary (included for
                                   compatibility)
        """
        arpabet = pkg_resources.resource_filename(__name__, os.path.join('data', arpabet + '.csv'))
        self.arpa_map = self._read_arpabet(arpabet)
        self.chunk_re = re.compile(r"([A-Za-z'’]+|[^A-Za-z'’]+)", re.U)
        self.letter_re = re.compile(r"[A-Za-z'’]+")
        self.regexp = re.compile(r'[A-Za-z]')
        self.puncnorm = PuncNorm()
        self.ligatures = ligatures
        self.ft = panphon.FeatureTable()
        self.num_panphon_fts = len(self.ft.names)
github dmort27 / epitran / epitran / _epitran.py View on Github external
preproc (bool): apply preprocessors
            postproc (bool): apply prostprocessors
            ligatures (bool): use precomposed ligatures instead of standard IPA
            cedict_filename (str): path to file containing the CC-CEDict
                                   dictionary; relevant only for Chinese
            rev (boolean): if True, load reverse transliteration
            rev_preproc (bool): if True, apply preprocessor when reverse transliterating
            rev_postproc (bool): if True, apply postprocessor when reverse transliterating
        """
        if code in self.special:
            self.epi = self.special[code](ligatures=ligatures, cedict_file=cedict_file)
        else:
            self.epi = SimpleEpitran(code, preproc, postproc, ligatures, rev, rev_preproc, rev_postproc)
        self.ft = panphon.featuretable.FeatureTable()
        self.xsampa = XSampa()
        self.puncnorm = PuncNorm()
github dmort27 / epitran / epitran / simple.py View on Github external
"""Constructs the backend object epitran uses for most languages

        Args:
            code (str): ISO 639-3 code and ISO 15924 code joined with a hyphen
            preproc (bool): if True, apply preprocessor
            postproc (bool): if True, apply postprocessors
            ligatures (bool): if True, use phonetic ligatures for affricates
                              instead of standard IPA
            rev (bool): if True, load reverse transliteration
            rev_preproc (bool): if True, apply preprocessor when reverse transliterating
            rev_postproc (bool): if True, apply postprocessor when reverse transliterating
        """
        self.rev = rev
        self.g2p = self._load_g2p_map(code, False)
        self.regexp = self._construct_regex(self.g2p.keys())
        self.puncnorm = PuncNorm()
        self.ft = panphon.FeatureTable()
        self.num_panphon_fts = len(self.ft.names)
        self.preprocessor = PrePostProcessor(code, 'pre', False)
        self.postprocessor = PrePostProcessor(code, 'post', False)
        self.strip_diacritics = StripDiacritics(code)
        self.preproc = preproc
        self.postproc = postproc
        self.ligatures = ligatures
        self.rev_preproc = rev_preproc
        self.rev_postproc = rev_postproc
        if rev:
            self.rev_g2p = self._load_g2p_map(code, True)
            self.rev_regexp = self._construct_regex(self.rev_g2p.keys())
            self.rev_preprocessor = PrePostProcessor(code, 'pre', True)
            self.rev_postprocessor = PrePostProcessor(code, 'post', True)
github dmort27 / epitran / epitran / backoff.py View on Github external
def __init__(self, lang_script_codes, cedict_file=None):
        """Construct a Backoff object.

        Args:
            lang_script_codes (list): codes for languages to try, starting
            with the highest priority languages
            cedict_file (str): path to the CC-CEdict dictionary file
            (necessary only when cmn-Hans or cmn-Hant are used)
        """
        self.langs = [_epitran.Epitran(c, cedict_file=cedict_file)
                      for c in lang_script_codes]
        self.num_re = re.compile(r'\p{Number}+')
        self.ft = panphon.featuretable.FeatureTable()
        self.xsampa = XSampa()
        self.puncnorm = PuncNorm()
        self.dias = [StripDiacritics(c) for c in lang_script_codes]