How to use pynini - 9 common examples

To help you get started, we’ve selected a few pynini examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
"""Constructor.

    Args:
      alphabet: edit alphabet (an iterable of strings).
      insert_cost: the cost for the insertion operation.
      delete_cost: the cost for the deletion operation.
      substitute_cost: the cost for the substitution operation.
    """
    # Left factor; note that we divide the edit costs by two because they also
    # will be incurred when traversing the right factor.
    match = union(*alphabet).optimize(True)
    i_insert = transducer("", "[{}]".format(self.INSERT),
                          weight=insert_cost / 2).optimize(True)
    i_delete = transducer(match, "[{}]".format(self.DELETE),
                          weight=delete_cost / 2).optimize(True)
    i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
                              weight=substitute_cost / 2).optimize(True)
    i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
    # Right factor; this is constructed by inverting the left factor (i.e.,
    # swapping the input and output labels), then swapping the insert and delete
    # labels on what is now the input side.
    o_ops = invert(i_ops)
    syms = o_ops.input_symbols()
    insert_label = syms.find(self.INSERT)
    delete_label = syms.find(self.DELETE)
    o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
                                (delete_label, insert_label)))
    # Computes the closure for both sets of ops.
    self._e_i = i_ops.closure().optimize(True)
    self._e_o = o_ops.closure().optimize(True)
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
alphabet,
               insert_cost=DEFAULT_INSERT_COST,
               delete_cost=DEFAULT_DELETE_COST,
               substitute_cost=DEFAULT_SUBSTITUTE_COST):
    """Constructor.

    Args:
      alphabet: edit alphabet (an iterable of strings).
      insert_cost: the cost for the insertion operation.
      delete_cost: the cost for the deletion operation.
      substitute_cost: the cost for the substitution operation.
    """
    # Left factor; note that we divide the edit costs by two because they also
    # will be incurred when traversing the right factor.
    match = union(*alphabet).optimize(True)
    i_insert = transducer("", "[{}]".format(self.INSERT),
                          weight=insert_cost / 2).optimize(True)
    i_delete = transducer(match, "[{}]".format(self.DELETE),
                          weight=delete_cost / 2).optimize(True)
    i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
                              weight=substitute_cost / 2).optimize(True)
    i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
    # Right factor; this is constructed by inverting the left factor (i.e.,
    # swapping the input and output labels), then swapping the insert and delete
    # labels on what is now the input side.
    o_ops = invert(i_ops)
    syms = o_ops.input_symbols()
    insert_label = syms.find(self.INSERT)
    delete_label = syms.find(self.DELETE)
    o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
                                (delete_label, insert_label)))
    # Computes the closure for both sets of ops.
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
def __init__(self,
               alphabet,
               insert_cost=DEFAULT_INSERT_COST,
               delete_cost=DEFAULT_DELETE_COST,
               substitute_cost=DEFAULT_SUBSTITUTE_COST):
    """Constructor.

    Args:
      alphabet: edit alphabet (an iterable of strings).
      insert_cost: the cost for the insertion operation.
      delete_cost: the cost for the deletion operation.
      substitute_cost: the cost for the substitution operation.
    """
    # Left factor; note that we divide the edit costs by two because they also
    # will be incurred when traversing the right factor.
    match = union(*alphabet).optimize(True)
    i_insert = transducer("", "[{}]".format(self.INSERT),
                          weight=insert_cost / 2).optimize(True)
    i_delete = transducer(match, "[{}]".format(self.DELETE),
                          weight=delete_cost / 2).optimize(True)
    i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
                              weight=substitute_cost / 2).optimize(True)
    i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
    # Right factor; this is constructed by inverting the left factor (i.e.,
    # swapping the input and output labels), then swapping the insert and delete
    # labels on what is now the input side.
    o_ops = invert(i_ops)
    syms = o_ops.input_symbols()
    insert_label = syms.find(self.INSERT)
    delete_label = syms.find(self.DELETE)
    o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
                                (delete_label, insert_label)))
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
Args:
      alphabet: edit alphabet (an iterable of strings).
      insert_cost: the cost for the insertion operation.
      delete_cost: the cost for the deletion operation.
      substitute_cost: the cost for the substitution operation.
    """
    # Left factor; note that we divide the edit costs by two because they also
    # will be incurred when traversing the right factor.
    match = union(*alphabet).optimize(True)
    i_insert = transducer("", "[{}]".format(self.INSERT),
                          weight=insert_cost / 2).optimize(True)
    i_delete = transducer(match, "[{}]".format(self.DELETE),
                          weight=delete_cost / 2).optimize(True)
    i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
                              weight=substitute_cost / 2).optimize(True)
    i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
    # Right factor; this is constructed by inverting the left factor (i.e.,
    # swapping the input and output labels), then swapping the insert and delete
    # labels on what is now the input side.
    o_ops = invert(i_ops)
    syms = o_ops.input_symbols()
    insert_label = syms.find(self.INSERT)
    delete_label = syms.find(self.DELETE)
    o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
                                (delete_label, insert_label)))
    # Computes the closure for both sets of ops.
    self._e_i = i_ops.closure().optimize(True)
    self._e_o = o_ops.closure().optimize(True)
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
def check_wellformed_lattice(lattice):
    """Verifies that a lattice has a start state.

    A lattice whose start state equals `NO_STATE_ID` is treated as empty.

    Args:
      lattice: the lattice FST to check.

    Raises:
      Error: if the lattice is empty.
    """
    # An FST without a start state cannot contain any path.
    is_empty = lattice.start() == NO_STATE_ID
    if is_empty:
      raise Error("Lattice is empty")
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
substitute_cost: the cost for the substitution operation.
    """
    # Left factor; note that we divide the edit costs by two because they also
    # will be incurred when traversing the right factor.
    match = union(*alphabet).optimize(True)
    i_insert = transducer("", "[{}]".format(self.INSERT),
                          weight=insert_cost / 2).optimize(True)
    i_delete = transducer(match, "[{}]".format(self.DELETE),
                          weight=delete_cost / 2).optimize(True)
    i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
                              weight=substitute_cost / 2).optimize(True)
    i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
    # Right factor; this is constructed by inverting the left factor (i.e.,
    # swapping the input and output labels), then swapping the insert and delete
    # labels on what is now the input side.
    o_ops = invert(i_ops)
    syms = o_ops.input_symbols()
    insert_label = syms.find(self.INSERT)
    delete_label = syms.find(self.DELETE)
    o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
                                (delete_label, insert_label)))
    # Computes the closure for both sets of ops.
    self._e_i = i_ops.closure().optimize(True)
    self._e_o = o_ops.closure().optimize(True)
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
"""Computes minimum distance.

    This method computes, for a pair of input/output strings or acceptors, the
    minimum edit distance according to the underlying edit transducer.

    Args:
      iset: input string or acceptor.
      oset: output string or acceptor.

    Returns:
      Minimum edit distance according to the edit transducer.
    """
    lattice = self._create_lattice(iset, oset)
    # The shortest cost from all final states to the start state is
    # equivalent to the cost of the shortest path.
    return float(shortestdistance(lattice, reverse=True)[lattice.start()])
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
in the lexicon according to the underlying edit transducer. In the case of
    a tie (i.e., where there are multiple closest strings), only one will be
    returned; tie breaking is deterministic but difficult to reason about and
    thus should be considered unspecified. The `closest_matches` method can be
    used to enumerate all the ties.

    Args:
      query: input string or acceptor.

    Returns:
      The closest string in the lexicon.
    """
    lattice = self._create_levenshtein_automaton_lattice(query)
    # For implementation reasons, the shortest path (when k = 1) is in reverse
    # state order, so we perform a topological sort ahead of time.
    return shortestpath(lattice).topsort().stringify()
github kylebgorman / EditTransducer / edit_transducer / edit_transducer.py View on Github external
def __init__(self,
               alphabet,
               lexicon,
               insert_cost=DEFAULT_INSERT_COST,
               delete_cost=DEFAULT_DELETE_COST,
               substitute_cost=DEFAULT_SUBSTITUTE_COST):
    """Constructor.

    Args:
      alphabet: edit alphabet, forwarded to the parent constructor.
      lexicon: the lexicon; it is compiled with `string_map`.
      insert_cost: insertion cost, forwarded to the parent constructor.
      delete_cost: deletion cost, forwarded to the parent constructor.
      substitute_cost: substitution cost, forwarded to the parent constructor.
    """
    parent = super(LevenshteinAutomaton, self)
    parent.__init__(alphabet, insert_cost, delete_cost, substitute_cost)
    # The lexicon is compiled first; the right factor (`self._e_o`) is then
    # composed with it, and the composed machine is optimized.
    lexicon_fst = string_map(lexicon)
    self._l_o = self._e_o @ lexicon_fst
    self._l_o.optimize(True)

pynini

Finite-state grammar compilation

MIT
Latest version published 4 months ago

Package Health Score

82 / 100
Full package analysis