Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""Constructor.
Args:
alphabet: edit alphabet (an iterable of strings).
insert_cost: the cost for the insertion operation.
delete_cost: the cost for the deletion operation.
substitute_cost: the cost for the substitution operation.
"""
# Left factor; note that we divide the edit costs by two because they also
# will be incurred when traversing the right factor.
match = union(*alphabet).optimize(True)
i_insert = transducer("", "[{}]".format(self.INSERT),
weight=insert_cost / 2).optimize(True)
i_delete = transducer(match, "[{}]".format(self.DELETE),
weight=delete_cost / 2).optimize(True)
i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
weight=substitute_cost / 2).optimize(True)
i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
# Right factor; this is constructed by inverting the left factor (i.e.,
# swapping the input and output labels), then swapping the insert and delete
# labels on what is now the input side.
o_ops = invert(i_ops)
syms = o_ops.input_symbols()
insert_label = syms.find(self.INSERT)
delete_label = syms.find(self.DELETE)
o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
(delete_label, insert_label)))
# Computes the closure for both sets of ops.
self._e_i = i_ops.closure().optimize(True)
self._e_o = o_ops.closure().optimize(True)
alphabet,
insert_cost=DEFAULT_INSERT_COST,
delete_cost=DEFAULT_DELETE_COST,
substitute_cost=DEFAULT_SUBSTITUTE_COST):
"""Constructor.
Args:
alphabet: edit alphabet (an iterable of strings).
insert_cost: the cost for the insertion operation.
delete_cost: the cost for the deletion operation.
substitute_cost: the cost for the substitution operation.
"""
# Left factor; note that we divide the edit costs by two because they also
# will be incurred when traversing the right factor.
match = union(*alphabet).optimize(True)
i_insert = transducer("", "[{}]".format(self.INSERT),
weight=insert_cost / 2).optimize(True)
i_delete = transducer(match, "[{}]".format(self.DELETE),
weight=delete_cost / 2).optimize(True)
i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
weight=substitute_cost / 2).optimize(True)
i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
# Right factor; this is constructed by inverting the left factor (i.e.,
# swapping the input and output labels), then swapping the insert and delete
# labels on what is now the input side.
o_ops = invert(i_ops)
syms = o_ops.input_symbols()
insert_label = syms.find(self.INSERT)
delete_label = syms.find(self.DELETE)
o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
(delete_label, insert_label)))
# Computes the closure for both sets of ops.
def __init__(self,
alphabet,
insert_cost=DEFAULT_INSERT_COST,
delete_cost=DEFAULT_DELETE_COST,
substitute_cost=DEFAULT_SUBSTITUTE_COST):
"""Constructor.
Args:
alphabet: edit alphabet (an iterable of strings).
insert_cost: the cost for the insertion operation.
delete_cost: the cost for the deletion operation.
substitute_cost: the cost for the substitution operation.
"""
# Left factor; note that we divide the edit costs by two because they also
# will be incurred when traversing the right factor.
match = union(*alphabet).optimize(True)
i_insert = transducer("", "[{}]".format(self.INSERT),
weight=insert_cost / 2).optimize(True)
i_delete = transducer(match, "[{}]".format(self.DELETE),
weight=delete_cost / 2).optimize(True)
i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
weight=substitute_cost / 2).optimize(True)
i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
# Right factor; this is constructed by inverting the left factor (i.e.,
# swapping the input and output labels), then swapping the insert and delete
# labels on what is now the input side.
o_ops = invert(i_ops)
syms = o_ops.input_symbols()
insert_label = syms.find(self.INSERT)
delete_label = syms.find(self.DELETE)
o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
(delete_label, insert_label)))
Args:
alphabet: edit alphabet (an iterable of strings).
insert_cost: the cost for the insertion operation.
delete_cost: the cost for the deletion operation.
substitute_cost: the cost for the substitution operation.
"""
# Left factor; note that we divide the edit costs by two because they also
# will be incurred when traversing the right factor.
match = union(*alphabet).optimize(True)
i_insert = transducer("", "[{}]".format(self.INSERT),
weight=insert_cost / 2).optimize(True)
i_delete = transducer(match, "[{}]".format(self.DELETE),
weight=delete_cost / 2).optimize(True)
i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
weight=substitute_cost / 2).optimize(True)
i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
# Right factor; this is constructed by inverting the left factor (i.e.,
# swapping the input and output labels), then swapping the insert and delete
# labels on what is now the input side.
o_ops = invert(i_ops)
syms = o_ops.input_symbols()
insert_label = syms.find(self.INSERT)
delete_label = syms.find(self.DELETE)
o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
(delete_label, insert_label)))
# Computes the closure for both sets of ops.
self._e_i = i_ops.closure().optimize(True)
self._e_o = o_ops.closure().optimize(True)
def check_wellformed_lattice(lattice):
    """Verifies that a lattice has a start state.

    A lattice whose start state equals NO_STATE_ID is treated as empty.

    Args:
      lattice: A lattice FST.

    Raises:
      Error: Lattice is empty.
    """
    is_empty = lattice.start() == NO_STATE_ID
    if not is_empty:
        return
    raise Error("Lattice is empty")
substitute_cost: the cost for the substitution operation.
"""
# Left factor; note that we divide the edit costs by two because they also
# will be incurred when traversing the right factor.
match = union(*alphabet).optimize(True)
i_insert = transducer("", "[{}]".format(self.INSERT),
weight=insert_cost / 2).optimize(True)
i_delete = transducer(match, "[{}]".format(self.DELETE),
weight=delete_cost / 2).optimize(True)
i_substitute = transducer(match, "[{}]".format(self.SUBSTITUTE),
weight=substitute_cost / 2).optimize(True)
i_ops = union(match, i_insert, i_delete, i_substitute).optimize(True)
# Right factor; this is constructed by inverting the left factor (i.e.,
# swapping the input and output labels), then swapping the insert and delete
# labels on what is now the input side.
o_ops = invert(i_ops)
syms = o_ops.input_symbols()
insert_label = syms.find(self.INSERT)
delete_label = syms.find(self.DELETE)
o_ops.relabel_pairs(ipairs=((insert_label, delete_label),
(delete_label, insert_label)))
# Computes the closure for both sets of ops.
self._e_i = i_ops.closure().optimize(True)
self._e_o = o_ops.closure().optimize(True)
"""Computes minimum distance.
This method computes, for a pair of input/output strings or acceptors, the
minimum edit distance according to the underlying edit transducer.
Args:
iset: input string or acceptor.
oset: output string or acceptor.
Returns:
Minimum edit distance according to the edit transducer.
"""
lattice = self._create_lattice(iset, oset)
# The shortest cost from all final states to the start state is
# equivalent to the cost of the shortest path.
return float(shortestdistance(lattice, reverse=True)[lattice.start()])
in the lexicon according to the underlying edit transducer. In the case of
a tie (i.e., where there are multiple closest strings), only one will be
returned; tie breaking is deterministic but difficult to reason about and
thus should be considered unspecified. The `closest_matches` method can be
used to enumerate all the ties.
Args:
query: input string or acceptor.
Returns:
The closest string in the lexicon.
"""
lattice = self._create_levenshtein_automaton_lattice(query)
# For implementation reasons, the shortest path (when k = 1) is in reverse
# state order, so we perform a topological sort ahead of time.
return shortestpath(lattice).topsort().stringify()
def __init__(self,
             alphabet,
             lexicon,
             insert_cost=DEFAULT_INSERT_COST,
             delete_cost=DEFAULT_DELETE_COST,
             substitute_cost=DEFAULT_SUBSTITUTE_COST):
    """Constructor.

    Args:
      alphabet: edit alphabet; forwarded to the parent constructor.
      lexicon: the lexicon to match against, in whatever form `string_map`
          accepts.
      insert_cost: the cost for the insertion operation; forwarded to the
          parent constructor.
      delete_cost: the cost for the deletion operation; forwarded to the
          parent constructor.
      substitute_cost: the cost for the substitution operation; forwarded to
          the parent constructor.
    """
    super(LevenshteinAutomaton, self).__init__(
        alphabet, insert_cost, delete_cost, substitute_cost)
    # Compile the lexicon and compose the right factor (`self._e_o`) with it.
    # The composed machine is stored first and then optimized in place.
    self._l_o = self._e_o @ string_map(lexicon)
    self._l_o.optimize(True)