Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _load_g2p_map(self, code, rev):
    """Load the code table for the specified language.

    The table is read from ``data/map/<code>.csv`` (``<code>_rev.csv`` when
    ``rev`` is true), resolved relative to this package. The first record
    must be the header ``Orth,Phon``; every following record must have
    exactly two fields (grapheme, phoneme). Both fields are NFD-normalized
    before being stored.

    Args:
        code (str): ISO 639-3 code plus "-" plus ISO 15924 code for the
            language/script to be loaded
        rev (boolean): True for reversing the table (for reverse
            transliterating)

    Returns:
        defaultdict: maps each NFD-normalized grapheme string to the list
        of NFD-normalized phoneme strings listed for it in the file.

    Raises:
        DatafileError: if the file is empty, the header is not
            ``["Orth", "Phon"]``, or a record does not have exactly two
            fields.
        MappingError: if ``self._one_to_many_gr_by_line_map(g2p)`` returns
            a truthy value (reported as a one-to-many G2P mapping).
    """
    g2p = defaultdict(list)
    # Zero-based data-row indices per grapheme, kept only for error reports.
    gr_by_line = defaultdict(list)
    code += '_rev' if rev else ''
    try:
        path = os.path.join('data', 'map', code + '.csv')
        path = pkg_resources.resource_filename(__name__, path)
    except IndexError:
        # NOTE(review): unclear which call is expected to raise IndexError;
        # the message also says "data/maps" while the path above uses
        # "data/map" -- confirm before relying on this branch.
        raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
    with open(path, 'rb') as f:
        # NOTE(review): binary mode plus the ``encoding`` keyword suggests
        # ``csv`` is bound to an encoding-aware reader (e.g. unicodecsv)
        # rather than the stdlib module -- confirm against the imports.
        reader = csv.reader(f, encoding='utf-8')
        # Report an empty file or a mis-shaped header as DatafileError,
        # the same way malformed data rows are reported below, instead of
        # leaking a bare StopIteration/ValueError to the caller.
        try:
            header = next(reader)
        except StopIteration:
            raise DatafileError('Map file is empty; expected header ["Orth", "Phon"].')
        try:
            orth, phon = header
        except ValueError:
            raise DatafileError('Map file is not well formed at line 1.')
        if orth != 'Orth' or phon != 'Phon':
            raise DatafileError('Header is ["{}", "{}"] instead of ["Orth", "Phon"].'.format(orth, phon))
        for (i, fields) in enumerate(reader):
            try:
                graph, phon = fields
            except ValueError:
                # i counts data records from 0 and the header is record 1,
                # hence the +2 to get a 1-based position in the file.
                raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
            graph = unicodedata.normalize('NFD', graph)
            phon = unicodedata.normalize('NFD', phon)
            g2p[graph].append(phon)
            gr_by_line[graph].append(i)
    if self._one_to_many_gr_by_line_map(g2p):
        # Second call runs the same helper over the row-index table so the
        # error can name where the offending grapheme appears.
        graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
        lines = [line_idx + 2 for line_idx in lines]
        # NOTE(review): the message is UTF-8 *encoded* before being passed
        # to the exception, so on Python 3 it is a bytes object -- confirm
        # the intended runtime before changing this.
        raise MappingError('One-to-many G2P mapping for "{}" on lines {}'.format(graph, ', '.join(map(str, lines))).encode('utf-8'))
    return g2p
else:
line = self._sub_symbols(line)
r = re.match(r'(\S+)\s*->\s*(\S+)\s*/\s*(\S*)\s*[_]\s*(\S*)', line)
try:
a, b, X, Y = r.groups()
except AttributeError:
raise DatafileError('Line {}: "{}" cannot be parsed.'.format(i + 1, line))
X, Y = X.replace('#', '^'), Y.replace('#', '$')
a, b = a.replace('0', ''), b.replace('0', '')
try:
if re.search(r'[?]P[<]sw1[>].+[?]P[<]sw2[>]', a):
return self._fields_to_function_metathesis(a, X, Y)
else:
return self._fields_to_function(a, b, X, Y)
except Exception as e:
raise DatafileError('Line {}: "{}" cannot be compiled as regex: ̪{}'.format(i + 1, line, e))
code += '_rev' if rev else ''
try:
path = os.path.join('data', 'map', code + '.csv')
path = pkg_resources.resource_filename(__name__, path)
except IndexError:
raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
with open(path, 'rb') as f:
reader = csv.reader(f, encoding='utf-8')
orth, phon = next(reader)
if orth != 'Orth' or phon != 'Phon':
raise DatafileError('Header is ["{}", "{}"] instead of ["Orth", "Phon"].'.format(orth, phon))
for (i, fields) in enumerate(reader):
try:
graph, phon = fields
except ValueError:
raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
graph = unicodedata.normalize('NFD', graph)
phon = unicodedata.normalize('NFD', phon)
g2p[graph].append(phon)
gr_by_line[graph].append(i)
if self._one_to_many_gr_by_line_map(g2p):
graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
lines = [l + 2 for l in lines]
raise MappingError('One-to-many G2P mapping for "{}" on lines {}'.format(graph, ', '.join(map(str, lines))).encode('utf-8'))
return g2p
language/script to be loaded
rev (boolean): True for reversing the table (for reverse transliterating)
"""
g2p = defaultdict(list)
gr_by_line = defaultdict(list)
code += '_rev' if rev else ''
try:
path = os.path.join('data', 'map', code + '.csv')
path = pkg_resources.resource_filename(__name__, path)
except IndexError:
raise DatafileError('Add an appropriately-named mapping to the data/maps directory.')
with open(path, 'rb') as f:
reader = csv.reader(f, encoding='utf-8')
orth, phon = next(reader)
if orth != 'Orth' or phon != 'Phon':
raise DatafileError('Header is ["{}", "{}"] instead of ["Orth", "Phon"].'.format(orth, phon))
for (i, fields) in enumerate(reader):
try:
graph, phon = fields
except ValueError:
raise DatafileError('Map file is not well formed at line {}.'.format(i + 2))
graph = unicodedata.normalize('NFD', graph)
phon = unicodedata.normalize('NFD', phon)
g2p[graph].append(phon)
gr_by_line[graph].append(i)
if self._one_to_many_gr_by_line_map(g2p):
graph, lines = self._one_to_many_gr_by_line_map(gr_by_line)
lines = [l + 2 for l in lines]
raise MappingError('One-to-many G2P mapping for "{}" on lines {}'.format(graph, ', '.join(map(str, lines))).encode('utf-8'))
return g2p
def _read_rule(self, i, line):
line = line.strip()
if line:
line = unicodedata.normalize('NFD', line)
s = re.match(r'(?P