Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Convert spelled out numbers in a given text to digits.
"""
values = []
for item in reg.text_pattern_reg(lang).finditer(text):
surface, span = clean_surface(item.group(0), item.span())
if not surface or surface.lower() in reg.scales(lang):
continue
curr = result = 0.0
for word in surface.split():
try:
scale, increment = (
1,
float(
re.sub(
r"(-$|[%s])" % reg.grouping_operators_regex(lang),
"",
word.lower(),
)
),
)
except ValueError:
scale, increment = reg.numberwords(lang)[word.lower()]
curr = curr * scale + increment
if scale > 100:
result += curr
curr = 0.0
values.append(
{
"old_surface": surface,
"old_span": span,
"new_surface": str(result + curr),
def get_values(item, lang="en_US"):
"""
Extract value from regex hit.
"""
def callback(pattern):
return " %s" % (reg.unicode_fractions()[pattern.group(0)])
fracs = r"|".join(reg.unicode_fractions())
value = item.group("value")
# Remove grouping operators
value = re.sub(
r"(?<=\d)[%s](?=\d{3})" % reg.grouping_operators_regex(lang), "", value
)
# Replace unusual exponents by e (including e)
value = re.sub(
r"(?<=\d)(%s)(e|E|10)\^?" % reg.multiplication_operators_regex(lang), "e", value
)
# calculate other exponents
value, factors = resolve_exponents(value)
_LOGGER.debug("After exponent resolution: {}".format(value))
value = re.sub(fracs, callback, value, re.IGNORECASE)
range_separator = re.findall(
r"\d+ ?((?:-\ )?(?:%s)) ?\d" % "|".join(reg.ranges(lang)), value
)
uncer_separator = re.findall(
r"\d+ ?(%s) ?\d" % "|".join(reg.uncertainties(lang)), value