Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@cached
def _get_regex(lang="en_US"):
    """
    Look up the "regex" resource for a language.

    :param lang: language code handed straight to ``language.get``
    :return: whatever ``language.get("regex", lang)`` yields (described by
        the original author as the regex module for that language)
    """
    regex_resource = language.get("regex", lang)
    return regex_resource
@cached
def number_pattern_no_groups(lang="en_US"):
    """
    Fill ``NUM_PATTERN`` for a language, using ":" for every group slot.

    :param lang: language code forwarded to the language-aware helpers
    :return: the formatted ``NUM_PATTERN`` string
    """
    # All six group placeholders receive the same ":" filler.
    # NOTE(review): presumably this turns them into non-capturing groups;
    # NUM_PATTERN is defined elsewhere, confirm against it.
    fillers = dict.fromkeys(
        ("number", "decimals", "scale", "base", "exponent", "fraction"),
        ":",
    )
    # Helper calls are kept in the same order as the original keyword list.
    fillers["grouping"] = grouping_operators_regex(lang)
    fillers["multipliers"] = multiplication_operators_regex(lang)
    fillers["superscript"] = unicode_superscript_regex()
    fillers["unicode_fract"] = unicode_fractions_regex()
    fillers["decimal_operators"] = decimal_operators_regex(lang)
    return NUM_PATTERN.format(**fillers)
@cached
def range_pattern(lang="en_US"):
num_pattern_no_groups = number_pattern_no_groups(lang)
return r""" # Pattern for a range of numbers
(?: # First number
(?
@cached
def division_operators(lang="en_US"):
    """
    Collect the division operators for a language.

    :param lang: language code used to look up the regex resource
    :return: a new set holding "/" plus every entry of the language's
        ``DIVISION_OPERATORS``
    """
    language_specific = _get_regex(lang).DIVISION_OPERATORS
    # union() builds a fresh set, so DIVISION_OPERATORS itself is not mutated.
    return {u"/"}.union(language_specific)
@cached
def operators(lang="en_US"):
    """
    Combine the multiplication and division operators for a language.

    :param lang: language code forwarded to both operator helpers
    :return: a new set containing every multiplication and division operator
    """
    mul_ops = multiplication_operators(lang)
    div_ops = division_operators(lang)
    return set().union(mul_ops, div_ops)
@cached
def decimal_operators_regex(lang="en_US"):
    """
    Concatenate the decimal operators of a language into one string.

    :param lang: language code forwarded to ``decimal_operators``
    :return: all decimal operators joined with no separator
    """
    separators = decimal_operators(lang)
    # No escaping is applied here; the entries are joined verbatim.
    return "".join(separators)
@cached
def decimal_operators(lang="en_US"):
    """
    Fetch the decimal operators defined for a language.

    :param lang: language code used to look up the regex resource
    :return: the ``DECIMAL_OPERATORS`` attribute of that resource, returned
        as-is (not copied)
    """
    regex_resource = _get_regex(lang)
    return regex_resource.DECIMAL_OPERATORS
@cached
def numberwords(lang="en_US"):
"""
Convert number words to integers in a given text.
"""
numwords = {}
numwords.update(miscnum(lang))
for idx, word in enumerate(units(lang)):
numwords[word] = (1, idx)
for idx, word in enumerate(tens(lang)):
numwords[word] = (1, idx * 10)
for idx, word in enumerate(scales(lang)):
numwords[word] = (10 ** (idx * 3 or 2), 0)
for word, factor in decimals(lang).items():
@cached
def multiplication_operators_regex(lang="en_US"):
    """
    Build a regex alternation matching any multiplication operator.

    :param lang: language code forwarded to ``multiplication_operators``
    :return: the operators, each passed through ``re.escape``, joined by "|"
    """
    escaped_ops = [
        r"%s" % re.escape(operator)
        for operator in multiplication_operators(lang)
    ]
    return r"|".join(escaped_ops)
@cached
def uncertainties(lang="en_US"):
    """
    Collect the uncertainty markers for a language.

    :param lang: language code used to look up the regex resource
    :return: a new set holding the two built-in patterns (``\\+/-`` and
        ``±``) plus every entry of the language's ``UNCERTAINTIES``
    """
    language_specific = _get_regex(lang).UNCERTAINTIES
    # union() builds a fresh set, so UNCERTAINTIES itself is not mutated.
    return {r"\+/-", r"±"}.union(language_specific)