Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
operator_index = group_operators[index - 2]
# Remove (original length - new end) characters
unit_shortening = item.end() - item.start(operator_index)
_LOGGER.debug(
"Because operator inconsistency, cut from "
"operator: '{}', new surface: {}".format(
operator,
text[item.start() : item.end() - unit_shortening],
)
)
break
# Determine whether a negative power has to be applied to following
# units
if operator and not slash:
slash = any(i in operator for i in reg.division_operators(lang))
# Determine which unit follows
if unit:
unit_surface, power = parse_unit(item, unit, slash, lang)
base = dis.disambiguate_unit(unit_surface, text, lang)
derived += [{"base": base, "power": power, "surface": unit_surface}]
unit = get_unit_from_dimensions(derived, text, lang)
_LOGGER.debug("\tUnit: %s", unit)
_LOGGER.debug("\tEntity: %s", unit.entity)
return unit, unit_shortening
logging.basicConfig(format=log_format)
if verbose: # pragma: no cover
prev_level = logging.root.getEffectiveLevel()
logging.root.setLevel(logging.DEBUG)
_LOGGER.debug("Verbose mode")
orig_text = text
_LOGGER.debug('Original text: "%s"', orig_text)
text = clean_text(text, lang)
values = extract_spellout_values(text, lang)
text, shifts = substitute_values(text, values)
quantities = []
for item in reg.units_regex(lang).finditer(text):
groups = dict([i for i in item.groupdict().items() if i[1] and i[1].strip()])
_LOGGER.debug(u"Quantity found: %s", groups)
try:
uncert, values = get_values(item, lang)
unit, unit_shortening = get_unit(item, text)
surface, span = get_surface(shifts, orig_text, item, text, unit_shortening)
objs = build_quantity(
orig_text, text, item, values, unit, surface, span, uncert, lang
)
if objs is not None:
quantities += objs
except ValueError as err:
_LOGGER.debug("Could not parse quantity: %s", err)
item_units = [item.group(i) for i in group_units if item.group(i)]
if len(item_units) == 0:
unit = load.units(lang).names["dimensionless"]
else:
derived, slash = [], False
multiplication_operator = False
for index in range(0, 5):
unit = item.group(group_units[index])
operator_index = None if index < 1 else group_operators[index - 1]
operator = None if index < 1 else item.group(operator_index)
# disallow spaces as operators in units expressed in their symbols
# Enforce consistency among multiplication and division operators
# Single exceptions are colloquial number abbreviations (5k miles)
if operator in reg.multiplication_operators(lang) or (
operator is None
and unit
and not (index == 1 and unit in reg.suffixes(lang))
):
if multiplication_operator != operator and not (
index == 1 and str(operator).isspace()
):
if multiplication_operator is False:
multiplication_operator = operator
else:
# Cut if inconsistent multiplication operator
# treat the None operator differently - remove the
# whole word of it
if operator is None:
# For this, use the last consistent operator
# (before the current) with a space
def resolve_exponents(value, lang="en_US"):
    """Resolve unusual exponents (like 2^4) and return the reduced string.

    Every match of ``reg.number_pattern_groups(lang)`` in *value* contributes
    exactly one entry to the returned factor list. When a match carries an
    explicit base and exponent, the text of its ``scale`` group is removed
    from the value and ``base ** exponent`` is recorded; otherwise the
    neutral factor ``1`` is recorded and the value is left untouched.

    Params:
        value: str, string with only one value
        lang: str, language code forwarded to the ``reg`` pattern helpers
    Returns:
        str, string with basis and exponent removed
        list of float, factors for multiplication
    Raises:
        ValueError: if the base or exponent text cannot be converted to float
    """
    factors = []
    matches = re.finditer(
        reg.number_pattern_groups(lang), value, re.IGNORECASE | re.VERBOSE
    )
    for item in matches:
        base = item.group("base")
        exp = item.group("exponent")

        # Nothing to resolve unless both parts of the power are present.
        if not (base and exp):
            factors.append(1)
            continue

        # Scientific "e" notation is already handled by float().
        if base in ("e", "E"):
            factors.append(1)
            continue

        # Expect that in a pure decimal base, either ^ or superscript
        # notation is used; anything else is not treated as a power.
        if re.match(r"\d+\^?", base) and not (
            "^" in base
            or re.match(r"[%s]" % reg.unicode_superscript_regex(), exp)
        ):
            factors.append(1)
            continue

        # str.replace returns a new string, so the result must be re-bound;
        # otherwise superscript characters reach float() untranslated.
        for superscript, substitute in reg.unicode_superscript().items():
            exp = exp.replace(superscript, substitute)

        exp = float(exp)
        base = float(base.replace("^", ""))
        factor = base ** exp

        # Drop the textual scale so only the plain number remains.
        scale = item.group("scale")
        value = str(value).replace(scale, "")
        factors.append(factor)
        _LOGGER.debug("Replaced %s by factor %s", scale, factor)

    return value, factors
"""
Parse surface and power from unit text.
"""
surface = unit.replace(".", "")
power = re.findall(r"-?[0-9%s]+" % reg.unicode_superscript_regex(), surface)
power_written = re.findall(r"\b(%s)\b" % "|".join(reg.powers(lang)), surface)
if power:
power = [
reg.unicode_superscript()[i] if i in reg.unicode_superscript() else i
for i in power
]
power = "".join(power)
new_power = -1 * int(power) if slash else int(power)
surface = re.sub(r"\^?-?[0-9%s]+" % reg.unicode_superscript(), "", surface)
elif power_written:
exponent = reg.powers(lang)[power_written[0]]
new_power = -exponent if slash else exponent
surface = re.sub(r"\b%s\b" % power_written[0], "", surface).strip()
else:
new_power = -1 if slash else 1
return surface, new_power