Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
for rexp, span_adjust in weak_episode_rexps:
match = re.search(rexp, string, re.IGNORECASE)
if match:
metadata = match.groupdict()
span = (match.start() + span_adjust[0],
match.end() + span_adjust[1])
epnum = int(metadata['episodeNumber'])
if epnum > 100:
season, epnum = epnum // 100, epnum % 100
# episodes which have a season > 25 are most likely errors
# (Simpsons is at 23!)
if season > 25:
continue
return Guess({ 'season': season,
'episodeNumber': epnum },
confidence=0.6, raw=string[span[0]:span[1]]), span
else:
return Guess(metadata, confidence=0.3, raw=string[span[0]:span[1]]), span
return None, None
def process(self, mtree, options=None):
    """Look for countries in the match tree and record them as guesses.

    A ``GuessFinder`` built on ``self.guess_country`` is first run over the
    tree's unidentified leaves. Afterwards, every leaf containing a
    ``language`` guess whose lower-cased clean value is listed in
    ``self.replace_language`` has its ``language`` entry set to ``None`` and
    is re-examined as a possible country.

    :param mtree: match tree providing ``unidentified_leaves()`` and
        ``leaves_containing()``.
    :param options: forwarded to ``GuessFinder`` and
        ``self.is_valid_country``.
    """
    finder = GuessFinder(self.guess_country, None, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())

    for leaf in mtree.leaves_containing('language'):
        candidate = leaf.clean_value.lower()
        if candidate not in self.replace_language:
            continue

        # The language entry is reset before the country lookup, so it stays
        # reset even when no country guess is produced below.
        leaf.guess.set('language', None)
        try:
            country = Country.fromguessit(candidate)
            if self.is_valid_country(country, options):
                country_guess = Guess(country=country, confidence=0.9,
                                      input=leaf.value, span=leaf.span)
                found_guess(leaf, country_guess, logger=log)
        except babelfish.Error:
            # A babelfish error during lookup/validation is ignored.
            pass
def process(self, mtree, options=None):
    """Look for countries in the match tree and record them as guesses.

    Step one runs a ``GuessFinder`` (using ``self.guess_country``) on the
    unidentified leaves of ``mtree``. Step two walks the leaves that contain
    a ``language`` guess: when the leaf's lower-cased clean value belongs to
    ``self.replace_language``, its ``language`` entry is set to ``None`` and
    the value is looked up as a country instead.

    :param mtree: match tree providing ``unidentified_leaves()`` and
        ``leaves_containing()``.
    :param options: forwarded to ``GuessFinder`` and
        ``self.is_valid_country``.
    """
    finder = GuessFinder(self.guess_country, None, self.log, options)
    finder.process_nodes(mtree.unidentified_leaves())

    for leaf in mtree.leaves_containing('language'):
        candidate = leaf.clean_value.lower()
        if candidate not in self.replace_language:
            continue

        # Reset happens first; a failed country lookup does not restore it.
        leaf.guess.set('language', None)
        try:
            country = Country.fromguessit(candidate)
            if self.is_valid_country(country, options):
                country_guess = Guess(country=country, confidence=0.9,
                                      input=leaf.value, span=leaf.span)
                found_guess(leaf, country_guess, logger=log)
        except babelfish.Error:
            # A babelfish error during lookup/validation is ignored.
            pass
def guess_country(self, string, node=None, options=None):
    """Try to recognise a country in ``string``.

    The input is stripped and lower-cased before use; values present in
    ``LNG_COMMON_WORDS`` are never scanned.

    :param string: text to scan.
    :param node: node whose ``value`` is stored as the guess ``input``.
        NOTE(review): the default is ``None`` but ``node.value`` is read
        whenever a valid country is found -- confirm callers always pass it.
    :param options: forwarded to ``self.is_valid_country``.
    :return: a ``Guess`` when a valid country is found, otherwise the pair
        ``(None, None)``.
    """
    candidate = string.strip().lower()
    if candidate in LNG_COMMON_WORDS:
        return None, None

    try:
        country, found_span = self._scan_country(candidate, True)
        if self.is_valid_country(country, options):
            # Both bounds of the scanned span are shifted right by one.
            return Guess(
                country=country,
                confidence=1.0,
                input=node.value,
                span=(found_span[0] + 1, found_span[1] + 1),
            )
    except babelfish.Error:
        # A babelfish error during scanning/validation means "no guess".
        pass

    return None, None
def __init__(self, string='', span=None, parent=None):
    """Set up a node with no children and an empty ``Guess``.

    :param string: text stored on the node.
    :param span: stored as given when truthy; otherwise the span defaults
        to ``(0, len(string))``.
    :param parent: stored on the node as ``parent``.
    """
    self.string = string
    # Any falsy span (e.g. None) falls back to covering the whole string.
    self.span = span if span else (0, len(string))
    self.parent = parent
    self.children = []
    self.guess = Guess()
def guess_language(string, node, skip=None):
    """Search ``string`` for a language and wrap the result in a ``Guess``.

    :param string: text handed to ``search_language``.
    :param node: node providing ``node_idx`` and ``offset``, used to convert
        the ``skip`` entries into spans relative to this node.
    :param skip: optional iterable of dicts with ``'node_idx'`` and
        ``'span'`` keys. When truthy, only entries whose ``node_idx`` is a
        prefix of ``node.node_idx`` are kept, and their spans are rebased
        with ``- node.offset + 1``. A falsy value is passed on unchanged.
    :return: ``(Guess, span)`` when a language is found, otherwise
        ``(None, None)``.
    """
    if skip:
        local_skip = []
        for item in skip:
            owner_idx, item_span = item['node_idx'], item['span']
            # Ignore entries that belong to a different branch of the tree.
            if owner_idx != node.node_idx[:len(owner_idx)]:
                continue
            local_skip.append((item_span[0] - node.offset + 1,
                               item_span[1] - node.offset + 1))
        skip = local_skip

    language, found_span, confidence = search_language(string, skip=skip)
    if not language:
        return None, None

    guess = Guess({'language': language},
                  confidence=confidence,
                  raw=string[found_span[0]:found_span[1]])
    return guess, found_span
result, span = matcher(*all_args, **kwargs)
else:
result, span = matcher(*all_args)
if result:
# readjust span to compensate for sentinels
span = (span[0] - 1, span[1] - 1)
if isinstance(result, Guess):
if confidence is None:
confidence = result.confidence(list(result.keys())[0])
else:
if confidence is None:
confidence = 1.0
guess = format_guess(Guess(result, confidence=confidence, raw=string[span[0] + 1:span[1] + 1]))
msg = 'Found with confidence %.2f: %s' % (confidence, guess)
(logger or log).debug(msg)
node.partition(span)
absolute_span = (span[0] + node.offset, span[1] + node.offset)
for child in node.children:
if child.span == absolute_span:
child.guess = guess
else:
find_and_split_node(child, strategy, logger)
return
def found_property(node, name, value, confidence):
    """Replace ``node.guess`` with a one-property guess and log it.

    :param node: object whose ``guess`` attribute is overwritten.
    :param name: key of the single property stored in the guess.
    :param value: value of that property; also passed as the guess ``raw``.
    :param confidence: confidence given to the guess and shown in the
        debug message.
    """
    single_property = {name: value}
    node.guess = Guess(single_property, confidence=confidence, raw=value)

    message = 'Found with confidence %.2f: %s' % (confidence, node.guess)
    log.debug(message)
def guess_video_rexps(string):
    """Return a guess for the first entry of ``video_rexps`` that matches.

    ``string`` is wrapped in ``'-'`` characters and each pattern is searched
    case-insensitively with ``sep`` on both sides.

    :param string: text to search.
    :return: ``(Guess, span)`` for the first matching pattern, where the
        guess is built from the match's named groups; ``(None, None)`` when
        nothing matches.
    """
    padded = '-' + string + '-'

    for pattern, confidence, span_adjust in video_rexps:
        found = re.search(sep + pattern + sep, padded, re.IGNORECASE)
        if not found:
            continue

        metadata = found.groupdict()
        # is this the better place to put it? (maybe, as it is at least
        # the soonest that we can catch it)
        # Drop 'cdNumberTotal' when the group exists but captured nothing.
        if 'cdNumberTotal' in metadata and metadata['cdNumberTotal'] is None:
            del metadata['cdNumberTotal']

        # NOTE(review): the extra -2 on the end bound presumably accounts
        # for the two '-' characters added above -- confirm.
        start = found.start() + span_adjust[0]
        end = found.end() + span_adjust[1] - 2
        guess = Guess(metadata, confidence=confidence, raw=padded[start:end])
        return guess, (start, end)

    return None, None
# Assign a country guess to unidentified leaves that look like an explicit
# "(xx)" / "[xx]" / "{xx}" group.
for node in mtree.unidentified_leaves():
    # Only leaves whose node index has exactly two components are examined.
    if len(node.node_idx) != 2:
        continue

    # Text between the first and last character, lower-cased.
    inner = node.value[1:-1].lower()
    if inner in country_common_words:
        continue

    # only keep explicit groups (enclosed in parentheses/brackets)
    delimiters = node.value[0] + node.value[-1]
    if delimiters not in ('()', '[]', '{}'):
        continue

    try:
        country = Country(inner, strict=True)
    except ValueError:
        # Not accepted as a country by the strict constructor: leave as is.
        continue

    node.guess = Guess(country=country, confidence=1.0, raw=inner)