Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
rebulk.rules(SubtitleExtensionRule,
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
RemoveLanguage,
RemoveInvalidLanguages(common_words))
babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])
return rebulk
# babelfish Language built from the 'und' code; the name indicates it stands
# for an undetermined language.
UNDETERMINED = babelfish.Language('und')
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
_with_country_regexp = re.compile(r'(.*)\((.*)\)')
_with_country_regexp2 = re.compile(r'(.*)-(.*)')
def __init__(self, synonyms):
    """
    Build the synonym lookup table from the given synonyms mapping.

    Each key of ``synonyms`` is either a bare code or ``code_COUNTRY``; each
    value is an iterable of synonym strings. Every synonym is stored
    lowercased in ``self.guessit_exceptions`` and maps to a
    ``(alpha3, country, None)`` tuple.

    :param synonyms: mapping of code to synonym strings
    :type synonyms: dict
    """
    table = self.guessit_exceptions = {}
    for code, names in synonyms.items():
        # A key without an underscore carries no country part.
        alpha3, country = code.split('_') if '_' in code else (code, None)
        target = (alpha3, country, None)
        table.update((name.lower(), target) for name in names)
@property
def codes(self): # pylint: disable=missing-docstring
return (babelfish.language_converters['alpha3b'].codes |
babelfish.language_converters['alpha2'].codes |
rebulk.rules(SubtitleExtensionRule,
SubtitlePrefixLanguageRule,
SubtitleSuffixLanguageRule,
RemoveLanguage,
RemoveInvalidLanguages(common_words))
babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])
return rebulk
# babelfish Language built from the 'und' code; the name indicates it stands
# for an undetermined language.
UNDETERMINED = babelfish.Language('und')
class GuessitConverter(babelfish.LanguageReverseConverter): # pylint: disable=missing-docstring
_with_country_regexp = re.compile(r'(.*)\((.*)\)')
_with_country_regexp2 = re.compile(r'(.*)-(.*)')
def __init__(self, synonyms):
    """
    Build the synonym lookup table from the given synonyms mapping.

    Each key of ``synonyms`` is either a bare code or ``code_COUNTRY``; each
    value is an iterable of synonym strings. Every synonym is stored
    lowercased in ``self.guessit_exceptions`` and maps to a
    ``(alpha3, country, None)`` tuple.

    :param synonyms: mapping of code to synonym strings
    :type synonyms: dict
    """
    table = self.guessit_exceptions = {}
    for code, names in synonyms.items():
        # A key without an underscore carries no country part.
        alpha3, country = code.split('_') if '_' in code else (code, None)
        target = (alpha3, country, None)
        table.update((name.lower(), target) for name in names)
@property
def codes(self): # pylint: disable=missing-docstring
return (babelfish.language_converters['alpha3b'].codes |
babelfish.language_converters['alpha2'].codes |
('spa', None): ['esp', 'español', 'espanol'],
('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
('swe', None): ['se'],
('por', 'BR'): ['po', 'pb', 'pob', 'ptbr', 'br', 'brazilian'],
('cat', None): ['català', 'castellano', 'espanol castellano', 'español castellano'],
('ces', None): ['cz'],
('ukr', None): ['ua'],
('zho', None): ['cn'],
('jpn', None): ['jp'],
('hrv', None): ['scr'],
('mul', None): ['multi', 'dl']} # http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    """Reverse converter that adds the SYN synonyms to babelfish's own codes."""

    # "<text>(<text>)" and "<text>-<text>" shapes, two capture groups each;
    # presumably language and country, going by the attribute names.
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self):
        """Flatten SYN into ``{lowercased synonym: (alpha3, country, None)}``."""
        table = self.guessit_exceptions = {}
        for (alpha3, country), names in SYN.items():
            target = (alpha3, country, None)
            table.update((name.lower(), target) for name in names)

    @property
    def codes(self):  # pylint: disable=missing-docstring
        """
        Union of the codes of babelfish's alpha3b, alpha2, name and
        opensubtitles language converters, its name country converter, and
        the synonyms held in ``guessit_exceptions``.
        """
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions.keys())
# Language synonyms, keyed by an (alpha3 code, country code or None) pair.
SYN = {
    ('ell', None): ['gr', 'greek'],
    ('spa', None): ['esp', 'español', 'espanol'],
    ('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
    ('swe', None): ['se'],
    ('por', 'BR'): ['po', 'pb', 'pob', 'ptbr', 'br', 'brazilian'],
    ('cat', None): ['català', 'castellano', 'espanol castellano', 'español castellano'],
    ('ces', None): ['cz'],
    ('ukr', None): ['ua'],
    ('zho', None): ['cn'],
    ('jpn', None): ['jp'],
    ('hrv', None): ['scr'],
    # 'dl': see http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
    ('mul', None): ['multi', 'dl'],
}
class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    """Reverse converter that adds the SYN synonyms to babelfish's own codes."""

    # "<text>(<text>)" and "<text>-<text>" shapes, two capture groups each;
    # presumably language and country, going by the attribute names.
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self):
        """Flatten SYN into ``{lowercased synonym: (alpha3, country, None)}``."""
        table = self.guessit_exceptions = {}
        for (alpha3, country), names in SYN.items():
            target = (alpha3, country, None)
            table.update((name.lower(), target) for name in names)

    @property
    def codes(self):  # pylint: disable=missing-docstring
        """
        Union of the codes of babelfish's alpha3b, alpha2, name and
        opensubtitles language converters, its name country converter, and
        the synonyms held in ``guessit_exceptions``.
        """
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions.keys())
rebulk.regex(r"(?:\d{3,}(?:x|\*))?360(?:i|p?x?)", value="360p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?368(?:i|p?x?)", value="368p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?480(?:i|p?x?)", value="480p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?576(?:i|p?x?)", value="576p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:i|p?(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720(?:p(?:50|60)?x?)", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?720p?hd", value="720p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?900(?:i|p?x?)", value="900p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080i", value="1080i")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?x?", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080(?:p(?:50|60)?x?)", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?1080p?hd", value="1080p")
rebulk.regex(r"(?:\d{3,}(?:x|\*))?2160(?:i|p?x?)", value="4K")
rebulk.string('4k', value='4K')
# Matches each run of decimal digits; the resolution formatter below uses
# findall() on it and joins the pieces with 'x'.
_digits_re = re.compile(r'\d+')
rebulk.defaults(name="screen_size", validator=seps_surround)
rebulk.regex(r'\d{3,}-?(?:x|\*)-?\d{3,}',
formatter=lambda value: 'x'.join(_digits_re.findall(value)),
abbreviations=[dash],
tags=['resolution'],
conflict_solver=lambda match, other: '__default__' if other.name == 'screen_size' else other)
rebulk.rules(ScreenSizeOnlyOne, RemoveScreenSizeConflicts)
return rebulk
self.abbreviations = kwargs.get('abbreviations', [])
self._kwargs = kwargs
self._match_kwargs = filter_match_kwargs(kwargs)
self._children_match_kwargs = filter_match_kwargs(kwargs, children=True)
self._patterns = []
for pattern in patterns:
if isinstance(pattern, six.string_types):
if self.abbreviations and pattern:
for key, replacement in self.abbreviations:
pattern = pattern.replace(key, replacement)
pattern = call(re.compile, pattern, **self._kwargs)
elif isinstance(pattern, dict):
if self.abbreviations and 'pattern' in pattern:
for key, replacement in self.abbreviations:
pattern['pattern'] = pattern['pattern'].replace(key, replacement)
pattern = re.compile(**pattern)
elif hasattr(pattern, '__iter__'):
pattern = re.compile(*pattern)
self._patterns.append(pattern)
conflict_solver=lambda match, other: match
if other.name in ['episode', 'season']
else '__default__')
rebulk.functional(guess_idnumber, name='uuid',
conflict_solver=lambda match, other: match
if other.name in ['episode', 'season']
else '__default__')
return rebulk
# Integer tags for three kinds of character (digit, letter, anything else).
# NOTE(review): presumably used by guess_idnumber to classify characters;
# the code that reads them is not visible here, so confirm.
_DIGIT = 0
_LETTER = 1
_OTHER = 2

# Candidate identifier: a run of at least 20 ASCII letters, digits or dashes.
# The group has to be named because guess_idnumber reads match.groupdict();
# an unnamed ``(?P[...`` form is rejected by ``re`` with "unknown extension".
# The name 'uuid' mirrors the property name the matcher is registered under.
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))
def guess_idnumber(string):
"""
Guess id number function
:param string:
:type string:
:return:
:rtype:
"""
# pylint:disable=invalid-name
ret = []
matches = list(_idnum.finditer(string))
for match in matches:
result = match.groupdict()
conflict_solver=lambda match, other: other
if other.name in ['episode', 'season']
else '__default__')
rebulk.functional(guess_idnumber, name='uuid',
conflict_solver=lambda match, other: match
if other.name in ['episode', 'season']
else '__default__')
return rebulk
# Integer tags for three kinds of character (digit, letter, anything else).
# NOTE(review): presumably used by guess_idnumber to classify characters;
# the code that reads them is not visible here, so confirm.
_DIGIT = 0
_LETTER = 1
_OTHER = 2

# Candidate identifier: a run of at least 20 ASCII letters, digits or dashes.
# The group has to be named because guess_idnumber reads match.groupdict();
# an unnamed ``(?P[...`` form is rejected by ``re`` with "unknown extension".
# The name 'uuid' mirrors the property name the matcher is registered under.
_idnum = re.compile(r'(?P<uuid>[a-zA-Z0-9-]{20,})')  # 1.0, (0, 0))
def guess_idnumber(string):
"""
Guess id number function
:param string:
:type string:
:return:
:rtype:
"""
# pylint:disable=invalid-name
ret = []
matches = list(_idnum.finditer(string))
for match in matches:
result = match.groupdict()
Convert Word numeral to integer
:param value: Value to parse
:type value: string
:return:
:rtype:
"""
for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
try:
return word_list.index(value.lower())
except ValueError:
pass
raise ValueError # pragma: no cover
# Captures one run of digits (group 1), allowing any non-digit characters
# before and after it.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')
def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
"""
Parse a numeric value into integer.
:param value: Value to parse. Can be an integer, roman numeral or word.
:type value: string
:param int_enabled:
:type int_enabled:
:param roman_enabled:
:type roman_enabled:
:param word_enabled:
:type word_enabled:
:param clean:
:type clean:
# Language synonyms, keyed by an (alpha3 code, country code or None) pair.
SYN = {
    ('ell', None): ['gr', 'greek'],
    ('spa', None): ['esp', 'español', 'espanol'],
    ('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
    ('swe', None): ['se'],
    ('por', 'BR'): ['po', 'pb', 'pob', 'ptbr', 'br', 'brazilian'],
    ('cat', None): ['català', 'castellano', 'espanol castellano', 'español castellano'],
    ('ces', None): ['cz'],
    ('ukr', None): ['ua'],
    ('zho', None): ['cn'],
    ('jpn', None): ['jp'],
    ('hrv', None): ['scr'],
    # 'dl': see http://scenelingo.wordpress.com/2009/03/24/what-does-dl-mean/
    ('mul', None): ['multi', 'dl'],
}
class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    """Reverse converter that adds the SYN synonyms to babelfish's own codes."""

    # "<text>(<text>)" and "<text>-<text>" shapes, two capture groups each;
    # presumably language and country, going by the attribute names.
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self):
        """Flatten SYN into ``{lowercased synonym: (alpha3, country, None)}``."""
        table = self.guessit_exceptions = {}
        for (alpha3, country), names in SYN.items():
            target = (alpha3, country, None)
            table.update((name.lower(), target) for name in names)

    @property
    def codes(self):  # pylint: disable=missing-docstring
        """
        Union of the codes of babelfish's alpha3b, alpha2, name and
        opensubtitles language converters, its name country converter, and
        the synonyms held in ``guessit_exceptions``.
        """
        known = babelfish.language_converters['alpha3b'].codes
        for converter_name in ('alpha2', 'name', 'opensubtitles'):
            known = known | babelfish.language_converters[converter_name].codes
        known = known | babelfish.country_converters['name'].codes
        return known | frozenset(self.guessit_exceptions.keys())