How to use the rebulk.remodule.re function in rebulk

To help you get started, we’ve selected a few rebulk examples based on popular ways rebulk is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pymedusa / Medusa / lib / guessit / rules / properties / part.py View on Github external
def part():
    """
    Builder for rebulk object.
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash], validator={'__parent__': seps_surround})

    prefixes = ['pt', 'part']

    def validate_roman(match):
        """
        Validate a roman numeral match.

        :param match: match to check; its ``raw`` attribute is tested with ``int_coercable``
        :type match:
        :return: ``True`` when ``match.raw`` is int-coercable, otherwise the result of
            ``seps_surround(match)``
        :rtype:
        """
        raw_is_integer = int_coercable(match.raw)
        # Only non-integer raw values have to pass the separator check.
        return True if raw_is_integer else seps_surround(match)

    rebulk.regex(build_or_pattern(prefixes) + r'-?(?P' + numeral + r')',
github pymedusa / Medusa / lib / guessit / rules / properties / website.py View on Github external
def website():
    """
    Builder for rebulk object.

    Loads the domain extensions from the ``tlds-alpha-by-domain.txt`` resource of the
    ``guessit`` package and registers a case-insensitive regex pattern built from them.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE).string_defaults(ignore_case=True)
    rebulk.defaults(name="website")

    # Use a context manager so the resource stream is closed deterministically
    # instead of being left open until garbage collection.
    with resource_stream('guessit', 'tlds-alpha-by-domain.txt') as tld_file:
        # Entries containing b'--' are filtered out and the first remaining entry is
        # dropped (presumably the file's header line -- confirm against the data file).
        tlds = [
            tld.strip().decode('utf-8')
            for tld in tld_file.readlines()
            if b'--' not in tld
        ][1:]  # All registered domain extension

    safe_tlds = ['com', 'org', 'net']  # For sure a website extension
    safe_subdomains = ['www']  # For sure a website subdomain
    safe_prefix = ['co', 'com', 'org', 'net']  # Those words before a tlds are sure

    website_prefixes = ['from']

    # <safe subdomain>.<one or more labels>.<tld>, delimited on both sides by a
    # non-alphanumeric character or a string boundary.
    rebulk.regex(r'(?:[^a-z0-9]|^)((?:'+build_or_pattern(safe_subdomains) +
                 r'\.)+(?:[a-z-]+\.)+(?:'+build_or_pattern(tlds) +
                 r'))(?:[^a-z0-9]|$)',
                 children=True)
github guessit-io / guessit / guessit / rules / common / formatters.py View on Github external
replace_indices = []

        for potential_index in potential_indices:
            if potential_index - 2 in potential_indices or potential_index + 2 in potential_indices:
                replace_indices.append(potential_index)

        if replace_indices:
            for replace_index in replace_indices:
                dots.add(input_string[replace_index])
                clean_list[replace_index] = input_string[replace_index]
            clean_string = ''.join(clean_list)

    clean_string = strip(clean_string, ''.join([c for c in seps if c not in dots]))

    clean_string = re.sub(' +', ' ', clean_string)
    return clean_string
github h3llrais3r / Auto-Subliminal / lib / guessit / rules / properties / bonus.py View on Github external
def bonus(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    Registers a case-insensitive ``x<digits>`` pattern named ``bonus`` whose captured
    digits are formatted with ``int``, then adds ``BonusTitleRule``.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'bonus'))
    rebulk = rebulk.regex_defaults(flags=re.IGNORECASE)

    def _solve_conflict(match, conflicting):
        """
        Return ``match`` when the conflicting match is a ``video_codec`` or ``episode``
        not tagged ``weak-episode``; otherwise return ``'__default__'``.
        """
        if conflicting.name in ('video_codec', 'episode') and 'weak-episode' not in conflicting.tags:
            return match
        return '__default__'

    rebulk.regex(r'x(\d+)', name='bonus', private_parent=True, children=True, formatter=int,
                 # Pass the validator itself. The previous ``lambda match: seps_surround``
                 # returned the function object (always truthy) without calling it, so the
                 # parent match was never actually validated.
                 validator={'__parent__': seps_surround},
                 conflict_solver=_solve_conflict)

    rebulk.rules(BonusTitleRule)

    return rebulk
github pymedusa / Medusa / lib / guessit / rules / common / numeral.py View on Github external
Convert Word numeral to integer

    :param value: Value to parse
    :type value: string
    :return:
    :rtype:
    """
    for word_list in [english_word_numeral_list, french_word_numeral_list, french_alt_word_numeral_list]:
        try:
            return word_list.index(value.lower())
        except ValueError:
            pass
    raise ValueError  # pragma: no cover


# Matches a run of digits (captured as group 1) together with any non-digit
# characters immediately before and after it.
_clean_re = re.compile(r'[^\d]*(\d+)[^\d]*')


def parse_numeral(value, int_enabled=True, roman_enabled=True, word_enabled=True, clean=True):
    """
    Parse a numeric value into integer.

    :param value: Value to parse. Can be an integer, roman numeral or word.
    :type value: string
    :param int_enabled:
    :type int_enabled:
    :param roman_enabled:
    :type roman_enabled:
    :param word_enabled:
    :type word_enabled:
    :param clean:
    :type clean: