How to use the regex.match function in regex

To help you get started, we’ve selected a few regex.match examples, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github intentionet / netconan / tests / unit / test_ip_anonymization.py View on Github external
# Make sure all addresses to be checked are in ip_tree and generate reference mapping
    for ip_addr_raw in ip_v4_list:
        ip_addr = anonymizer_v4.make_addr(ip_addr_raw)
        ip_int = int(ip_addr)
        ip_int_anon = anonymizer_v4.anonymize(ip_int)
        ip_addr_anon = str(ipaddress.IPv4Address(ip_int_anon))
        ip_mapping[str(ip_addr)] = ip_addr_anon

    filename = str(tmpdir.mkdir("test").join("test_dump_iptree.txt"))
    with open(filename, 'w') as f_tmp:
        anonymizer_v4.dump_to_file(f_tmp)

    with open(filename, 'r') as f_tmp:
        # Build mapping dict from the output of the ip_tree dump
        for line in f_tmp.readlines():
            m = regex.match(r'\s*(\d+\.\d+.\d+.\d+)\s+(\d+\.\d+.\d+.\d+)\s*', line)
            ip_addr = m.group(1)
            ip_addr_anon = m.group(2)
            ip_mapping_from_dump[ip_addr] = ip_addr_anon

    for ip_addr in ip_mapping:
        # Confirm anon addresses from ip_tree dump match anon addresses from _convert_to_anon_ip
        assert(ip_mapping[ip_addr] == ip_mapping_from_dump[ip_addr])
github standardebooks / tools / se / vendor / kobo_touch_extended / kobo.py View on Github external
def append_kobo_spans_from_text(node, text):
	"""Split ``text`` into sentence-like segments and append one Kobo span per segment to ``node``.

	Each span is an XHTML ``span`` element with class ``koboSpan`` and an id of
	the form ``kobo.<paragraph_counter>.<segment_counter>``. The module-level
	``segment_counter`` is incremented once for every span appended;
	``paragraph_counter`` is only read.

	Args:
		node: Element that receives the new spans via ``node.append``.
		text: The text to segment, or ``None``.

	Returns:
		``False`` when ``text`` consists solely of whitespace (nothing is
		appended). In every other case the code shown here falls off the end
		and returns ``None``, including when ``text`` is ``None``.
	"""
	global paragraph_counter
	global segment_counter

	if text is not None:
		# If text is only whitespace, don't add spans.
		# fullmatch is required here: with match(r"^\s+$", ..., MULTILINE) the
		# "$" also matches before an interior newline, so text such as
		# " \nHello." was wrongly treated as whitespace-only and dropped.
		if regex.fullmatch(r"\s+", text):
			return False
		else:
			# Split text in sentences: each captured group runs up to a
			# terminator (. ! ? :), an optional closing quote, and any
			# whitespace that follows it.
			groups = regex.split(r'(.*?[\.\!\?\:][\'"\u201d\u2019]?\s*)', text, flags=regex.MULTILINE)
			# Remove empty strings resulting from split()
			groups = [g for g in groups if g != ""]

			# To match Kobo KePubs, the trailing whitespace needs to be
			# prepended to the next group. Probably equivalent to make sure
			# the space stays in the span at the end.
			# add each sentence in its own span
			for group in groups:
				span = etree.Element("{%s}span" % ("http://www.w3.org/1999/xhtml", ), attrib={"id": "kobo.{0}.{1}".format(paragraph_counter, segment_counter), "class": "koboSpan"})
				span.text = group
				node.append(span)
				segment_counter += 1
github dmort27 / epitran / epitran / backoff.py View on Github external
tr_list = []
        while token:
            is_outside_lang = True
            for dia, lang in zip(self.dias, self.langs):
                source = ''
                while True:
                    m = lang.epi.regexp.match(dia.process(token))
                    if not m:
                        break
                    s = m.group()
                    token = token[len(s):]
                    source += s
                    is_outside_lang = False
                tr_list.append(lang.transliterate(source))
            if is_outside_lang:
                m = re.match(r'\p{Number}+', token)
                if m:
                    source = m.group()
                    tr_list.append(source)
                    token = token[len(source):]
                else:
                    tr_list.append(token[0])
                    token = token[1:]
        return ''.join(tr_list)
github melissaboiko / joyodb / joyodb / convert.py View on Github external
Or a range, with a U+FF0D FULLWIDTH HYPHEN-MINUS:
    >>> is_sound_index('キ-キツ')
    True

    Or also with hiragana:
    >>> is_sound_index('オウ-おそれ')
    True

    Content lines won't match:
    >>> is_page_index('\t \t \t なつける\t 懐ける\t')
    False
    """

    line = line.strip()
    if re.match(r'^[\p{Katakana}\p{Hiragana}-]+$', line):
        return(True)
    else:
        return(False)
github microsoft / Recognizers-Text / Python / libraries / recognizers-date-time / recognizers_date_time / date_time / base_date.py View on Github external
# handle "last Friday", "last mon"
        match = regex.match(self.config.last_regex, trimmed_source)
        if match and match.start() == 0 and len(match.group()) == len(trimmed_source):
            weekday_str = match.group(Constants.WEEKDAY_GROUP_NAME)
            value = DateUtils.last(
                reference, self.config.day_of_week.get(weekday_str))

            result.timex = DateTimeFormatUtil.luis_date_from_datetime(value)
            result.future_value = value
            result.past_value = value
            result.success = True
            return result

        # handle "Friday"
        match = regex.match(self.config.week_day_regex, trimmed_source)
        if match and match.start() == 0 and len(match.group()) == len(trimmed_source):
            weekday_str = match.group(Constants.WEEKDAY_GROUP_NAME)
            weekday = self.config.day_of_week.get(weekday_str)
            value = DateUtils.this(reference, weekday)

            if weekday < int(DayOfWeek.MONDAY):
                weekday = int(DayOfWeek.SUNDAY)

            if weekday < reference.isoweekday():
                value = DateUtils.next(reference, weekday)

            result.timex = 'XXXX-WXX-' + str(weekday)
            future_date = value
            past_date = value

            if future_date < reference:
github merc-devel / merc / merc / user.py View on Github external
def hostmask_matches(self, pattern):
    """Report whether this object's hostmask matches a glob-style ``pattern``.

    The pattern is lowered with ``util.to_irc_lower`` and converted to a
    regular expression by ``fnmatch.translate``; the hostmask is lowered the
    same way before it is matched.

    Returns:
        ``True`` if the lowered hostmask matches the translated pattern,
        ``False`` otherwise.
    """
    folded_pattern = util.to_irc_lower(pattern)
    glob_regex = fnmatch.translate(folded_pattern)
    folded_hostmask = util.to_irc_lower(self.hostmask)
    if regex.match(glob_regex, folded_hostmask) is None:
        return False
    return True
github lioncash / cppcheck-configs / Generator / Generator.py View on Github external
Determines the type of line within a function based off
    the starting characters on the line.
    """
    if len(line) == 0:
        return FunctionLineType.EMPTY
    if regex.match("^pure", line, regex.IGNORECASE):
        return FunctionLineType.PURE
    if regex.match("^const", line, regex.IGNORECASE):
        return FunctionLineType.CONST
    if regex.match("^ur", line, regex.IGNORECASE):
        return FunctionLineType.USE_RETVAL
    if regex.match("^rv", line, regex.IGNORECASE):
        return FunctionLineType.RETURN_TYPE
    if regex.match("^li", line, regex.IGNORECASE):
        return FunctionLineType.LEAK_IGNORE
    if regex.match("^nr", line, regex.IGNORECASE):
        return FunctionLineType.NORETURN
    if regex.match("^[0-9]+", line, regex.IGNORECASE):
        return FunctionLineType.ARGUMENT

    raise ParseError("Invalid function line type '{}' encountered.".format(line))
github cbuijs / unbound-dns-firewall / dns-firewall.py View on Github external
sourcefile = downloadfile
                                else:
                                    sourcefile = source

                                if file_exist(sourcefile) >= 0:
                                    if sourcefile != listfile:
                                        try:
                                            log_info(tag + 'Creating \"' + id + '\" file \"' + listfile + '\" from \"' + sourcefile + '\"')
                                            with open(sourcefile, 'r') as f:
                                                try:
                                                    with open(listfile, 'w') as g:
                                                        for line in f:
                                                            line = line.replace('\r', '').lower().strip()
                                                            if line and len(line) >0:
                                                                if not exclude.match(line):
                                                                    matchentry = regex.match(fregex, line, regex.I)
                                                                    if matchentry:
                                                                        for placeholder in ['asn', 'domain', 'entry', 'ip', 'line', 'regex']:
                                                                            try:
                                                                                entry = matchentry.group(placeholder)
                                                                            except:
                                                                                entry = False

                                                                            if entry and len(entry) > 0:
                                                                                if not exclude.match(entry):
                                                                                    # !!! To do: use placholder to pre-process/validate/error-check type of entry via regex
                                                                                    #print placeholder, entry
                                                                                    g.write(entry)
                                                                                    g.write('\n')
                                                                                else:
                                                                                    if (debug >= 3): log_info(tag + id +': Skipping excluded entry \"' + line + '\" (' + entry + ')')
github intentionet / netconan / netconan / sensitive_item_removal.py View on Github external
def _check_sensitive_item_format(val):
    """Classify ``val`` as one of the ``_sensitive_item_formats`` members.

    Several patterns can match the same value (e.g. an all-digit string is
    also valid hexadecimal), so the checks run from highest to lowest
    precedence and the first hit is returned. Anything that matches none of
    the patterns is reported as plain text.
    """
    # Precedence, highest first: numeric, Cisco type 7, hexadecimal, MD5,
    # SHA-512, Juniper type 9, then the text fallback.
    if regex.match(r'^[0-9]+$', val):
        return _sensitive_item_formats.numeric
    if regex.match(r'^[01][0-9]([0-9a-fA-F]{2})+$', val):
        return _sensitive_item_formats.cisco_type7
    if regex.match(r'^[0-9a-fA-F]+$', val):
        return _sensitive_item_formats.hexadecimal
    if regex.match(r'^\$1\$[\S]+\$[\S]+$', val):
        return _sensitive_item_formats.md5
    if regex.match(r'^\$6\$[\S]+$', val):
        return _sensitive_item_formats.sha512
    if regex.match(r'^\$9\$[\S]+$', val):
        return _sensitive_item_formats.juniper_type9
    return _sensitive_item_formats.text