Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_regex_replace(self):
    """Check textlib.replaceExcept with regular-expression patterns.

    Every scenario is run with an empty exception list and ``self.site``
    as the site, and the returned text is compared with the expected
    string. Scenarios are checked in order; the first mismatch fails the
    test.
    """
    # Each row: (input text, regex pattern, replacement template, expected).
    scenarios = (
        # one digit at a time
        ('A123B', r'\d', r'x', 'AxxxB'),
        # a whole run of digits at once
        ('A123B', r'\d+', r'x', 'AxB'),
        # numbered group references in the replacement
        ('A123B', r'A(\d)2(\d)B', r'A\1x\2B', 'A1x3B'),
        # empty input, pattern that can match the empty string
        ('', r'(a?)', r'\1B', 'B'),
        # zero-width matches around every character
        ('abc', r'x*', r'-', '-a-b-c-'),
    )
    for subject, pattern, template, wanted in scenarios:
        outcome = textlib.replaceExcept(subject, pattern, template, [],
                                        site=self.site)
        self.assertEqual(outcome, wanted)
# This is different from re.sub() as re.sub() doesn't
def test_replace_with_marker(self):
    """Check the ``marker`` argument of textlib.replaceExcept.

    Both calls use ``marker='.'``, an empty exception list and
    ``self.site``; the expected strings pin down where the marker is
    placed in the returned text.
    """
    # 'x' occurs twice: the expected text carries the marker directly
    # after the last replaced character.
    with_hits = textlib.replaceExcept('AxyxB', 'x', 'y', [],
                                      marker='.', site=self.site)
    self.assertEqual(with_hits, 'Ayyy.B')

    # '1' never occurs: the expected text is the input with the marker
    # appended at the end.
    without_hits = textlib.replaceExcept('AxyxB', '1', 'y', [],
                                         marker='.', site=self.site)
    self.assertEqual(without_hits, 'AxyxB.')
'[[File:x|]]')
self.assertEqual(
textlib.replaceExcept(
'[[File:x|foo|bar x]] x',
'x', 'y', ['file'], site=self.site),
'[[File:x|foo|bar x]] y')
self.assertEqual(
textlib.replaceExcept(
'[[File:x|]][[File:x|foo]]',
'x', 'y', ['file'], site=self.site),
'[[File:x|]][[File:x|foo]]')
self.assertEqual(
textlib.replaceExcept(
'[[NonFile:x]]',
'x', 'y', ['file'], site=self.site),
'[[NonFile:y]]')
self.assertEqual(
textlib.replaceExcept(
'[[File:]]',
'File:', 'NonFile:', ['file'], site=self.site),
'[[File:]]')
self.assertEqual(
textlib.replaceExcept(
'[[File:x|[[foo]].]]',
'x', 'y', ['file'], site=self.site),
'[[File:x|[[foo]].]]')
'x', 'y', ['source'],
site=self.site),
'<source>x')
self.assertEqual(textlib.replaceExcept(
'x',
'x', 'y', ['source'], site=self.site),
'x')
self.assertEqual(
textlib.replaceExcept('x',
'x', 'y', ['source'], site=self.site),
'x')
self.assertEqual(textlib.replaceExcept('x',
'x', 'y', ['includeonly'],
site=self.site),
'x')
self.assertEqual(textlib.replaceExcept('x', 'x', 'y',
['ref'], site=self.site),
'x')
self.assertEqual(textlib.replaceExcept('A',
'x', 'y',
['ref'], site=self.site),
'A')
self.assertEqual(textlib.replaceExcept(' xA ', 'x', 'y',
['startspace'], site=self.site),
' xA ')
self.assertEqual(textlib.replaceExcept(':xA ', 'x', 'y',
['startcolon'], site=self.site),
':xA ')
self.assertEqual(textlib.replaceExcept('x<table></table>', 'x', 'y',
['table'], site=self.site),
'x<table></table>')
self.assertEqual(textlib.replaceExcept('x [http://www.sample.com x]',
# FIXME: use textlib.NON_LATIN_DIGITS
# valid digits
digits = {
'ckb': u'٠١٢٣٤٥٦٧٨٩',
'fa': u'۰۱۲۳۴۵۶۷۸۹',
}
faChrs = u'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
new = digits.pop(self.site.code)
# This only works if there are only two items in digits dict
old = digits[list(digits.keys())[0]]
# not to let bot edits in latin content
exceptions.append(re.compile(u"[^%(fa)s] *?\"*? *?, *?[^%(fa)s]"
% {'fa': faChrs}))
text = textlib.replaceExcept(text, ',', '،', exceptions, site=self.site)
if self.site.code == 'ckb':
text = textlib.replaceExcept(text,
'\u0647([.\u060c_<\\]\\s])',
'\u06d5\\1', exceptions,
site=self.site)
text = textlib.replaceExcept(text, 'ه\u200c', 'ە', exceptions,
site=self.site)
text = textlib.replaceExcept(text, 'ه', 'ھ', exceptions,
site=self.site)
text = textlib.replaceExcept(text, 'ك', 'ک', exceptions,
site=self.site)
text = textlib.replaceExcept(text, '[ىي]', 'ی', exceptions,
site=self.site)
return text
# FIXME: split this function into two.
# replace persian/arabic digits
"""
sourceSite = sourceImagePage.site
url = sourceImagePage.fileUrl().encode('utf-8')
pywikibot.output('URL should be: ' + url)
# localize the text that should be printed on image description page
try:
description = sourceImagePage.get()
# try to translate license templates
if (sourceSite.sitename,
self.targetSite.sitename) in licenseTemplates:
for old, new in licenseTemplates[
(sourceSite.sitename,
self.targetSite.sitename)].items():
new = '{{%s}}' % new
old = re.compile('{{%s}}' % old)
description = textlib.replaceExcept(description, old, new,
['comment', 'math',
'nowiki', 'pre'])
description = i18n.twtranslate(self.targetSite,
'imagetransfer-file_page_message',
{'site': sourceSite,
'description': description})
description += '\n\n'
description += sourceImagePage.getFileVersionHistoryTable()
# add interwiki link
if sourceSite.family == self.targetSite.family:
description += '\n\n{0}'.format(sourceImagePage)
except pywikibot.NoPage:
description = ''
pywikibot.output(
'Image does not exist or description page is empty.')
# valid digits
digits = {
'ckb': '٠١٢٣٤٥٦٧٨٩',
'fa': '۰۱۲۳۴۵۶۷۸۹',
}
faChrs = 'ءاآأإئؤبپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیةيك' + digits['fa']
new = digits.pop(self.site.code)
# This only works if there are only two items in digits dict
old = digits[list(digits.keys())[0]]
# not to let bot edits in latin content
exceptions.append(re.compile('[^%(fa)s] *?\"*? *?, *?[^%(fa)s]'
% {'fa': faChrs}))
text = textlib.replaceExcept(text, ',', '،', exceptions,
site=self.site)
if self.site.code == 'ckb':
text = textlib.replaceExcept(text,
'\u0647([.\u060c_<\\]\\s])',
'\u06d5\\1', exceptions,
site=self.site)
text = textlib.replaceExcept(text, 'ه\u200c', 'ە', exceptions,
site=self.site)
text = textlib.replaceExcept(text, 'ه', 'ھ', exceptions,
site=self.site)
text = textlib.replaceExcept(text, 'ك', 'ک', exceptions,
site=self.site)
text = textlib.replaceExcept(text, '[ىي]', 'ی', exceptions,
site=self.site)
return text
# FIXME: split this function into two.
# replace persian/arabic digits
def fixTypo(self, text):
    """Fix unit notation in *text* and return the result.

    Three substitutions are applied in sequence through
    textlib.replaceExcept, all with ``site=self.site``:

    1. a digit followed by ``ccm`` is rewritten to use ``cm³``;
    2. a digit followed by ``º`` or ``°`` and ``C``/``F`` is rewritten
       to use ``°``;
    3. any remaining ``º`` directly before ``C``/``F`` becomes ``°``.

    The first substitution runs with the base exception list only; the
    two degree-sign substitutions additionally skip text enclosed in
    «...».
    """
    skipped = ['nowiki', 'comment', 'math', 'pre', 'source',
               'startspace', 'gallery', 'hyperlink', 'interwiki',
               'link']

    # change ccm -> cm³
    text = textlib.replaceExcept(
        text, r'(\d)\s*(?: )?ccm', r'\1 cm³', skipped, site=self.site)

    # Solve wrong Nº sign with °C or °F.
    # The «...» exception was requested on fr-wiki for the degree fixes,
    # so it is appended only after the ccm substitution has run.
    skipped.append(re.compile('«.*?»', re.UNICODE))
    degree_rules = (
        (r'(\d)\s*(?: )?[º°]([CF])', r'\1 °\2'),
        ('º([CF])', '°' + r'\1'),
    )
    for pattern, template in degree_rules:
        text = textlib.replaceExcept(
            text, pattern, template, skipped, site=self.site)
    return text
text,
r'\[\[(?Phttps?://[^\]]+?)\]\]?',
r'[\g]', exceptions, site=self.site)
# external link and description separated by a pipe, with
# whitespace in front of the pipe, so that it is clear that
# the dash is not a legitimate part of the URL.
text = textlib.replaceExcept(
text,
r'\[(?Phttps?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
r'[\g \g<label>]', exceptions)
# dash in external link, where the correct end of the URL can
# be detected from the file extension. It is very unlikely that
# this will cause mistakes.
extensions = [r'\.{0}'.format(ext)
for ext in ['pdf', 'html?', 'php', 'aspx?', 'jsp']]
text = textlib.replaceExcept(
text,
r'\[(?Phttps?://[^\|\] ]+?(' + '|'.join(extensions) + r')) *'
r'\| *(?P<label>[^\|\]]+?)\]',
r'[\g \g<label>]', exceptions)
return text
</label></label></label></label>
# FIXME: split this function into two.
# replace persian/arabic digits
# deactivated due to bug T57185
for i in range(0, 10):
text = textlib.replaceExcept(text, old[i], new[i], exceptions)
# do not change digits in class, style and table params
pattern = re.compile(r'\w+=(".+?"|\d+)', re.UNICODE)
exceptions.append(pattern)
# do not change digits inside html-tags
pattern = re.compile('<[/]*?[^', re.UNICODE)
exceptions.append(pattern)
exceptions.append('table') # exclude tables for now
# replace digits
for i in range(0, 10):
text = textlib.replaceExcept(text, str(i), new[i], exceptions)
return text