Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
for hit in hits:
name = hit[2:-1]
try:
entnum = int(name)
s = s.replace(hit, chr(entnum))
except ValueError:
pass
matches = re.findall("&\w+;", s)
hits = set(matches)
amp = "&"
if amp in hits:
hits.remove(amp)
for hit in hits:
name = hit[1:-1]
if name in html.entities.name2codepoint:
s = s.replace(hit, "")
s = s.replace(amp, "&")
return s
input_file = sys.argv[1]
output_file = sys.argv[2]
in_pre = False
with open(output_file, 'w', encoding="utf-8") as _out, open(input_file, encoding="utf-8") as _in:
for line in _in:
# round-robin image servers
if "<img by="" for="" unicode="" in="" src="i/""> blocks
g = line.strip()
def codepoint2name(code):
    """Return a placeholder-escaped entity reference for a code point.

    Looks *code* up in ``html.entities.codepoint2name``. When a named
    entity exists the result is ``<AMP_SUBSTITUTE><name>;``; otherwise the
    decimal numeric form ``<AMP_SUBSTITUTE>#<code>;`` is returned.

    ``util.AMP_SUBSTITUTE`` is emitted where the ampersand would normally
    go (presumably swapped back for "&" later -- verify against callers).
    """
    entity = html.entities.codepoint2name.get(code)
    if entity:
        return "%s%s;" % (util.AMP_SUBSTITUTE, entity)
    # No named entity for this code point: use a decimal reference.
    return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
def name2cp(k):
    """Return the Unicode code point for the HTML entity name *k*.

    ``apos`` is answered directly, before any table lookup. Every other
    name is resolved through ``htmlentitydefs.name2codepoint`` when that
    table exists, and through ``htmlentitydefs.entitydefs`` otherwise.

    Raises:
        KeyError: if *k* is not present in the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    # Legacy path: entitydefs values are either a numeric reference such
    # as "&#8364;" or a latin-1 encoded character.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
def char2entity(c):
    """Return an HTML entity reference for the single character *c*.

    A named reference (``&name;``) is used when
    ``htmlentitydefs.codepoint2name`` has an entry for the character's
    code point; otherwise a decimal numeric reference (``&#N;``) is used.

    Raises:
        TypeError: propagated from ``ord`` when *c* is not exactly one
            character long.
    """
    cp = ord(c)
    name = htmlentitydefs.codepoint2name.get(cp)
    if name is not None:
        return '&%s;' % name
    return '&#%d;' % cp
def name2cp(k):
    """Map the HTML entity name *k* to its Unicode code point.

    ``apos`` is handled up front without consulting any table. Other
    names come from ``htmlentitydefs.name2codepoint`` if the module has
    it, else from the older ``htmlentitydefs.entitydefs`` mapping.

    Raises:
        KeyError: if *k* is missing from the table that is consulted.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]
    # Older modules only offer entitydefs, whose values are a numeric
    # reference ("&#NNNN;") or a latin-1 encoded character.
    k = htmlentitydefs.entitydefs[k]
    if k.startswith("&#") and k.endswith(";"):
        return int(k[2:-1])  # not in latin-1
    return ord(codecs.latin_1_decode(k)[0])
def handle_entityref(self, name):
    """Append the character for the named entity *name* to ``self.strings``.

    The name is resolved through ``entities.name2codepoint``. A name that
    is not in that table is appended as the literal text ``&name;`` so an
    unrecognised reference no longer raises ``KeyError``.
    """
    try:
        codepoint = entities.name2codepoint[name]
    except KeyError:
        # Unknown entity: keep the reference text instead of failing.
        self.strings.append("&%s;" % name)
    else:
        self.strings.append(chr(codepoint))
def convert_entityref(self, ref):
    """Return the character for the named entity *ref*.

    *ref* is looked up in ``html.entities.name2codepoint``. Names that are
    not in the table yield the placeholder string ``"[?]"``.
    """
    try:
        return chr(html.entities.name2codepoint[ref])
    except KeyError:
        # Unknown entity name: return the visible placeholder.
        return "[?]"
Tidies up unicode entities into HTML friendly entities
Takes a unicode string as an argument
Returns a unicode string
"""
try: # python2 compatible
import htmlentitydefs
except ImportError:
import html.entities
escaped = ""
for char in unistr:
try: # python2 compatible
if ord(char) in htmlentitydefs.codepoint2name:
name = htmlentitydefs.codepoint2name.get(ord(char))
escaped += '&%s;' % name if 'nbsp' not in name else ' '
except NameError:
if ord(char) in html.entities.codepoint2name:
name = html.entities.codepoint2name.get(ord(char))
escaped += '&%s;' % name if 'nbsp' not in name else ' '
else:
escaped += char
return escaped
def _replace_entity(match):
    """Return the character that an entity reference match stands for.

    ``match.group(1)`` is read as the body of the reference: ``#`` plus
    decimal digits, ``#x``/``#X`` plus hexadecimal digits, or an entity
    name looked up in ``html.entities.name2codepoint``.

    Whenever the body cannot be turned into a character -- malformed
    digits, a number ``chr`` rejects, an unknown name, or an empty body --
    the full matched text ``match.group(0)`` is returned unchanged.
    """
    text = match.group(1)
    # Slicing rather than indexing keeps an empty capture from raising.
    if text[:1] == '#':
        digits = text[1:]
        try:
            if digits[:1] in ('x', 'X'):
                c = int(digits[1:], 16)
            else:
                c = int(digits)
            return chr(c)
        except (ValueError, OverflowError):
            # ValueError: unparsable digits or a value above 0x10FFFF.
            # OverflowError: chr() on an integer too large for a C int.
            return match.group(0)
    try:
        return chr(html.entities.name2codepoint[text])
    except KeyError:
        return match.group(0)