Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def build_name2codepoint_dict():
    """
    Log the entries of a name-to-codepoint dictionary, one per entity.

    The logged lines are meant to be copied and pasted into the
    name2codepoint dictionary.

    name2str - name to utf-8 string dictionary
    """
    import html.entities

    # name2str is the same object as html_entity2str (not a copy), so the
    # entries added below are stored in that dictionary as well.
    name2str = html_entity2str
    name2str.update(
        (entity.lower(), chr(codepoint).encode("utf-8"))
        for entity, codepoint in html.entities.name2codepoint.items()
    )

    for key in sorted(name2str):
        value = toStr(name2str[key])
        # ord() below needs exactly one character; reject anything longer.
        if len(value) > 1:
            raise ValueError("value = %r" % value)
        entry = " \"{0}\": 0x{1:0>4x}, # {2}".format(key, ord(value), value)
        log.info(entry)
if s[0] == "#":
s = s[1:]
if s[0] in ['x','X']:
c = int(s[1:], 16)
else:
c = int(s)
return chr(c)
except ValueError:
return '&#'+ s +';'
else:
# Cannot use name2codepoint directly, because HTMLParser
# supports apos, which is not part of HTML 4
import html.entities
if HTMLParser.entitydefs is None:
entitydefs = HTMLParser.entitydefs = {'apos':"'"}
for k, v in html.entities.name2codepoint.items():
entitydefs[k] = chr(v)
try:
return self.entitydefs[s]
except KeyError:
return '&'+s+';'
def _build_unicode_map() -> Dict[str, str]:
    """Return a dict mapping each HTML entity name to its character.

    The mapping is derived from ``html.entities.name2codepoint``; every
    code point is converted to a one-character string with ``chr``.
    """
    return {
        entity: chr(codepoint)
        for entity, codepoint in html.entities.name2codepoint.items()
    }
def _build_unicode_map():
    """Return a dict mapping each HTML entity name to its unicode character.

    The mapping is derived from ``htmlentitydefs.name2codepoint``; every
    code point is converted with ``unichr``.
    """
    return dict(
        (entity, unichr(codepoint))
        for entity, codepoint in htmlentitydefs.name2codepoint.items()
    )
Kid/ElementTree.
The parsing is initiated by iterating over the parser object:
>>> parser = XMLParser(StringIO('<root id="2"><child>Foo</child></root>'))
>>> for kind, data, pos in parser:
... print(('%s %s' % (kind, data)))
START (QName('root'), Attrs([(QName('id'), '2')]))
START (QName('child'), Attrs())
TEXT Foo
END child
END root
"""
# One XML entity declaration per HTML named entity, each expanding to the
# numeric character reference of its code point, e.g. <!ENTITY amp "&#38;">.
# The format string must contain both placeholders: an empty template makes
# the % operator raise TypeError ("not all arguments converted").
_entitydefs = ['<!ENTITY %s "&#%d;">' % (name, value)
               for name, value in entities.name2codepoint.items()]
# All declarations joined one per line and encoded as UTF-8 bytes.
_external_dtd = '\n'.join(_entitydefs).encode('utf-8')
def __init__(self, source, filename=None, encoding=None):
    """Initialize the parser for the given XML input.

    The `source` and `filename` arguments are stored on the instance as
    ``self.source`` and ``self.filename``.

    :param source: the XML text as a file-like object
    :param filename: the name of the file, if appropriate
    :param encoding: the encoding of the file; if not specified, the
                     encoding is assumed to be ASCII, UTF-8, or UTF-16, or
                     whatever the encoding specified in the XML declaration
                     (if any)
    """
    self.source = source
    self.filename = filename
    # Setup the Expat parser
def replaceEntities(s):
    """Return the replacement text for one matched entity reference.

    :param s: a regex match object whose first group is the reference body
              without the leading ``&`` and trailing ``;`` (for example
              ``#65``, ``#x41`` or ``amp``)
    :return: the referenced character; if the reference cannot be resolved,
             the reference text rebuilt as ``&...;`` so the input is left
             effectively unchanged

    ``self`` and ``HTMLParser`` are not defined in this function; they are
    resolved from the enclosing scope.
    """
    s = s.groups()[0]
    if s.startswith("#"):
        # Numeric character reference: decimal, or hex when prefixed by x/X.
        s = s[1:]
        try:
            # Slice rather than index so an empty remainder falls through
            # to int(''), which raises ValueError, instead of IndexError.
            if s[:1] in ('x', 'X'):
                c = int(s[1:], 16)
            else:
                c = int(s)
            return chr(c)
        except (ValueError, OverflowError):
            # Malformed digits or a code point chr() rejects: keep the
            # reference text instead of propagating the error.
            return '&#' + s + ';'

    # Cannot use name2codepoint directly, because HTMLParser
    # supports apos, which is not part of HTML 4
    import html.entities
    if HTMLParser.entitydefs is None:
        # Build the shared name -> character table on first use.
        entitydefs = HTMLParser.entitydefs = {'apos': "'"}
        for k, v in html.entities.name2codepoint.items():
            entitydefs[k] = chr(v)
    try:
        return self.entitydefs[s]
    except KeyError:
        # Unknown entity name: keep the reference text.
        return '&' + s + ';'
if s[0] == "#":
s = s[1:]
if s[0] in ['x','X']:
c = int(s[1:], 16)
else:
c = int(s)
return chr(c)
except ValueError:
return '&#'+ s +';'
else:
# Cannot use name2codepoint directly, because HTMLParser
# supports apos, which is not part of HTML 4
import html.entities
if HTMLParser.entitydefs is None:
entitydefs = HTMLParser.entitydefs = {'apos':"'"}
for k, v in html.entities.name2codepoint.items():
entitydefs[k] = chr(v)
try:
return self.entitydefs[s]
except KeyError:
return '&'+s+';'
if s[0] == "#":
s = s[1:]
if s[0] in ['x','X']:
c = int(s[1:], 16)
else:
c = int(s)
return chr(c)
except ValueError:
return '&#'+ s +';'
else:
# Cannot use name2codepoint directly, because HTMLParser
# supports apos, which is not part of HTML 4
import html.entities
if HTMLParser.entitydefs is None:
entitydefs = HTMLParser.entitydefs = {'apos':"'"}
for k, v in html.entities.name2codepoint.items():
entitydefs[k] = chr(v)
try:
return self.entitydefs[s]
except KeyError:
return '&'+s+';'