How to use the html.entities.name2codepoint function in html

To help you get started, we’ve selected a few html examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Dieterbe / rss2email / html2text.py View on Github external
def name2cp(k):
    """Return the integer code point for the HTML entity name ``k``.

    ``apos`` is answered directly.  Other names are looked up in
    ``htmlentitydefs.name2codepoint`` when that table exists; otherwise the
    value is derived from ``htmlentitydefs.entitydefs``.  A ``KeyError``
    from the underlying table propagates for unknown names.
    """
    if k == 'apos':
        return ord("'")

    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Fallback for interpreters whose htmlentitydefs lacks name2codepoint
        # (that table requires Python 2.3).
        definition = htmlentitydefs.entitydefs[k]
        is_numeric_ref = definition.startswith("&#") and definition.endswith(";")
        if is_numeric_ref:
            # Not in latin-1: the definition is a "&#NNN;" reference.
            return int(definition[2:-1])
        decoded = codecs.latin_1_decode(definition)
        return ord(decoded[0])

    return htmlentitydefs.name2codepoint[k]
github airnotifier / airnotifier / util.py View on Github external
def handle_entityref(self, name):
        """Append the character for the named entity ``name`` to ``self.result``.

        :param name: entity name without the surrounding ``&`` and ``;``
                     (for example ``"amp"``)

        A name that is missing from ``name2codepoint`` is appended as the
        literal text ``&name;`` instead of raising ``KeyError``, so a single
        unrecognised reference does not abort processing of the document.
        """
        codepoint = name2codepoint.get(name)
        if codepoint is None:
            # Unknown entity: keep it visible rather than dropping or crashing.
            self.result.append("&%s;" % name)
        else:
            self.result.append(chr(codepoint))
github earthreader / libearth / libearth / sanitizer.py View on Github external
:returns: cleaned plain text
    :rtype: :class:`str`

    .. versionadded:: 0.4.0
       The ``base_uri`` parameter.

    """
    parser = HtmlSanitizer(base_uri)
    parser.feed(html)
    return ''.join(parser.fed)


class MarkupTagCleaner(HTMLParser.HTMLParser):
    """Parser that gathers the text content of the HTML fed to it.

    Its own docstring history says it is used internally by
    :func:`clean_html()`.  Text chunks and decoded named entities are
    collected, in order, in the :attr:`fed` list.
    """

    # Lookup table from entity name to integer code point.
    entity_map = htmlentitydefs.name2codepoint

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        # Accumulated text fragments, in document order.
        self.fed = []

    def handle_data(self, d):
        """Record the text chunk ``d``."""
        self.fed.append(d)

    def handle_entityref(self, name):
        """Record the character for entity ``name``; skip unknown names."""
        try:
            character = unichr(self.entity_map[name])
        except KeyError:
            # Unknown entity names contribute nothing to the output.
            return
        self.fed.append(character)
github naftaliharris / tauthon / Lib / packaging / pypi / simple.py View on Github external
def _decode_entity(self, match):
        """Return the text that one matched HTML entity reference stands for.

        ``match.group(1)`` holds the reference body: ``#x`` followed by
        hexadecimal digits, ``#`` followed by decimal digits, or an entity
        name.

        :param match: regular-expression match object for one reference
        :returns: the decoded character; the whole matched text
                  (``match.group(0)``) is returned unchanged when the name
                  is unknown or the number is not a valid code point
        """
        what = match.group(1)
        if what.startswith('#x'):
            codepoint = int(what[2:], 16)
        elif what.startswith('#'):
            codepoint = int(what[1:])
        else:
            from html.entities import name2codepoint
            codepoint = name2codepoint.get(what)
            if codepoint is None:
                # Unknown name: passing the matched string on to chr() would
                # raise TypeError, so hand the original text back instead.
                return match.group(0)
        try:
            return chr(codepoint)
        except (ValueError, OverflowError):
            # Number outside the range chr() accepts: leave the text as is.
            return match.group(0)
github gramps-project / addons-source / contrib / HeadlineNewsGramplet / HeadlineNewsGramplet.py View on Github external
def substitute(match):
    """Return the replacement text for one matched entity reference.

    ``match.group(1)`` is ``"#"`` for a numeric reference and
    ``match.group(2)`` is the reference body: decimal digits, ``x``/``X``
    followed by hexadecimal digits, or an entity name looked up in ``n2cp``.

    :param match: regular-expression match object with the two groups above
    :returns: the decoded character, or the matched text unchanged when the
              name is unknown, the number is malformed, or the code point
              is out of range
    """
    # unichr only exists on Python 2; on Python 3 chr covers all of Unicode.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    ent = match.group(2)
    if match.group(1) == "#":
        try:
            if ent[:1] in ("x", "X"):
                codepoint = int(ent[1:], 16)
            else:
                codepoint = int(ent)
        except ValueError:
            # Not a well-formed number: leave the reference untouched.
            return match.group()
    else:
        codepoint = n2cp.get(ent)
        if codepoint is None:
            return match.group()

    try:
        return to_char(codepoint)
    except (ValueError, OverflowError):
        # Code point outside the supported range: leave the text untouched.
        return match.group()
github PyAr / CDPedia / src / third_party / werkzeug / utils.py View on Github external
>>> html.p(class_='foo', *[html.a('foo', href='foo.html'), ' ',
    ...                        html.a('bar', href='bar.html')])
    u'<p class="foo"><a href="foo.html">foo</a> <a href="bar.html">bar</a></p>'

    This class works around some browser limitations and can not be used for
    arbitrary SGML/XML generation.  For that purpose lxml and similar
    libraries exist.

    Calling the builder escapes the string passed:

    >>> html.p(html("<foo>"))
    u'<p>&lt;foo&gt;</p>'
    """

    _entity_re = re.compile(r"&amp;([^;]+);")
    _entities = name2codepoint.copy()
    _entities["apos"] = 39
    _empty_elements = {
        "area",
        "base",
        "basefont",
        "br",
        "col",
        "command",
        "embed",
        "frame",
        "hr",
        "img",
        "input",
        "keygen",
        "isindex",
        "link",
github qbittorrent / qBittorrent / src / searchengine / nova3 / helpers.py View on Github external
def htmlentitydecode(s):
    """Return ``s`` with HTML character references replaced by their characters.

    Three forms are decoded, all requiring the trailing ``;``:

    * named entities present in ``html.entities.name2codepoint`` (``&eacute;``)
    * decimal references (``&#233;``)
    * hexadecimal references (``&#x00E9;``)

    Everything is decoded in a single pass, so text produced by one
    replacement is never decoded a second time (``&amp;#233;`` yields
    ``&#233;``).  Unknown names, malformed numbers and code points that
    ``chr`` rejects are left in the output unchanged.

    :param s: text that may contain HTML character references
    :returns: the decoded text
    """
    # (Inspired from http://mail.python.org/pipermail/python-list/2007-June/443813.html)
    def entity2char(m):
        name, decimal, hexadecimal = m.group("name", "dec", "hex")
        if name is not None:
            codepoint = html.entities.name2codepoint.get(name)
            if codepoint is None:
                return m.group(0)  # Unknown entity: keep the text as written.
        try:
            if decimal is not None:
                codepoint = int(decimal)
            elif hexadecimal is not None:
                codepoint = int(hexadecimal, 16)
            return chr(codepoint)
        except (ValueError, OverflowError):
            # Number too large or not a valid code point: keep the text.
            return m.group(0)

    reference = (r'&(?:(?P<name>[A-Za-z][A-Za-z0-9]*)'
                 r'|#(?P<dec>\d+)'
                 r'|#[xX](?P<hex>[0-9a-fA-F]+));')
    return re.sub(reference, entity2char, s)
github qbittorrent / qBittorrent / src / searchengine / nova3 / helpers.py View on Github external
def entity2char(m):
        """Map a matched entity name (group 1 of ``m``) to its character.

        Names missing from ``html.entities.name2codepoint`` yield a single
        space.
        """
        codepoint = html.entities.name2codepoint.get(m.group(1))
        if codepoint is None:
            return " "  # Unknown entity: We replace with a space.
        return chr(codepoint)
    t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s)