How to use the feedparser.SANITIZE_HTML flag in feedparser

To help you get started, we’ve selected a few feedparser examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github lemon24 / reader / src / reader / _parser.py View on Github external
def parse(*args: Any, **kwargs: Any) -> Any:
    """Thin wrapper around :func:`feedparser.parse` that always forces
    relative-URI resolution and HTML sanitization on.

    Positional and keyword arguments are forwarded unchanged; a caller
    passing either forced keyword explicitly raises TypeError, exactly
    as the direct call would.
    """
    forced = {'resolve_relative_uris': True, 'sanitize_html': True}
    return feedparser.parse(*args, **forced, **kwargs)

        yield parse

    else:

        # This is in no way thread-safe, but what can you do?
        # TODO: Well, you could use locks to make it threadsafe...
        # https://docs.python.org/3/library/threading.html#lock-objects

        # Save the current module-wide feedparser defaults so they can be
        # restored once the caller is done with the yielded parse function.
        old_RESOLVE_RELATIVE_URIS = feedparser.RESOLVE_RELATIVE_URIS
        old_SANITIZE_HTML = feedparser.SANITIZE_HTML
        feedparser.RESOLVE_RELATIVE_URIS = True
        feedparser.SANITIZE_HTML = True

        try:
            yield feedparser.parse
        finally:
            # Restore the saved globals even if the caller's body raises.
            feedparser.RESOLVE_RELATIVE_URIS = old_RESOLVE_RELATIVE_URIS
            feedparser.SANITIZE_HTML = old_SANITIZE_HTML
github lemon24 / reader / src / reader / _parser.py View on Github external
else:

        # This is in no way thread-safe, but what can you do?
        # TODO: Well, you could use locks to make it threadsafe...
        # https://docs.python.org/3/library/threading.html#lock-objects

        # Snapshot the process-global feedparser settings before mutating
        # them; every other thread parsing feeds sees the mutated values.
        old_RESOLVE_RELATIVE_URIS = feedparser.RESOLVE_RELATIVE_URIS
        old_SANITIZE_HTML = feedparser.SANITIZE_HTML
        feedparser.RESOLVE_RELATIVE_URIS = True
        feedparser.SANITIZE_HTML = True

        try:
            yield feedparser.parse
        finally:
            # Put the previous settings back no matter how the caller exits.
            feedparser.RESOLVE_RELATIVE_URIS = old_RESOLVE_RELATIVE_URIS
            feedparser.SANITIZE_HTML = old_SANITIZE_HTML
github StranikS-Scan / WorldOfTanks-Decompiled / source / res / scripts / client / helpers / rssdownloader.py View on Github external
# Python bytecode 2.7 (decompiled from Python 2.7)
# Embedded file name: scripts/client/helpers/RSSDownloader.py
import threading
import helpers
import BigWorld
import feedparser
from debug_utils import LOG_WARNING, LOG_CURRENT_EXCEPTION
# Disable microformat parsing and feedparser's built-in HTML sanitization
# for every parse in this process (module-wide flags, set at import time).
# NOTE(review): with SANITIZE_HTML off, feed HTML reaches consumers
# unsanitized -- make sure it is escaped before display.
feedparser.PARSE_MICROFORMATS = 0
feedparser.SANITIZE_HTML = 0

class RSSDownloader(object):
    """Fetches RSS feeds in the background, driven by periodic
    BigWorld callbacks.

    NOTE(review): only part of the class is visible in this excerpt;
    documentation below covers what the visible code shows.
    """
    # Delay between scheduled __update ticks, passed to BigWorld.callback
    # (presumably seconds -- TODO confirm against the BigWorld API).
    UPDATE_INTERVAL = 0.1
    # Read-only views over the private state.
    lastRSS = property(lambda self: self.__lastRSS)
    isBusy = property(lambda self: self.__thread is not None)

    def __init__(self):
        self.__thread = None  # worker thread; None while idle (isBusy False)
        self.__lastDownloadTime = None
        # Schedule the first periodic __update call.
        self.__cbID = BigWorld.callback(self.UPDATE_INTERVAL, self.__update)
        self.__lastRSS = {}  # most recently downloaded RSS data
        self.__onCompleteCallbacks = set()
        return

    def destroy(self):
        # Drop the worker-thread reference so isBusy reports False.
        self.__thread = None
github sunlightlabs / django-feedinator / feedinator / __init__.py View on Github external
from django.conf import settings
from datetime import datetime
from feedinator.models import Feed, FeedEntry, Tag
from tz import Eastern, utc
import feedparser
import time

# Disable feedparser's HTML sanitization module-wide for this package.
# NOTE(review): feed content is stored unsanitized -- ensure templates
# escape it before rendering.
feedparser.SANITIZE_HTML = 0

def tuple_to_datetime(t, tz=None):
    """
    Convert a time tuple into a datetime object in the given timezone.

    :param t: a ``time.struct_time``-like sequence; only the first six
        fields (year, month, day, hour, minute, second) are used.
    :param tz: optional ``tzinfo`` attached to the resulting datetime.
    """
    dt = datetime(
        year=t[0],
        month=t[1],
        day=t[2],
        hour=t[3],
        minute=t[4],
        second=t[5],
        tzinfo=tz
    )
    if tz:
        # NOTE(review): adding utcoffset shifts the wall-clock value as well
        # as tagging it with tz -- looks like a local-to-UTC-style
        # adjustment; confirm the intent against the callers.
        dt = dt + tz.utcoffset(dt)
github ghoseb / planet.clojure / planet / __init__.py View on Github external
logger = logging.getLogger("planet.runner")
    logger.setLevel(logging.getLevelName(level))
    # Older logging modules only expose .warn; alias .warning to it so the
    # rest of the code can use the modern name regardless of Python version.
    try:
        logger.warning
    except:
        logger.warning = logger.warn

    # Remember the level/format this logger was configured with
    # (presumably consulted elsewhere to avoid reconfiguring -- verify).
    loggerParms = (level,format)
    return logger

sys.path.insert(1, os.path.join(os.path.dirname(__file__),'vendor'))

# Configure feed parser
# Module-wide flags, set once at import time: sanitize embedded HTML,
# but leave relative URIs unresolved.
import feedparser
feedparser.SANITIZE_HTML=1
feedparser.RESOLVE_RELATIVE_URIS=0

import publish
github lemon24 / reader / reader / _feedparser_parse_data.py View on Github external
:param bool resolve_relative_uris:
        Should feedparser attempt to resolve relative URIs to absolute ones
        within HTML content?  Defaults to the value of
        :data:`feedparser.RESOLVE_RELATIVE_URIS`, which is ``True``.
    :param bool sanitize_html:
        Should feedparser sanitize HTML content?  Only disable this if you
        know what you are doing!  Defaults to the value of
        :data:`feedparser.SANITIZE_HTML`, which is ``True``.

    :return: A :class:`FeedParserDict`.
    '''

    # Import feedparser lazily: it is only needed to read the module-wide
    # defaults when the caller did not pass explicit values.
    if sanitize_html is None or resolve_relative_uris is None:
        import feedparser
    if sanitize_html is None:
        sanitize_html = feedparser.SANITIZE_HTML
    if resolve_relative_uris is None:
        resolve_relative_uris = feedparser.RESOLVE_RELATIVE_URIS

    result = _make_empty_result()

    # Record where the data came from, when known.
    if href:
        result['href'] = href
    if response_headers:
        # NOTE(review): the `or {}` is dead here -- this branch only runs
        # when response_headers is truthy.
        result['headers'] = response_headers or {}

    return _parse_data(data, result, resolve_relative_uris, sanitize_html)
github sunlightlabs / django-feedinator / feedinator / core.py View on Github external
from datetime import datetime

from django.conf import settings
from django.utils.timezone import utc, now
from feedinator.models import Feed, FeedEntry, Tag
from pytz import timezone
import feedparser

# Disable feedparser's HTML sanitization module-wide.
# NOTE(review): downstream consumers receive raw feed HTML -- make sure it
# is escaped before being rendered.
feedparser.SANITIZE_HTML = 0


def tuple_to_datetime(t, tz=None):
    """
    Convert a time tuple into a datetime object in the given timezone.

    :param t: a ``time.struct_time``-like sequence; only the first six
        fields (year, month, day, hour, minute, second) are used.
    :param tz: timezone the tuple is presumably expressed in -- TODO
        confirm; the datetime is deliberately built naive (tzinfo=None)
        and adjusted afterwards when ``settings.USE_TZ`` is set.
    """
    dt = datetime(
        year=t[0],
        month=t[1],
        day=t[2],
        hour=t[3],
        minute=t[4],
        second=t[5],
        tzinfo=None,
    )
    if settings.USE_TZ:
github lemon24 / reader / src / reader / _parser.py View on Github external
return feedparser.parse(
                *args, resolve_relative_uris=True, sanitize_html=True, **kwargs
            )

        yield parse

    else:

        # This is in no way thread-safe, but what can you do?
        # TODO: Well, you could use locks to make it threadsafe...
        # https://docs.python.org/3/library/threading.html#lock-objects

        old_RESOLVE_RELATIVE_URIS = feedparser.RESOLVE_RELATIVE_URIS
        old_SANITIZE_HTML = feedparser.SANITIZE_HTML
        feedparser.RESOLVE_RELATIVE_URIS = True
        feedparser.SANITIZE_HTML = True

        try:
            yield feedparser.parse
        finally:
            feedparser.RESOLVE_RELATIVE_URIS = old_RESOLVE_RELATIVE_URIS
            feedparser.SANITIZE_HTML = old_SANITIZE_HTML
github ranveeraggarwal / iitb-blog-aggregator / rawdog / rawdoglib / rawdog.py View on Github external
import urllib2

# Optional HTML-tidy backends: fall back to None when a library is missing
# so the rest of the code can feature-test `tidylib` / `mxtidy` for truth.
try:
	import tidylib
except:
	tidylib = None

try:
	import mx.Tidy as mxtidy
except:
	mxtidy = None

# Turn off content-cleaning, since we want to see an approximation to the
# original content for hashing. rawdog will sanitise HTML when writing.
feedparser.RESOLVE_RELATIVE_URIS = 0
feedparser.SANITIZE_HTML = 0

# Disable microformat support, because it tends to return poor-quality data
# (e.g. identifying inappropriate things as enclosures), and it relies on
# BeautifulSoup which is unable to parse many feeds.
feedparser.PARSE_MICROFORMATS = 0

# This is initialised in main().
persister = None

# Module-level cache for the detected system encoding; populated during
# startup and read back through get_system_encoding().
system_encoding = None

def get_system_encoding():
	"""Return the system encoding cached at startup (None until set)."""
	encoding = system_encoding
	return encoding

def safe_ftime(format, t):
	"""Format a time value into a string in the current locale (as