import feedparser

def sanitize_html(html, force_https=True):
    """sanitize_html(html) returns sanitized HTML.

    It can be used to help avoid basic HTML injection attacks.

    >>> sanitize_html("<p>hello</p>")
    '<p>hello</p>'
    >>> sanitize_html("")
    ''
    """
    clean_html = feedparser._sanitizeHTML(html, "utf-8", "text/html")
    if force_https:
        return clean_html.replace('src="http://', 'src="https://')
    else:
        return clean_html
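# A minimal usage sketch for the helper above, assuming a feedparser 5.x
# release that still ships the private _sanitizeHTML helper (it was
# removed in feedparser 6):
if __name__ == "__main__":
    dirty = '<p onclick="alert(1)">hi <img src="http://example.com/a.png"></p>'
    # The onclick handler is stripped by the sanitizer, and force_https
    # rewrites the img src to https.
    print(sanitize_html(dirty))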
def sanitise_html(html, baseurl, inline, config, type):
    """Attempt to turn arbitrary feed-provided HTML into something
    suitable for safe inclusion into the rawdog output. The inline
    parameter says whether to expect a fragment of inline text, or a
    sequence of block-level elements."""

    if html is None:
        return None

    html = encode_references(html)

    # sgmllib handles "<br/>" as a SHORTTAG; this workaround is from
    # feedparser.
    html = re.sub(r'(\S)/>', r'\1 />', html)
    html = feedparser._resolveRelativeURIs(html, baseurl, "UTF-8", type)
    html = feedparser._sanitizeHTML(html, "UTF-8", type)

    if not inline and config["blocklevelhtml"]:
        # If we're after some block-level HTML and the HTML doesn't
        # start with a block-level element, then insert a <p> tag
        # before it. This still fails when the HTML contains text, then
        # a block-level element, then more text, but it's better than
        # nothing.
        if block_level_re.match(html) is None:
            html = "<p>" + html

    if config["tidyhtml"]:
        import mx.Tidy
        args = {"wrap": 0, "numeric_entities": 1}
        plugins.call_hook("mxtidy_args", config, args, baseurl, inline)
        output = mx.Tidy.tidy(html, None, None, **args)[2]
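# For reference, block_level_re (defined elsewhere in rawdog and not
# shown in this excerpt) tests whether the HTML starts with a
# block-level element. A plausible definition, assumed here for
# illustration rather than copied from rawdog:
import re
block_level_re = re.compile(
    r'^\s*<(p|pre|h[1-6]|ul|ol|dl|div|blockquote|form|hr|table|address)[\s>/]',
    re.IGNORECASE)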
def do_run(self):
    import feedparser

    # Tweak feedparser to accept XML as content.
    feedparser._FeedParserMixin.unknown_starttag = feedparser_unknown_starttag
    feedparser._FeedParserMixin.unknown_endtag = feedparser_unknown_endtag
    feedparser._sanitizeHTML = lambda source, encoding: source

    self.logger.debug('Starting')
    # Does not accept events pre-dating the startup.
    self.last_event = time.gmtime()
    if self.url is None:
        raise Exception('Attribute url must be set')
    while True:
        self.logger.debug("Reading feed")
        feed = feedparser.parse(self.url)
        last_update = self.last_event
        new_events = 0
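        # The loop body is truncated in this excerpt. A typical
        # continuation would compare each entry's timestamp against
        # last_update and count the new entries, along these lines
        # (an assumption, not the project's actual code):
        #   for entry in feed.entries:
        #       if entry.updated_parsed > last_update:
        #           new_events += 1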
feedparser._FeedParserMixin.mapContentType = (
    types.MethodType(
        _mapContentType,
        None, feedparser._FeedParserMixin))
if hasattr(feedparser, '_StrictFeedParser'):
    feedparser._StrictFeedParser.mapContentType = (
        types.MethodType(
            _mapContentType,
            None, feedparser._StrictFeedParser))
feedparser._LooseFeedParser.mapContentType = (
    types.MethodType(
        _mapContentType,
        None, feedparser._LooseFeedParser))

# Swap out feedparser's HTML sanitizer for our own, based on
# BeautifulSoup and our own tag/attribute stripper.
feedparser._sanitizeHTML = sanitize_html
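# A minimal sketch of the kind of BeautifulSoup-based sanitizer the
# comment above describes; the whitelists and function body are
# assumptions for illustration, not the project's actual stripper:
from bs4 import BeautifulSoup

ALLOWED_TAGS = {"a", "p", "br", "em", "strong", "ul", "ol", "li", "pre", "code"}
ALLOWED_ATTRS = {"a": {"href", "title"}}

def sanitize_html(source, encoding="utf-8", _type="text/html"):
    soup = BeautifulSoup(source, "html.parser")
    # Remove dangerous tags together with their content.
    for tag in soup(["script", "style"]):
        tag.decompose()
    # Unwrap anything else that is not whitelisted (keeping its text),
    # then strip non-whitelisted attributes from the tags that remain.
    for tag in soup.find_all(True):
        if tag.name not in ALLOWED_TAGS:
            tag.unwrap()
        else:
            allowed = ALLOWED_ATTRS.get(tag.name, set())
            tag.attrs = {k: v for k, v in tag.attrs.items() if k in allowed}
    return str(soup)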
def get_mod_class(plugin):
    """
    Converts 'lifestream.plugins.FeedPlugin' into the pair
    ('lifestream.plugins', 'FeedPlugin').
    """
    try:
        dot = plugin.rindex('.')
    except ValueError:
        return plugin, ''
    return plugin[:dot], plugin[dot+1:]
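# Typical use of the helper above, resolving the dotted path to a class
# object (a sketch; the surrounding project code is not shown):
import importlib

def load_plugin(path):
    mod_name, cls_name = get_mod_class(path)
    return getattr(importlib.import_module(mod_name), cls_name)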
try:
    from feedcache import Cache
    from util import CacheStorage
    # TODO: Use a cache storage object.
        subject, sender=settings.DEFAULT_FROM_EMAIL, recip="", context=None,
        html_template="", text_template="", sender_name="",
        html_content="", text_content="", recip_list=None, sender_formatted=""
):
    from smtplib import SMTP, SMTP_SSL
    from stripogram import html2text
    from feedparser import _sanitizeHTML

    if not context: context = {}
    if html_template:
        html = render(context, html_template)
    else: html = html_content
    if text_template:
        text = render(context, text_template)
    else: text = text_content
    if not text:
        text = html2text(_sanitizeHTML(html, charset))
    if not recip_list: recip_list = []
    if recip: recip_list.append(recip)
    try:
        if getattr(settings, "EMAIL_USE_SSL", False):
            server = SMTP_SSL(settings.EMAIL_HOST, settings.EMAIL_PORT)
        else:
            server = SMTP(settings.EMAIL_HOST, settings.EMAIL_PORT)
        if settings.EMAIL_USE_TLS:
            server.ehlo()
            server.starttls()
            server.ehlo()
        if settings.EMAIL_HOST_USER and settings.EMAIL_HOST_PASSWORD:
            server.login(
                settings.EMAIL_HOST_USER, settings.EMAIL_HOST_PASSWORD
            )
def sanitize(value):
    return feedparser._sanitizeHTML(value, 'UTF-8', 'text/html')
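# sanitize (and markdownify below) follow Django's template-filter
# signature; if that is where they come from, registration would look
# roughly like this (an assumption, not shown in the excerpt):
#   from django import template
#   register = template.Library()
#   register.filter("sanitize", sanitize)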
def markdownify(value):
    import feedparser
    import markdown2

    value = urlfinder.sub(r'<\1>', value)
    value = urlfinder2.sub(r' <\1>', value)
    html = markdown2.markdown(value)
    html = feedparser._sanitizeHTML(html, 'utf-8')
    html = html.replace('