import socket

# Collect whichever of socket's exception classes exist in this Python version.
socket_errors = []
for e in ['error', 'gaierror']:
    if hasattr(socket, e):
        socket_errors.append(getattr(socket, e))
#DEPRECATED import mimify
#DEPRECATED from StringIO import StringIO as SIO
#DEPRECATED mimify.CHARSET = 'utf-8'
import feedparser
feedparser.USER_AGENT = "rss2email/" + __version__ + " +http://www.allthingsrss.com/rss2email/"

import html2text as h2t

# Push the script-level configuration constants into the html2text module,
# which reads its options from module globals.
h2t.UNICODE_SNOB = UNICODE_SNOB
h2t.LINKS_EACH_PARAGRAPH = LINKS_EACH_PARAGRAPH
h2t.BODY_WIDTH = BODY_WIDTH
html2text = h2t.html2text
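With the module configured this way, conversion is a single call through the html2text alias bound above. A minimal sketch (the sample HTML and zero width are illustrative, not taken from rss2email):

# Hypothetical usage of the alias defined above.
sample = "<p>Read the <a href='http://example.com/'>docs</a>.</p>"
h2t.BODY_WIDTH = 0          # 0 disables line wrapping
print(html2text(sample))    # prints markdown like: Read the [docs](http://example.com/).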
from types import *
### Utility Functions ###
import threading
class TimeoutError(Exception): pass
class InputError(Exception): pass

def timelimit(timeout, function):
    # Compact variant of the recipe at
    # http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/473878:
    # run `function` in a daemon thread; give up after `timeout` seconds.
    def internal2(*args, **kw):
        result, error = [], []
        def runner():
            try: result.append(function(*args, **kw))
            except Exception as e: error.append(e)
        t = threading.Thread(target=runner)
        t.daemon = True
        t.start()
        t.join(timeout)
        if t.is_alive(): raise TimeoutError()
        if error: raise error[0]
        return result[0]
    return internal2
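The wrapper is applied by closing it over a blocking callable. A hypothetical use (the fetch function and the 10-second limit are made up for illustration):

def fetch(url):
    import urllib.request
    return urllib.request.urlopen(url).read()

guarded_fetch = timelimit(10, fetch)
page = guarded_fetch('http://example.com/')  # raises TimeoutError if the fetch takes over 10s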
# Python 2 snippet (note the unicode() call); re, urlsplit, and unquote
# come from the original module's imports.
def _convert_content_html2text(self, content):
    html2text.BODY_WIDTH = 0  # don't wrap lines
    content = self._convert_wiki_toc_to_markdown(content)
    content = html2text.html2text(unicode(content))
    # Convert internal links
    internal_url = urlsplit(self.base_url).path + 'wiki/'
    internal_link_re = r'\[([^]]+)\]\(%s([^)]*)\)' % internal_url
    internal_link = re.compile(internal_link_re, re.UNICODE)
    def sub(match):
        caption = match.group(1)
        page = self.convert_title(match.group(2))
        if caption == page:
            link = '[%s]' % unquote(page)
        else:
            link = '[%s](%s)' % (caption, page)
        return link
    return internal_link.sub(sub, content)
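The regex only rewrites markdown links whose target starts with the wiki's own path. A hypothetical before/after, assuming base_url has path '/trac/' and convert_title leaves these names unchanged:

# '[Home](/trac/wiki/Home)'     -> '[Home]'           (caption equals page name)
# '[the docs](/trac/wiki/Docs)' -> '[the docs](Docs)' (caption differs)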
def _setup(self, section='DEFAULT'):
    _html2text.UNICODE_SNOB = self.getboolean(
        section, 'unicode-snob', fallback=False)
    _html2text.LINKS_EACH_PARAGRAPH = self.getboolean(
        section, 'links-after-each-paragraph', fallback=False)
    _html2text.BODY_WIDTH = self.getint(section, 'body-width', fallback=0)
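A configuration file driving this _setup would use the same key names; the values shown here are just the fallback defaults from the code above:

[DEFAULT]
unicode-snob = False
links-after-each-paragraph = False
body-width = 0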
# Python 2 snippet (note reader.next() and unicode()); csv, re, urlopen,
# and BeautifulSoup are imported at the top of the original module.
def parse_ticket(self, id):
    # Use CSV export to get ticket fields
    url = self.full_url(self.TICKET_URL % id, 'csv')
    f = self.csvopen(url)
    reader = csv.DictReader(f)
    ticket_fields = reader.next()
    ticket_fields['class'] = 'ARTIFACT'
    ticket = self.remap_fields(ticket_fields)

    # Use HTML export to get ticket description and comments
    import html2text
    html2text.BODY_WIDTH = 0
    url = self.full_url(self.TICKET_URL % id)
    self.log_url(url)
    d = BeautifulSoup(urlopen(url))
    self.clean_missing_wiki_links(d)
    desc = d.find('div', 'description').find('div', 'searchable')
    ticket['description'] = html2text.html2text(
        desc.renderContents('utf8').decode('utf8')) if desc else ''
    comments = []
    for comment in d.findAll('form', action='#comment'):
        c = {}
        c['submitter'] = re.sub(
            r'.* by ', '', comment.find('h3', 'change').text).strip()
        c['date'] = self.trac2z_date(
            comment.find('a', 'timeline')['title'].replace(' in Timeline', ''))
        changes = unicode(comment.find('ul', 'changes') or '')
        body = comment.find('div', 'comment')
        # Assumed continuation: convert the comment body the same way
        # as the description above and collect the result.
        body = body.renderContents('utf8').decode('utf8') if body else ''
        c['comment'] = html2text.html2text(changes + body)
        comments.append(c)
    ticket['comments'] = comments
    return ticket
def plain2markdown(txt, preserve_multiple_spaces=False, has_html_entities=False):
    if not has_html_entities:
        # prevent &foo; and &#123; from becoming HTML entities
        txt = re_amp.sub('&amp;', txt)
    # avoid accidental 4-space indentations creating code blocks
    if preserve_multiple_spaces:
        txt = txt.replace('\t', ' ' * 4)
        txt = re_preserve_spaces.sub('&nbsp;', txt)
    else:
        txt = re_leading_spaces.sub('', txt)
    try:
        # try to use html2text for most of the escaping
        import html2text
        html2text.BODY_WIDTH = 0
        txt = html2text.escape_md_section(txt, snob=True)
    except ImportError:
        # fall back to just escaping any MD-special chars
        txt = md_chars_matcher_all.sub(r"\\\1", txt)
    # prevent < and > from becoming tags
    txt = re_angle_bracket_open.sub('&lt;', txt)
    txt = re_angle_bracket_close.sub('&gt;', txt)
    return txt
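A sketch of what the escaping achieves (the module-level patterns re_amp, re_leading_spaces, etc. are defined elsewhere in the original file; the sample input and output are hypothetical):

# plain2markdown('use <b> &copy; *stars*')
#   -> 'use &lt;b&gt; &amp;copy; \*stars\*'
# Angle brackets and ampersands become entities, and markdown-special
# characters are backslash-escaped, so the text renders literally.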
def html_to_markdown(value, width=70):
    html2text.BODY_WIDTH = width
    return html2text.html2text(value)
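Because the helper sets the module-wide BODY_WIDTH on every call, the last width passed wins for any other code using html2text in the same process. A hypothetical call:

# html_to_markdown('<p>a long paragraph that will be re-wrapped...</p>', width=40)
# returns the markdown text wrapped at 40 columns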
from allura import model as M
from forgeblog import model as BM
from forgeblog.main import ForgeBlogApp
from allura.lib import exceptions
from allura.lib.helpers import exceptionless
from allura.lib.helpers import plain2markdown
# Everything in this file depends on html2text,
# so the import attempt is placed in global scope.
try:
    import html2text
except ImportError:
    raise ImportError("""Importing RSS feeds requires GPL library "html2text":
https://github.com/brondsem/html2text""")
html2text.BODY_WIDTH = 0
class RssFeedsCommand(base.BlogCommand):
    summary = 'Fetch external rss feeds for all Blog tools, and convert new feed entries into blog posts'
    parser = base.BlogCommand.standard_parser(verbose=True)
    parser.add_option('-a', '--appid', dest='appid', default='',
                      help='application id')
    parser.add_option('-u', '--username', dest='username', default='root',
                      help='poster username')

    def command(self):
        self.basic_setup()
        # If this script creates a new BlogPost, it will create an
        # activitystream activity for that post. During the saving of the
        # activity, User.url() will be called. This method defers to an
        # AuthenticationProvider.
def mediawiki2markdown(source):
    try:
        import html2text
        from mediawiki import wiki2html
    except ImportError:
        raise ImportError("""This operation requires GPL libraries:
"mediawiki" (https://github.com/zikzakmedia/python-mediawiki.git)
"html2text" (https://github.com/aaronsw/html2text.git)""")
    html2text.BODY_WIDTH = 0

    wiki_content = wiki2html(source, True)
    wiki_content = _convert_toc(wiki_content)
    markdown_text = html2text.html2text(wiki_content)
    # convert HTML entities for angle brackets back to literal characters
    markdown_text = markdown_text.replace('&lt;', '<').replace('&gt;', '>')
    return markdown_text
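A hypothetical round trip (the wiki markup and the resulting markdown are illustrative; exact output depends on the wiki2html and html2text versions):

# mediawiki2markdown("== Title ==\nSome ''italic'' text.")
#   -> '## Title\n\nSome _italic_ text.\n'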