How to use the html2text.BODY_WIDTH setting in html2text

To help you get started, we’ve selected a few html2text examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Dieterbe / rss2email / View on Github external
import socket

# Collect the socket exception classes that this platform's socket module
# actually exposes; names that are missing are skipped.
socket_errors = []
for e in ['error', 'gaierror']:
    _exc_class = getattr(socket, e, None)
    if _exc_class is not None:
        socket_errors.append(_exc_class)

#DEPRECATED import mimify
#DEPRECATED from StringIO import StringIO as SIO
#DEPRECATED mimify.CHARSET = 'utf-8'

import feedparser
# Override feedparser's USER_AGENT with an "rss2email/<version>" string.
# NOTE(review): __version__ is not defined in this excerpt, and the literal
# ends in " +", which suggests a trailing project URL was stripped -- confirm
# against the upstream file.
feedparser.USER_AGENT = "rss2email/"+__version__+ " +"

import html2text as h2t

# Module-level shortcut bound to the h2t.html2text callable.
html2text = h2t.html2text

# NOTE(review): star import; which names from `types` are actually used is
# not visible in this excerpt.
from types import *

### Utility Functions ###

import threading
class TimeoutError(Exception):
    """Module-local timeout exception deriving directly from Exception.

    NOTE(review): presumably raised by ``timelimit``; the raising code is
    not visible in this excerpt.
    """

class InputError(Exception):
    """Module-local exception deriving directly from Exception.

    NOTE(review): presumably signals invalid input; the raising code is
    not visible in this excerpt.
    """

def timelimit(timeout, function):
#    def internal(function):
        def internal2(*args, **kw):
github apache / allura / ForgeWiki / forgewiki / scripts / wiki_from_trac / View on Github external
def _convert_content_html2text(self, content):
        """Convert HTML wiki content to Markdown and rewrite internal links.

        The content is first passed through
        ``self._convert_wiki_toc_to_markdown``, then through
        ``html2text.html2text``. Markdown links whose target starts with the
        wiki path of ``self.base_url`` are then rewritten: the target is
        mapped through ``self.convert_title``; when the caption equals the
        converted title the link collapses to ``[page]``, otherwise it
        becomes ``[caption](page)``.

        Side effect: sets the module-level ``html2text.BODY_WIDTH`` to 0.

        :param content: HTML content to convert.
        :returns: the Markdown text with internal links rewritten.
        """
        html2text.BODY_WIDTH = 0  # Don't wrap lines
        content = self._convert_wiki_toc_to_markdown(content)
        content = html2text.html2text(unicode(content))
        # Convert internal links
        internal_url = urlsplit(self.base_url).path + 'wiki/'
        # The path is literal text, so escape it before interpolating it into
        # the pattern; otherwise characters such as '.' would act as regex
        # metacharacters.
        internal_link_re = r'\[([^]]+)\]\(%s([^)]*)\)' % re.escape(internal_url)
        internal_link = re.compile(internal_link_re, re.UNICODE)

        def sub(match):
            # group(1) is the link caption, group(2) is the page path that
            # follows the internal wiki prefix.
            caption = match.group(1)
            page = self.convert_title(match.group(2))
            if caption == page:
                link = '[%s]' % unquote(page)
            else:
                link = '[%s](%s)' % (caption, page)
            return link
        return internal_link.sub(sub, content)
github rss2email / rss2email / View on Github external
def _setup(self, section='DEFAULT'):
        """Push html2text-related options from *section* onto ``_html2text``.

        Reads ``unicode-snob`` and ``links-after-each-paragaph`` as booleans
        (fallback ``False``) and ``body-width`` as an integer (fallback ``0``)
        from this config object, then stores them on the module-level
        attributes ``UNICODE_SNOB``, ``LINKS_EACH_PARAGRAPH`` and
        ``BODY_WIDTH`` of ``_html2text``.
        """
        unicode_snob = self.getboolean(section, 'unicode-snob', fallback=False)
        _html2text.UNICODE_SNOB = unicode_snob

        # The option key's spelling is kept exactly as the config expects it.
        links_each_paragraph = self.getboolean(
            section, 'links-after-each-paragaph', fallback=False)
        _html2text.LINKS_EACH_PARAGRAPH = links_each_paragraph

        body_width = self.getint(section, 'body-width', fallback=0)
        _html2text.BODY_WIDTH = body_width
github apache / allura / Allura / allura / scripts / View on Github external
def parse_ticket(self, id):
        # Use CSV export to get ticket fields
        url = self.full_url(self.TICKET_URL % id, 'csv')
        f = self.csvopen(url)
        reader = csv.DictReader(f)
        ticket_fields =
        ticket_fields['class'] = 'ARTIFACT'
        ticket = self.remap_fields(ticket_fields)

        # Use HTML export to get ticket description and comments
        import html2text
        html2text.BODY_WIDTH = 0
        url = self.full_url(self.TICKET_URL % id)
        d = BeautifulSoup(urlopen(url))
        desc = d.find('div', 'description').find('div', 'searchable')
        ticket['description'] = html2text.html2text(
            desc.renderContents('utf8').decode('utf8')) if desc else ''
        comments = []
        for comment in d.findAll('form', action='#comment'):
            c = {}
            c['submitter'] = re.sub(
                r'.* by ', '', comment.find('h3', 'change').text).strip()
            c['date'] = self.trac2z_date(
                comment.find('a', 'timeline')['title'].replace(' in Timeline', ''))
            changes = unicode(comment.find('ul', 'changes') or '')
            body = comment.find('div', 'comment')
github apache / allura / Allura / allura / lib / View on Github external
def plain2markdown(txt, preserve_multiple_spaces=False, has_html_entities=False):
    """Escape plain text so that Markdown rendering shows it literally.

    :param txt: the plain text to escape.
    :param preserve_multiple_spaces: when true, tabs are expanded to four
        spaces and matches of ``re_preserve_spaces`` are replaced with
        ``&nbsp;``; when false, matches of ``re_leading_spaces`` are removed.
    :param has_html_entities: when false, matches of ``re_amp`` are replaced
        with ``&amp;`` so the text cannot form HTML entities.
    :returns: the escaped text.

    Side effect: when ``html2text`` is importable, its module-level
    ``BODY_WIDTH`` is set to 0.

    NOTE(review): the ``re_*`` patterns and ``md_chars_matcher_all`` are
    defined outside this excerpt; the ``&nbsp;`` replacement and the ``else``
    branch were reconstructed from a garbled source -- confirm upstream.
    """
    if not has_html_entities:
        # prevent &foo; and &#123; from becoming HTML entities
        txt = re_amp.sub('&amp;', txt)
    # avoid accidental 4-space indentations creating code blocks
    if preserve_multiple_spaces:
        txt = txt.replace('\t', ' ' * 4)
        txt = re_preserve_spaces.sub('&nbsp;', txt)
    else:
        txt = re_leading_spaces.sub('', txt)
    try:
        # try to use html2text for most of the escaping
        import html2text
        html2text.BODY_WIDTH = 0
        txt = html2text.escape_md_section(txt, snob=True)
    except ImportError:
        # fall back to just escaping any MD-special chars
        txt = md_chars_matcher_all.sub(r"\\\1", txt)
    # prevent < and > from becoming tags
    txt = re_angle_bracket_open.sub('&lt;', txt)
    txt = re_angle_bracket_close.sub('&gt;', txt)
    return txt
github tylerharper / wag / wag / View on Github external
def html_to_markdown(value, width=70):
    """Convert *value* with ``html2text.html2text`` and return the result.

    Side effect: the module-level ``html2text.BODY_WIDTH`` is set to *width*
    before converting and is left at that value afterwards.
    """
    html2text.BODY_WIDTH = width
    markdown = html2text.html2text(value)
    return markdown
github apache / allura / ForgeBlog / forgeblog / command / View on Github external
from allura import model as M
from forgeblog import model as BM
from forgeblog.main import ForgeBlogApp
from allura.lib import exceptions
from allura.lib.helpers import exceptionless
from allura.lib.helpers import plain2markdown

# Everything in this file depends on html2text,
# so import attempt is placed in global scope.
try:
    import html2text
except ImportError:
    # Re-raise with a message naming the missing dependency.
    # NOTE(review): the message ends with ':' -- a trailing URL appears to
    # have been stripped from this excerpt; confirm against upstream.
    raise ImportError("""Importing RSS feeds requires GPL library "html2text":""")

# Module-wide html2text setting, applied once at import time.
html2text.BODY_WIDTH = 0

class RssFeedsCommand(base.BlogCommand):
    summary = 'Fetch external rss feeds for all Blog tools, and convert new feed entries into blog posts'
    parser = base.BlogCommand.standard_parser(verbose=True)
    parser.add_option('-a', '--appid', dest='appid', default='',
                      help='application id')
    parser.add_option('-u', '--username', dest='username', default='root',
                      help='poster username')

    def command(self):

        # If this script creates a new BlogPost, it will create an
        # activitystream activity for that post. During the saving of the
        # activity, User.url() will be called. This method defers to an
github apache / allura / ForgeWiki / forgewiki / View on Github external
def mediawiki2markdown(source):
    """Convert MediaWiki markup to Markdown via an intermediate HTML step.

    ``source`` is rendered to HTML with ``mediawiki.wiki2html``, passed
    through ``_convert_toc``, converted with ``html2text.html2text``, and
    finally any ``<`` / ``>`` left in the Markdown are escaped as
    ``&lt;`` / ``&gt;``.

    Side effect: sets the module-level ``html2text.BODY_WIDTH`` to 0.

    :param source: MediaWiki markup text.
    :returns: the converted Markdown text.
    :raises ImportError: if ``html2text`` or ``mediawiki`` cannot be imported.
    """
    try:
        import html2text
        from mediawiki import wiki2html
    except ImportError:
        # NOTE(review): the project URLs inside this message appear to have
        # been stripped from this excerpt; the text is kept as found.
        raise ImportError("""This operation requires GPL libraries:
        "mediawiki" (
        "html2text" (""")

    html2text.BODY_WIDTH = 0

    wiki_content = wiki2html(source, True)
    wiki_content = _convert_toc(wiki_content)
    markdown_text = html2text.html2text(wiki_content)
    # Escape angle brackets so they are not interpreted as tags.
    markdown_text = markdown_text.replace('<', '&lt;').replace('>', '&gt;')
    return markdown_text