Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, out=None, baseurl='', bodywidth=config.BODY_WIDTH):
    """
    Initialise the underlying HTMLParser and the conversion state.

    Input parameters:
        out: possible custom replacement for self.outtextf (which
             appends lines of text).
        baseurl: base URL of the document we process
        bodywidth: defaults to config.BODY_WIDTH

    NOTE(review): none of the three parameters is consumed in the part
    of the method visible here; presumably they are used further down.
    """
    # Only pass convert_charrefs (as False) when running on 3.4 or
    # later; on older interpreters the base class is called with no
    # keyword arguments at all.
    parser_kwargs = (
        {'convert_charrefs': False} if sys.version_info >= (3, 4) else {}
    )
    HTMLParser.HTMLParser.__init__(self, **parser_kwargs)

    # Config options
    self.split_next_td = False
    self.td_count = 0
    self.table_start = False
    self.unicode_snob = config.UNICODE_SNOB  # covered in cli
def setup_html2text(self, section='DEFAULT'):
    """Copy this configuration's html2text settings into html2text's globals.

    html2text is configured through module-level values (not keyword
    arguments), so the relevant options are written onto
    ``_html2text.config``.

    section: name of the section to read the options from; when it is
             not contained in this object, 'DEFAULT' is used instead.
    """
    section = section if section in self else 'DEFAULT'

    _html2text.config.UNICODE_SNOB = self.getboolean(
        section, 'unicode-snob')
    _html2text.config.LINKS_EACH_PARAGRAPH = self.getboolean(
        section, 'links-after-each-paragraph')

    # Hack to prevent breaking the default in every existing config
    # file: a stored value of 0 is mapped to 78, and a negative value
    # is what yields a BODY_WIDTH of 0.
    width = self.getint(section, 'body-width')
    if width < 0:
        width = 0
    elif width == 0:
        width = 78
    _html2text.config.BODY_WIDTH = width
default=False,
help="use a dash rather than a star for unordered list items"
)
# Boolean flag: stores True in `em_style_asterisk` when given, False otherwise.
p.add_option(
    "-e", "--asterisk-emphasis",
    dest="em_style_asterisk", action="store_true", default=False,
    help="use an asterisk rather than an underscore for emphasized text",
)
# Integer option stored in `body_width`; defaults to config.BODY_WIDTH.
p.add_option(
    "-b", "--body-width",
    dest="body_width", action="store", type="int",
    default=config.BODY_WIDTH,
    help="number of characters per output line, 0 for no wrap",
)
# Integer option stored in `list_indent`; defaults to
# config.GOOGLE_LIST_INDENT.
p.add_option(
    "-i", "--google-list-indent",
    dest="list_indent", action="store", type="int",
    default=config.GOOGLE_LIST_INDENT,
    help="number of pixels Google indents nested lists",
)
p.add_option(
"-s", "--hide-strikethrough",
action="store_true",
dest="hide_strikethrough",
default=False,
help="hide strike-through text. only relevant when -g is "
default=False,
help="use a dash rather than a star for unordered list items"
)
# Boolean flag: stores True in `em_style_asterisk` when given, False otherwise.
p.add_option(
    "-e", "--asterisk-emphasis",
    dest="em_style_asterisk", action="store_true", default=False,
    help="use an asterisk rather than an underscore for emphasized text",
)
# Integer option stored in `body_width`; defaults to config.BODY_WIDTH.
p.add_option(
    "-b", "--body-width",
    dest="body_width", action="store", type="int",
    default=config.BODY_WIDTH,
    help="number of characters per output line, 0 for no wrap",
)
# Integer option stored in `list_indent`; defaults to
# config.GOOGLE_LIST_INDENT.
p.add_option(
    "-i", "--google-list-indent",
    dest="list_indent", action="store", type="int",
    default=config.GOOGLE_LIST_INDENT,
    help="number of pixels Google indents nested lists",
)
p.add_option(
"-s", "--hide-strikethrough",
action="store_true",
dest="hide_strikethrough",
default=False,
help="hide strike-through text. only relevant when -g is "
def html2text(html, baseurl='', bodywidth=None):
    """Run `html` through a fresh HTML2Text instance and return the result.

    html: the input handed to HTML2Text.handle().
    baseurl: forwarded unchanged to the HTML2Text constructor.
    bodywidth: forwarded to the HTML2Text constructor; when None,
               config.BODY_WIDTH is looked up at call time and used.
    """
    width = config.BODY_WIDTH if bodywidth is None else bodywidth
    converter = HTML2Text(baseurl=baseurl, bodywidth=width)
    return converter.handle(html)