Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
out: possible custom replacement for self.outtextf (which
appends lines of text).
baseurl: base URL of the document we process
"""
kwargs = {}
if sys.version_info >= (3, 4):
kwargs['convert_charrefs'] = False
HTMLParser.HTMLParser.__init__(self, **kwargs)
# Config options
self.split_next_td = False
self.td_count = 0
self.table_start = False
self.unicode_snob = config.UNICODE_SNOB # covered in cli
self.escape_snob = config.ESCAPE_SNOB # covered in cli
self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH
self.body_width = bodywidth # covered in cli
self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli
self.inline_links = config.INLINE_LINKS # covered in cli
self.protect_links = config.PROTECT_LINKS # covered in cli
self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli
self.ignore_links = config.IGNORE_ANCHORS # covered in cli
self.ignore_images = config.IGNORE_IMAGES # covered in cli
self.images_to_alt = config.IMAGES_TO_ALT # covered in cli
self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli
self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli
self.bypass_tables = config.BYPASS_TABLES # covered in cli
self.ignore_tables = config.IGNORE_TABLES # covered in cli
self.google_doc = False # covered in cli
self.ul_item_mark = '*' # covered in cli
self.emphasis_mark = '_' # covered in cli
self.strong_mark = '**'
def setup_html2text(self, section='DEFAULT'):
    """Copy this configuration's html2text settings into html2text's globals.

    html2text is configured through module-level settings rather than
    keyword arguments, so the relevant options are read from `section`
    and written onto ``_html2text.config``.

    section: name of the configuration section to read; when it is not
      present in this configuration, 'DEFAULT' is used instead.
    """
    active_section = section if section in self else 'DEFAULT'

    unicode_snob = self.getboolean(active_section, 'unicode-snob')
    _html2text.config.UNICODE_SNOB = unicode_snob

    links_each_paragraph = self.getboolean(
        active_section, 'links-after-each-paragraph')
    _html2text.config.LINKS_EACH_PARAGRAPH = links_each_paragraph

    # Compatibility hack so the default in existing config files keeps
    # working: a configured 0 is translated to 78, and any negative value
    # is translated to 0; positive widths are passed through unchanged.
    configured_width = self.getint(active_section, 'body-width')
    if configured_width < 0:
        effective_width = 0
    elif configured_width == 0:
        effective_width = 78
    else:
        effective_width = configured_width
    _html2text.config.BODY_WIDTH = effective_width
dest="use_automatic_links",
default=config.USE_AUTOMATIC_LINKS,
help="Do not use automatic links wherever applicable"
)
p.add_option(
"--no-skip-internal-links",
action="store_false",
dest="skip_internal_links",
default=config.SKIP_INTERNAL_LINKS,
help="Do not skip internal links"
)
p.add_option(
"--links-after-para",
action="store_true",
dest="links_each_paragraph",
default=config.LINKS_EACH_PARAGRAPH,
help="Put links after each paragraph instead of document"
)
p.add_option(
"--mark-code",
action="store_true",
dest="mark_code",
default=config.MARK_CODE,
help="Mark program code blocks with [code]...[/code]"
)
p.add_option(
"--decode-errors",
dest="decode_errors",
action="store",
type="string",
default=config.DECODE_ERRORS,
help="What to do in case of decode errors.'ignore', 'strict' and "
dest="use_automatic_links",
default=config.USE_AUTOMATIC_LINKS,
help="Do not use automatic links wherever applicable"
)
p.add_option(
"--no-skip-internal-links",
action="store_false",
dest="skip_internal_links",
default=config.SKIP_INTERNAL_LINKS,
help="Do not skip internal links"
)
p.add_option(
"--links-after-para",
action="store_true",
dest="links_each_paragraph",
default=config.LINKS_EACH_PARAGRAPH,
help="Put links after each paragraph instead of document"
)
p.add_option(
"--mark-code",
action="store_true",
dest="mark_code",
default=config.MARK_CODE,
help="Mark program code blocks with [code]...[/code]"
)
p.add_option(
"--decode-errors",
dest="decode_errors",
action="store",
type="string",
default=config.DECODE_ERRORS,
help="What to do in case of decode errors.'ignore', 'strict' and "