Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
)
# Boolean command-line switches, declared as data and registered in one pass.
# Each row is (flag, action, destination attribute, default, help text).
# Rows are registered top to bottom.
# NOTE(review): `p` is presumably an optparse.OptionParser built earlier in
# the enclosing function (not visible here) -- confirm.
_toggle_specs = (
    (
        "--single-line-break",
        "store_true",
        "single_line_break",
        config.SINGLE_LINE_BREAK,
        "Use a single line break after a block element rather than two "
        "line breaks. NOTE: Requires --body-width=0",
    ),
    (
        "--unicode-snob",
        "store_true",
        "unicode_snob",
        config.UNICODE_SNOB,
        "Use unicode throughout document",
    ),
    (
        # store_false: giving this flag turns use_automatic_links off.
        "--no-automatic-links",
        "store_false",
        "use_automatic_links",
        config.USE_AUTOMATIC_LINKS,
        "Do not use automatic links wherever applicable",
    ),
    (
        # store_false: giving this flag turns skip_internal_links off.
        "--no-skip-internal-links",
        "store_false",
        "skip_internal_links",
        config.SKIP_INTERNAL_LINKS,
        "Do not skip internal links",
    ),
)
for _flag, _action, _dest, _default, _help in _toggle_specs:
    p.add_option(
        _flag, action=_action, dest=_dest, default=_default, help=_help
    )
"""
Input parameters:
out: possible custom replacement for self.outtextf (which
appends lines of text).
baseurl: base URL of the document we process
"""
# NOTE(review): this is the body of a constructor whose `def` line is not
# visible in this excerpt; `bodywidth` used below is presumably one of its
# parameters -- confirm against the full file.

# Extra keyword arguments for the base HTMLParser constructor.
# convert_charrefs is only passed on Python 3.4+ (older versions would
# presumably reject the keyword); False is passed so that the base parser
# leaves character references unconverted.
kwargs = {}
if sys.version_info >= (3, 4):
kwargs['convert_charrefs'] = False
# Explicit base-class initialisation with the version-dependent kwargs.
HTMLParser.HTMLParser.__init__(self, **kwargs)
# Config options
# NOTE(review): the first three attributes look like table-parsing state
# rather than user-facing configuration -- confirm.
self.split_next_td = False
self.td_count = 0
self.table_start = False
# Every value below is seeded from the module-level defaults in `config`,
# except body_width (taken from `bodywidth`) and the two literals at the end.
# "covered in cli" presumably marks settings that also have a command-line
# flag.
self.unicode_snob = config.UNICODE_SNOB # covered in cli
self.escape_snob = config.ESCAPE_SNOB # covered in cli
self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH
self.body_width = bodywidth # covered in cli
self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli
self.inline_links = config.INLINE_LINKS # covered in cli
self.protect_links = config.PROTECT_LINKS # covered in cli
self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli
# Naming mismatch: the attribute is ignore_links but the config constant
# it reads is IGNORE_ANCHORS.
self.ignore_links = config.IGNORE_ANCHORS # covered in cli
self.ignore_images = config.IGNORE_IMAGES # covered in cli
self.images_to_alt = config.IMAGES_TO_ALT # covered in cli
self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli
self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli
self.bypass_tables = config.BYPASS_TABLES # covered in cli
self.ignore_tables = config.IGNORE_TABLES # covered in cli
# Hard-coded defaults (not read from `config`).
self.google_doc = False # covered in cli
self.ul_item_mark = '*' # covered in cli
)
# Boolean command-line switches, declared as data and registered in one pass.
# Each row is (flag, action, destination attribute, default, help text).
# Rows are registered top to bottom.
# NOTE(review): `p` is presumably an optparse.OptionParser built earlier in
# the enclosing function (not visible here) -- confirm.
_toggle_specs = (
    (
        "--single-line-break",
        "store_true",
        "single_line_break",
        config.SINGLE_LINE_BREAK,
        "Use a single line break after a block element rather than two "
        "line breaks. NOTE: Requires --body-width=0",
    ),
    (
        "--unicode-snob",
        "store_true",
        "unicode_snob",
        config.UNICODE_SNOB,
        "Use unicode throughout document",
    ),
    (
        # store_false: giving this flag turns use_automatic_links off.
        "--no-automatic-links",
        "store_false",
        "use_automatic_links",
        config.USE_AUTOMATIC_LINKS,
        "Do not use automatic links wherever applicable",
    ),
    (
        # store_false: giving this flag turns skip_internal_links off.
        "--no-skip-internal-links",
        "store_false",
        "skip_internal_links",
        config.SKIP_INTERNAL_LINKS,
        "Do not skip internal links",
    ),
)
for _flag, _action, _dest, _default, _help in _toggle_specs:
    p.add_option(
        _flag, action=_action, dest=_dest, default=_default, help=_help
    )
def setup_html2text(self, section='DEFAULT'):
    """Copy this configuration's html2text settings into html2text's globals.

    html2text is configured through module-level values on
    ``_html2text.config`` instead of keyword arguments, so the values for
    the chosen section are written there.

    section: name of the configuration section to read.  If it is not
        present in this object, 'DEFAULT' is read instead.
    """
    source = section if section in self else 'DEFAULT'

    _html2text.config.UNICODE_SNOB = self.getboolean(source, 'unicode-snob')
    _html2text.config.LINKS_EACH_PARAGRAPH = self.getboolean(
        source, 'links-after-each-paragraph')

    # hack to prevent breaking the default in every existing config file:
    # a stored 0 is translated to 78, and a negative value is translated to
    # 0 (presumably "no wrapping" on the html2text side -- confirm).
    configured_width = self.getint(source, 'body-width')
    if configured_width < 0:
        effective_width = 0
    elif configured_width == 0:
        effective_width = 78
    else:
        effective_width = configured_width
    _html2text.config.BODY_WIDTH = effective_width