Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_multiple_line_break(self):
    """Two consecutive ``<br>`` tags separate the text into two paragraphs."""
    # The markup must be a complete document for the round-trip check below:
    # given a bare fragment, lxml.html.fromstring() returns a synthetic
    # <span>/<div> container, so the serialized output would never equal
    # the input string.
    html_string = (
        '<html><body>'
        ' normal text <br><br> another text '
        '</body></html>'
    )
    dom = html.fromstring(html_string)

    # Sanity check: parsing and re-serializing must not alter the markup.
    returned = html.tostring(dom).decode("utf8")
    assert html_string == returned

    paragraphs = ParagraphMaker.make_paragraphs(dom)
    assert len(paragraphs) == 2

    # Text on either side of the line breaks is expected stripped of the
    # surrounding whitespace, and <br> is not counted as a tag.
    self.assert_paragraphs_equal(
        paragraphs[0],
        text="normal text",
        words_count=2,
        tags_count=0,
    )
    self.assert_paragraphs_equal(
        paragraphs[1],
        text="another text",
        words_count=2,
        tags_count=0,
    )
def test_whitespace_handling(self):
# Checks the paragraph text, word count and tag count produced for block
# elements (<p>, <div>, <pre>, <blockquote>) whose inline children carry
# leading/trailing whitespace.
# NOTE(review): indentation is missing in this copy of the file; the method
# body below must be re-indented before it can run.
html_string = (
# NOTE(review): the empty '' literals at both ends look like stripped wrapper
# markup (presumably '<html><body>' / '</body></html>') - confirm against the
# upstream test, since the round-trip assertion below depends on it.
''
'<p>pre<em>in</em>post \t pre <span class="class"> in </span> post</p>'
'<div>pre<em> in </em>post</div>'
'<pre>pre<em>in </em>post</pre>'
'<blockquote>pre<em> in</em>post</blockquote>'
''
)
dom = html.fromstring(html_string)
# Round-trip check: the serialized DOM must equal the input markup.
returned = html.tostring(dom).decode("utf8")
assert html_string == returned
paragraphs = ParagraphMaker.make_paragraphs(dom)
# One paragraph is expected per block-level element in html_string.
assert len(paragraphs) == 4
# <p>: the whitespace run containing the tab is expected as a single space,
# and text directly adjacent to <em> is joined without a space.
self.assert_paragraphs_equal(
paragraphs[0],
text="preinpost pre in post",
words_count=4,
tags_count=2
)
# <div>: the spaces inside <em> are expected to separate the words.
self.assert_paragraphs_equal(
paragraphs[1],
text="pre in post",
words_count=3,
tags_count=1
)
self.assert_paragraphs_equal(
paragraphs[2],
# NOTE(review): the call above is cut off here - its remaining arguments and
# the assertion for paragraphs[3] are missing from this copy.
def test_inline_text_in_body(self):
"""Inline text should be treated as separate paragraph."""
# NOTE(review): indentation is missing in this copy of the file; the method
# body below must be re-indented before it can run.
html_string = (
# NOTE(review): the empty '' literals at both ends look like stripped wrapper
# markup (presumably '<html><body>' / '</body></html>') - confirm against the
# upstream test.
''
'<sup>I am <strong>top</strong>-inline\n\n\n\n and I am happy \n</sup>'
'<p>normal text</p>'
'<code>\nvar i = -INFINITY;\n</code>'
'<div>after text with variable <var>N</var> </div>'
' I am inline\n\n\n\n and I am happy \n'
''
)
dom = html.fromstring(html_string)
paragraphs = ParagraphMaker.make_paragraphs(dom)
# Five paragraphs are expected - presumably one each for <sup>, <p>, <code>,
# <div> and the trailing bare text; only the first two are checked in the
# visible part of this test.
assert len(paragraphs) == 5
# <sup>: the run of newlines is expected to collapse to a single "\n".
self.assert_paragraphs_equal(
paragraphs[0],
words_count=7,
tags_count=2,
text="I am top-inline\nand I am happy"
)
self.assert_paragraphs_equal(
paragraphs[1],
words_count=2,
tags_count=0,
text="normal text"
)
self.assert_paragraphs_equal(
paragraphs[2],
# NOTE(review): the call above is cut off here - its remaining arguments and
# the assertions for paragraphs[3] and paragraphs[4] are missing from this copy.
def test_basic(self):
    """A heading and two ``<p>`` elements each produce one paragraph."""
    # The markup must be a complete document for the round-trip check below:
    # given several sibling elements without a root, lxml.html.fromstring()
    # returns a synthetic <div> container, so the serialized output would
    # never equal the input string.
    html_string = (
        '<html><body>'
        '<h1>Header</h1>'
        '<p>text and some <em>other</em> words <span class="class">that I</span> have in my head now</p>'
        '<p>footer</p>'
        '</body></html>'
    )
    dom = html.fromstring(html_string)

    # Sanity check: parsing and re-serializing must not alter the markup.
    returned = html.tostring(dom).decode("utf8")
    assert html_string == returned

    paragraphs = ParagraphMaker.make_paragraphs(dom)
    assert len(paragraphs) == 3

    self.assert_paragraphs_equal(paragraphs[0], text="Header", words_count=1, tags_count=0)

    # The inline <em> and <span> are expected to be counted as tags while
    # their text is merged into the enclosing paragraph.
    text = "text and some other words that I have in my head now"
    self.assert_paragraphs_equal(paragraphs[1], text=text, words_count=12, tags_count=2)

    self.assert_paragraphs_equal(paragraphs[2], text="footer", words_count=1, tags_count=0)