Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_node_comparison():
    """The same element reached by three different routes must compare equal."""
    markup = (
        '\n'
        '<div>H3ll0</div><div id="tt"><p id="stext">Lorem ipsum dolor sit amet, '
        'ea quo modus meliore platonem.</p></div>'
        '\n'
    )
    tree = HTMLParser(markup)
    # Walk elements only. The test expects the last two entries of the walk
    # to be the element with id "tt" and the <p> nested inside it.
    elements = list(tree.root.traverse(include_text=False))
    via_parent = elements[-1].parent
    via_index = elements[-2]
    via_selector = tree.css_first('#tt')
    assert via_parent == via_index
    assert via_index == via_selector
def test_attrs_sets_attribute():
    """An item assigned through ``attrs`` must be visible in ``attributes``."""
    tree = HTMLParser('<div></div>')
    div = tree.css_first('div')
    div.attrs['id'] = 'new_id'
    expected = {'id': 'new_id'}
    assert div.attributes == expected
def test_encoding():
    """Check ``input_encoding`` for str, bytes, detected and meta-declared input."""
    # Single-quoted literal: the markup itself contains double quotes, which
    # must not terminate the Python string.
    markup = (
        '<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p>'
        '<p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>'
    )

    # Plain text input is reported as UTF-8.
    assert HTMLParser(markup).input_encoding == 'UTF-8'

    # The same document passed as bytes is reported as UTF-8 as well.
    assert HTMLParser(markup.encode('ascii')).input_encoding == 'UTF-8'

    # Bytes in a legacy code page, with detection switched on.
    cyrillic = "<div>Привет мир!</div>".encode('cp1251')
    assert HTMLParser(cyrillic, detect_encoding=True).input_encoding == 'WINDOWS-1251'

    # use_meta_tags needs a <meta> declaration to look at; an empty document
    # carries none, so the declared charset is supplied explicitly here.
    declared = '<head><meta charset="WINDOWS-1251"></head>'.encode('utf-8')
    parser = HTMLParser(declared, detect_encoding=True, use_meta_tags=True)
    assert parser.input_encoding == 'WINDOWS-1251'
def test_parser():
    """An empty string builds a parser; integer arguments raise ``TypeError``."""
    parser = HTMLParser("")
    assert isinstance(parser, HTMLParser)

    # An integer is not accepted as the document.
    with pytest.raises(TypeError):
        HTMLParser(123)

    # An integer is not accepted as a CSS selector either.
    with pytest.raises(TypeError):
        HTMLParser("asd").css(123)
def test_text_node_returns_text():
    """``text(deep=False)`` on the div's first child must return its text."""
    tree = HTMLParser('<div>foo bar</div>')
    first_child = tree.css_first('div').child
    content = first_child.text(deep=False)
    assert content == 'foo bar'
def test_replace_with_multiple_nodes():
    """Replacing a span that wraps several nodes must leave only the new text."""
    # The closing </span> belongs inside the markup literal; left outside the
    # string it is not valid Python.
    html_parser = HTMLParser(
        '<div>Get <span alt="Laptop"><img src="/jpg"> <div>/div></span></div>'
    )
    span = html_parser.css_first('span')
    # Swap the whole span (and everything nested in it) for its alt text.
    span.replace_with(span.attributes.get('alt', ''))
    assert html_parser.body.child.html == '<div>Get Laptop</div>'
def test_css_first_default():
    """``css_first`` must return ``default`` when the selector matches nothing."""
    # Single-quoted literal so the double-quoted class attributes inside the
    # markup do not end the Python string early.
    html = '<span></span><div><p class="p3">text</p><p class="p3">sd</p></div><p></p>'
    # No element in the markup carries class "s3".
    selector = ".s3"
    assert HTMLParser(html).css_first(selector, default='lorem ipsum') == 'lorem ipsum'
def test_replace_with():
    """Replacing an ``<img>`` with its alt text must yield plain text in the div."""
    tree = HTMLParser('<div>Get <img alt="Laptop" src=""></div>')
    image = tree.css_first('img')
    alt_text = image.attributes.get('alt', '')
    image.replace_with(alt_text)
    rendered = tree.body.child.html
    assert rendered == '<div>Get Laptop</div>'
def test_nodes():
    """``root`` and ``body`` are ``Node`` objects and ``html`` stays close to the input.

    This test was previously defined twice with identical bodies; the second
    definition shadowed the first, so a single copy is kept.
    """
    html = (
        '<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p>'
        '<p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>'
    )
    htmlp = HTMLParser(html)

    assert isinstance(htmlp.root, Node)
    assert isinstance(htmlp.body, Node)

    html_output = htmlp.html
    # The serialised document is compared loosely rather than for equality:
    # it must be at least as long as the input and more than 80% similar.
    # NOTE(review): presumably the parser adds wrapper markup around the
    # fragment - confirm against the library's serialisation behaviour.
    assert len(html_output) >= len(html)
    assert SequenceMatcher(None, html, html_output).ratio() > 0.8