Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_node_comparison():
    """Nodes reached through different paths must compare equal.

    The ``#tt`` element is obtained three ways: as the parent of the last
    traversed element, as the second-to-last traversed element, and through
    a CSS lookup. All three results are expected to be equal.
    """
    html = (
        '\n'
        '<div>H3ll0</div><div id="tt"><p id="stext">'
        'Lorem ipsum dolor sit amet, ea quo modus meliore platonem.'
        '</p></div>'
        '\n'
    )
    html_parser = HTMLParser(html)
    # Element nodes only; text nodes are excluded from the traversal.
    elements = list(html_parser.root.traverse(include_text=False))

    via_parent = elements[-1].parent
    via_index = elements[-2]
    via_selector = html_parser.css_first('#tt')

    assert via_parent == via_index == via_selector
def test_attrs_sets_attribute():
    """Assigning through ``node.attrs`` must show up in ``node.attributes``."""
    tree = HTMLParser('<div></div>')
    div = tree.css_first('div')

    div.attrs['id'] = 'new_id'

    expected = {'id': 'new_id'}
    assert div.attributes == expected
def test_encoding():
    """Check the ``input_encoding`` reported by ``HTMLParser``.

    Four inputs are covered: a ``str`` document, the same document as
    ``bytes``, cp1251-encoded bytes parsed with ``detect_encoding=True``, and
    a UTF-8 encoded document parsed with ``use_meta_tags=True``.
    """
    # The markup contains double quotes, so the literals are single-quoted;
    # nesting them inside a double-quoted literal is a syntax error.
    html = ('<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p>'
            '<p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>')
    html = HTMLParser(html)
    assert html.input_encoding == 'UTF-8'

    html = (b'<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p>'
            b'<p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>')
    html = HTMLParser(html)
    assert html.input_encoding == 'UTF-8'

    html = "<div>Привет мир!</div>".encode('cp1251')
    assert HTMLParser(html, detect_encoding=True).input_encoding == 'WINDOWS-1251'

    # The expected encoding here has to come from a meta tag, so the document
    # must actually declare one; an empty document carries no declaration.
    # NOTE(review): the meta markup was reconstructed - confirm the exact
    # original document against the upstream test suite.
    html_utf = '<head><meta charset="WINDOWS-1251"></head>'.encode('utf-8')
    assert HTMLParser(html_utf, detect_encoding=True, use_meta_tags=True).input_encoding == 'WINDOWS-1251'
def test_parser():
    """Exercise parser construction and argument type checks.

    An empty string must yield an ``HTMLParser`` instance, while an integer
    passed either to the constructor or to ``css`` must raise ``TypeError``.
    """
    parser = HTMLParser("")
    assert isinstance(parser, HTMLParser)

    # Integer given as the document.
    with pytest.raises(TypeError):
        HTMLParser(123)

    # Integer given as the selector.
    with pytest.raises(TypeError):
        HTMLParser("asd").css(123)
def test_text_node_returns_text():
    """The first child of the ``<div>`` must report the div's text content."""
    tree = HTMLParser('<div>foo bar</div>')
    first_child = tree.css_first('div').child

    # deep=False: ask only for the node's own text.
    own_text = first_child.text(deep=False)
    assert own_text == 'foo bar'
def test_replace_with_multiple_nodes():
    """Replace a ``<span>`` that has several children with its ``alt`` text.

    After the replacement, the outer ``<div>`` is expected to serialize with
    only the text in place of the span and everything it contained.
    """
    # The closing ``</span></div>`` tags belong to the HTML literal; they had
    # been displaced onto a line of their own after the function body.
    # NOTE(review): exact original placement inside the literal is assumed.
    html_parser = HTMLParser(
        '<div>Get <span alt="Laptop"><img src="/jpg"> <div>/div></div></span></div>'
    )
    span = html_parser.css_first('span')
    span.replace_with(span.attributes.get('alt', ''))
    assert html_parser.body.child.html == '<div>Get Laptop</div>'
def test_css_first_default():
    """``css_first`` must return ``default`` when the selector matches nothing."""
    # Single-quoted literal: the markup contains double quotes, which cannot
    # be nested unescaped inside a double-quoted string.
    html = '<span></span><div><p class="p3">text</p><p class="p3">sd</p></div><p></p>'
    # No element in the document carries the class "s3".
    selector = ".s3"
    assert HTMLParser(html).css_first(selector, default='lorem ipsum') == 'lorem ipsum'
def test_replace_with():
    """Replace an ``<img>`` with its ``alt`` text and check the serialized div."""
    tree = HTMLParser('<div>Get <img alt="Laptop" src=""></div>')
    image = tree.css_first('img')

    # Fall back to an empty string if the attribute is absent.
    alt_text = image.attributes.get('alt', '')
    image.replace_with(alt_text)

    assert tree.body.child.html == '<div>Get Laptop</div>'
def get_page_url(sess, url, destination):
    """Build the dependents page URL for ``url``.

    The plain dependents page is fetched through ``sess``. If it contains
    ``.select-menu-item`` entries, each entry's page is fetched and the count
    shown by its first ``.table-list-filters`` link is read; the returned URL
    then carries the ``package_id`` of the entry with the highest count.
    Without such entries, the plain dependents URL is returned.

    Args:
        sess: Object whose ``get(url)`` returns a response with a ``text``
            attribute.
        url: Base URL that the ``/network/dependents`` path is appended to.
        destination: Dependent type; upper-cased before being put in the query.

    Returns:
        The dependents page URL as a string.
    """
    dependent_type = destination.upper()
    page_url = "{0}/network/dependents?dependent_type={1}".format(url, dependent_type)
    menu_items = HTMLParser(sess.get(page_url).text).css('.select-menu-item')

    if not menu_items:
        return page_url

    count_selector = '.table-list-filters a:first-child'
    candidates = []
    for item in menu_items:
        href = item.attributes['href']
        item_page = requests.get("https://github.com/{}".format(href))
        item_tree = HTMLParser(item_page.text)

        # First whitespace-separated token of the link text, with thousands
        # separators removed, is parsed as the count.
        count_text = item_tree.css(count_selector)[0].text()
        count = int(count_text.split()[0].replace(",", ""))

        # Value following the first "=" in the href's query string.
        package_id = urlparse(item.attributes["href"]).query.split("=")[1]
        candidates.append({"count": count, "package_id": package_id})

    # max() returns the first entry with the highest count, which is the same
    # entry a stable descending sort would place first.
    most_popular = max(candidates, key=lambda entry: entry['count'])
    return "{0}/network/dependents?dependent_type={1}&package_id={2}".format(
        url, dependent_type, most_popular["package_id"]
    )