How to use the selectolax.parser.HTMLParser class in selectolax

To help you get started, we’ve selected a few selectolax examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github rushter / selectolax / tests / test_nodes.py View on Github external
def test_node_comparison():
    """Check that one node reached by three different routes compares equal.

    The routes are: the parent of the last traversed node, the
    second-to-last traversed node, and the ``#tt`` CSS match.
    """
    markup = """
        <div>H3ll0</div><div id="tt"><p id="stext">Lorem ipsum dolor sit amet, ea quo modus meliore platonem.</p></div>
    """
    tree = HTMLParser(markup)
    # Text nodes are excluded so the negative indices below land on elements.
    elements = list(tree.root.traverse(include_text=False))

    via_parent = elements[-1].parent
    via_index = elements[-2]
    via_selector = tree.css_first('#tt')

    assert via_parent == via_index == via_selector
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_attrs_sets_attribute():
    """Check that an item assigned through ``attrs`` shows up in ``attributes``."""
    tree = HTMLParser('<div></div>')
    div = tree.css_first('div')

    div.attrs['id'] = 'new_id'

    expected = {'id': 'new_id'}
    assert div.attributes == expected
github rushter / selectolax / tests / test_parser.py View on Github external
def test_encoding():
    """Check the ``input_encoding`` reported for str, bytes and non-UTF-8 input."""
    # The markup contains double quotes, so the literals are single-quoted.
    # Nesting them in a double-quoted literal is a syntax error.
    markup = '<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p><p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>'
    assert HTMLParser(markup).input_encoding == 'UTF-8'

    markup_bytes = b'<div><p id="p1"></p><p id="p2"></p><p id="p3"><a>link</a></p><p id="p4"></p><p id="p5">text</p><p id="p6"></p></div>'
    assert HTMLParser(markup_bytes).input_encoding == 'UTF-8'

    # Bytes in a legacy code page, with encoding detection switched on.
    cyrillic = "<div>Привет мир!</div>".encode('cp1251')
    assert HTMLParser(cyrillic, detect_encoding=True).input_encoding == 'WINDOWS-1251'

    # With ``use_meta_tags=True`` the document has to carry a charset
    # declaration for the expected value to be found; an empty document
    # has none to read.
    html_utf = '<head><meta charset="WINDOWS-1251"></head>'.encode('utf-8')
    assert HTMLParser(html_utf, detect_encoding=True, use_meta_tags=True).input_encoding == 'WINDOWS-1251'
github rushter / selectolax / tests / test_parser.py View on Github external
def test_parser():
    """Check construction from an empty string and ``TypeError`` on bad argument types."""
    assert isinstance(HTMLParser(""), HTMLParser)

    # Each callable passes an int where the API is expected to reject it:
    # first as the document itself, then as a CSS selector.
    bad_calls = (
        lambda: HTMLParser(123),
        lambda: HTMLParser("asd").css(123),
    )
    for bad_call in bad_calls:
        with pytest.raises(TypeError):
            bad_call()
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_text_node_returns_text():
    """Check that ``text(deep=False)`` on the div's child returns the div's text."""
    tree = HTMLParser('<div>foo bar</div>')
    # ``child`` of the <div> is the node under test.
    text_node = tree.css_first('div').child
    assert text_node.text(deep=False) == 'foo bar'
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_replace_with_multiple_nodes():
    """Check that replacing a <span> with children leaves only the replacement text."""
    # The closing tags belong inside the string literal. Left outside it,
    # they are not valid Python.
    html_parser = HTMLParser('<div>Get <span alt="Laptop"><img src="/jpg"> <div>/div></span></div>')
    span = html_parser.css_first('span')
    # Swap the span for the value of its ``alt`` attribute.
    span.replace_with(span.attributes.get('alt', ''))
    assert html_parser.body.child.html == '<div>Get Laptop</div>'
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_css_first_default():
    """Check that ``css_first`` returns ``default`` when the selector matches nothing."""
    # Single-quoted so the double-quoted ``class`` attributes do not end the literal.
    html = '<span></span><div><p class="p3">text</p><p class="p3">sd</p></div><p></p>'
    # No element in the markup carries the class ``s3``.
    selector = ".s3"
    assert HTMLParser(html).css_first(selector, default='lorem ipsum') == 'lorem ipsum'
github rushter / selectolax / tests / test_nodes.py View on Github external
def test_replace_with():
    """Check that an <img> can be replaced in place by its ``alt`` text."""
    tree = HTMLParser('<div>Get <img alt="Laptop" src=""></div>')
    image = tree.css_first('img')

    alt_text = image.attributes.get('alt', '')
    image.replace_with(alt_text)

    assert tree.body.child.html == '<div>Get Laptop</div>'
github github-tooling / ghtopdep / ghtopdep / ghtopdep.py View on Github external
def get_page_url(sess, url, destination):
    """Return the dependents-page URL for ``url``, narrowed to its most-used package.

    The base dependents page is fetched and scanned for ``.select-menu-item``
    entries. When there are any, each entry's page is fetched, its leading
    count is read, and the entry with the largest count supplies the
    ``package_id`` appended to the returned URL.

    Args:
        sess: Object whose ``get(url)`` returns a response with a ``.text``
            attribute (presumably a ``requests`` session; confirm with caller).
        url: Repository URL used as the prefix of the dependents URL.
        destination: Dependent type; it is upper-cased before being placed
            in the query string.

    Returns:
        The dependents URL, with ``&package_id=...`` appended when at least
        one menu entry yielded a count; otherwise the plain dependents URL.
    """
    dependent_type = destination.upper()
    page_url = "{0}/network/dependents?dependent_type={1}".format(url, dependent_type)
    main_response = sess.get(page_url)
    parsed_node = HTMLParser(main_response.text)

    packages = []
    for item in parsed_node.css('.select-menu-item'):
        href = item.attributes['href']
        repo_url = "https://github.com/{}".format(href)
        # Use the caller's session for the nested fetch as well, so whatever
        # is configured on it applies to every request this function makes.
        res = sess.get(repo_url)
        counters = HTMLParser(res.text).css('.table-list-filters a:first-child')
        if not counters:
            # No counter element on this page: skip the entry rather than
            # fail with IndexError.
            continue
        package_id = urlparse(href).query.split("=")[1]
        # First whitespace-separated token of the link text, with thousands
        # separators removed so it parses as an int.
        count = counters[0].text().split()[0].replace(",", "")
        packages.append({"count": int(count), "package_id": package_id})

    if not packages:
        return page_url

    # max() returns the first entry with the highest count, which is the same
    # element a stable descending sort would place first.
    most_popular_package_id = max(packages, key=lambda k: k['count']).get("package_id")
    return "{0}/network/dependents?dependent_type={1}&package_id={2}".format(
        url, dependent_type, most_popular_package_id)

selectolax

Fast HTML5 parser with CSS selectors.

MIT
Latest version published 12 days ago

Package Health Score

82 / 100
Full package analysis

Similar packages