How to use the cssselect.HTMLTranslator class in cssselect

To help you get started, we’ve selected a few cssselect examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

zopyx / print-css-rocks / lessons / in-progress / lesson-multi-column-float-to-landscape / lib / python3.4 / site-packages / cssselect / tests.py View on Github

def test_select(self):
        """Set up helpers for checking CSS selectors against ``HTML_IDS``.

        Parses ``HTML_IDS`` and defines two local helpers:

        * ``select_ids(selector, html_only)`` translates *selector* to
          XPath, evaluates it on the parsed document and returns the ``id``
          attribute of every match (``'nil'`` when an element has none),
          ordered by each element's position in the document traversal.
        * ``pcss(main, *selectors, **kwargs)`` asserts that every selector
          in *selectors* yields the same ids as *main*.  The keyword
          ``html_only`` (default ``False``) is forwarded to ``select_ids``.
        """
        document = etree.fromstring(HTML_IDS)
        # Map every element to its index in the traversal so matches can be
        # sorted consistently.  ``iter()`` is the supported spelling of the
        # deprecated ``getiterator()`` alias.
        sort_key = {
            el: count for count, el in enumerate(document.iter())
        }.__getitem__
        css_to_xpath = GenericTranslator().css_to_xpath
        html_css_to_xpath = HTMLTranslator().css_to_xpath

        def select_ids(selector, html_only):
            xpath = css_to_xpath(selector)
            items = document.xpath(xpath)
            if html_only:
                # An HTML-only selector must match nothing when translated
                # generically; only then is the HTML translation evaluated.
                assert items == []
                xpath = html_css_to_xpath(selector)
                items = document.xpath(xpath)
            items.sort(key=sort_key)
            return [element.get('id', 'nil') for element in items]

        # NOTE(review): pcss is not called in the visible portion of this
        # test; presumably the assertions that use it follow -- confirm.
        def pcss(main, *selectors, **kwargs):
            html_only = kwargs.pop('html_only', False)
            result = select_ids(main, html_only)
            for selector in selectors:
                assert select_ids(selector, html_only) == result

NikolaiT / GoogleScraper / GoogleScraper / parsing.py View on Github

self.query = query
        self.html = html
        self.dom = None
        self.search_results = {}
        self.num_results_for_query = ''
        self.num_results = 0
        self.effective_query = ''
        self.page_number = -1
        self.no_results = False

        # to be set by the implementing sub classes
        self.search_engine = ''

        # short alias because we use it so extensively
        self.css_to_xpath = HTMLTranslator().css_to_xpath

        if self.html:
            self.parse()

jurismarches / chopper / chopper / css / translator.py View on Github

from cssselect import HTMLTranslator


class XpathTranslator(HTMLTranslator):
    """
    HTMLTranslator subclass that leaves the xpath unchanged for a fixed set
    of pseudo-classes
    """
    def pseudo_matches_if_exists(self, xpath):
        """
        Hand back the xpath exactly as it was received
        """
        return xpath

    # Every pseudo-class hook below is bound to the pass-through method
    # above, so translating any of them returns the incoming xpath as-is.
    xpath_link_pseudo = xpath_visited_pseudo = xpath_hover_pseudo = \
        pseudo_matches_if_exists
    xpath_active_pseudo = xpath_focus_pseudo = xpath_target_pseudo = \
        pseudo_matches_if_exists
    xpath_enabled_pseudo = pseudo_matches_if_exists

fated / calibre_amazon_cn / worker.py View on Github

def CSSSelect(expr):
    """Build an ``lxml.etree.XPath`` object from the CSS selector *expr*.

    The selector is translated to an XPath expression string with a fresh
    ``HTMLTranslator`` and that string is then passed to ``XPath``.
    """
    # Imports stay inside the function, as in the original.
    from cssselect import HTMLTranslator
    from lxml.etree import XPath
    translator = HTMLTranslator()
    xpath_source = translator.css_to_xpath(expr)
    return XPath(xpath_source)

scrapy / scrapy / scrapy / selector / csssel.py View on Github

raise ExpressionError(
                "Expected a single string or ident for ::attr(), got %r"
                % function.arguments)
        return ScrapyXPathExpr.from_xpath(xpath,
            attribute=function.arguments[0].value)

    def xpath_text_simple_pseudo_element(self, xpath):
        """Handle the ::text pseudo-element by requesting text nodes"""
        text_expr = ScrapyXPathExpr.from_xpath(xpath, textnode=True)
        return text_expr


class ScrapyGenericTranslator(TranslatorMixin, GenericTranslator):
    """GenericTranslator combined with TranslatorMixin; adds no members."""
    pass


class ScrapyHTMLTranslator(TranslatorMixin, HTMLTranslator):
    """HTMLTranslator combined with TranslatorMixin; adds no members."""
    pass


class CSSSelectorMixin(object):
    """Mixin that lets ``select`` accept a CSS expression.

    The concrete class must expose a ``translator`` attribute offering
    ``css_to_xpath``, and a later class in the MRO must implement
    ``select`` for XPath expressions.
    """

    def select(self, css):
        """Translate *css* to XPath and pass it to the next ``select``."""
        translated = self._css2xpath(css)
        parent = super(CSSSelectorMixin, self)
        return parent.select(translated)

    def _css2xpath(self, css):
        """Return what ``self.translator.css_to_xpath`` gives for *css*."""
        translate = self.translator.css_to_xpath
        return translate(css)


class CSSSelector(CSSSelectorMixin, XPathSelector):
    """XPathSelector combined with CSSSelectorMixin."""
    # One ScrapyHTMLTranslator instance, created when the class body runs
    # and stored as a class attribute.
    translator = ScrapyHTMLTranslator()

palexu / send2kindle / calibre / ebooks / oeb / transforms / split.py View on Github

def find_page_breaks(self, item):
        """Populate ``self.page_break_selectors`` from the book's stylesheets.

        When ``self.page_break_selectors`` is ``None`` it is replaced by a
        set, and every rule of the manifest items whose media type is in
        ``OEB_STYLES`` is inspected.  A rule whose ``page-break-before``
        value is set to something other than ``avoid``, ``auto`` or
        ``inherit`` contributes an ``(XPath, True)`` tuple built from its
        selector text; if ``self.remove_css_pagebreaks`` is true the
        property is then removed from the rule.

        When ``self.page_break_selectors`` is already set, the code shown
        here does nothing.
        """
        if self.page_break_selectors is None:
            from calibre.ebooks.oeb.stylizer import fix_namespace
            css_to_xpath = HTMLTranslator().css_to_xpath
            self.page_break_selectors = set()
            stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
                    OEB_STYLES]
            for rule in rules(stylesheets):
                before = getattr(rule.style.getPropertyCSSValue(
                    'page-break-before'), 'cssText', '').strip().lower()
                # NOTE(review): ``after`` is not used in the visible code;
                # presumably a matching page-break-after branch follows.
                after  = getattr(rule.style.getPropertyCSSValue(
                    'page-break-after'), 'cssText', '').strip().lower()
                try:
                    if before and before not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add((XPath(fix_namespace(css_to_xpath(rule.selectorText))),
                            True))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-before')
                except Exception:
                    # Best effort: a failure while handling one rule is
                    # skipped.  Narrowed from a bare ``except`` so that
                    # KeyboardInterrupt and SystemExit still propagate.
                    pass

pmyteh / RISJbot / RISJbot / utils.py View on Github

def mutate_selector_del(selector, method, expression):
    """Remove every node matched by `expression` from `selector.root`.

       `method` chooses how `expression` is read: 'xpath' uses it as-is,
       'css' first converts it with HTMLTranslator. Any other value raises
       NotImplementedError inside the guarded section, so it is logged
       like every other failure instead of reaching the caller.

       The function works on `selector.root` directly, calling `.xpath()`
       on it and `.getparent().remove()` on each match. Per the original
       author's note, that root is an lxml.etree.Element which the
       Selector interface does not expose, and scrapy has no native
       content-removal interface. Because this is not a published
       interface it is risky: if scrapy moved to another HTML/XML parsing
       library this would fail.
    """
    try:
        if method == 'xpath':
            xpath_query = expression
        elif method == 'css':
            xpath_query = HTMLTranslator().css_to_xpath(expression)
        else:
            raise NotImplementedError

        for match in selector.root.xpath(xpath_query):
            parent = match.getparent()
            parent.remove(match)
    except Exception as err:
        message = 'mutate_selector_del({}, {}, {},) failed: {}'.format(
            selector, method, expression, err)
        logger.error(message)

scrapy / scrapy / scrapy / selector / csstranslator.py View on Github

raise ExpressionError(
                "Expected a single string or ident for ::attr(), got %r"
                % function.arguments)
        return ScrapyXPathExpr.from_xpath(xpath,
            attribute=function.arguments[0].value)

    def xpath_text_simple_pseudo_element(self, xpath):
        """Handle the ::text pseudo-element by requesting text nodes"""
        text_expr = ScrapyXPathExpr.from_xpath(xpath, textnode=True)
        return text_expr


class ScrapyGenericTranslator(TranslatorMixin, GenericTranslator):
    """GenericTranslator combined with TranslatorMixin; adds no members."""
    pass


class ScrapyHTMLTranslator(TranslatorMixin, HTMLTranslator):
    """HTMLTranslator combined with TranslatorMixin; adds no members."""
    pass

lorien / ioweb / ioweb / response.py View on Github

def process_query(self, query):
        """Convert the CSS *query* to XPath and defer to the parent class."""
        translated = HTMLTranslator().css_to_xpath(query)
        parent = super(CssSelector, self)
        return parent.process_query(translated)

How to use the cssselect.HTMLTranslator class in cssselect

To help you get started, we’ve selected a few cssselect examples, based on popular ways it is used in public projects.

cssselect

Package Health Score

Popular cssselect functions

Similar packages