How to use the cssselect.HTMLTranslator function in cssselect

To help you get started, we’ve selected a few cssselect examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github zopyx / print-css-rocks / lessons / in-progress / lesson-multi-column-float-to-landscape / lib / python3.4 / site-packages / cssselect / tests.py View on Github external
def test_select(self):
        """Set up helpers that compare CSS selectors by the element ids they match.

        Parses ``HTML_IDS`` and defines two local helpers that run selectors
        against it through both ``GenericTranslator`` and ``HTMLTranslator``.
        NOTE(review): nothing in the visible portion calls ``pcss``; the
        snippet appears truncated -- confirm against the full test module.
        """
        document = etree.fromstring(HTML_IDS)
        # Map every element to its position in document order so query
        # results can be sorted deterministically.  ``iter()`` replaces the
        # deprecated ``getiterator()``.
        sort_key = {
            el: count for count, el in enumerate(document.iter())
        }.__getitem__
        css_to_xpath = GenericTranslator().css_to_xpath
        html_css_to_xpath = HTMLTranslator().css_to_xpath

        def select_ids(selector, html_only):
            """Return the ``id`` values ('nil' when absent) matched by ``selector``.

            With ``html_only`` true, the generic translation must match
            nothing; the HTML translation is then used for the result.
            """
            xpath = css_to_xpath(selector)
            items = document.xpath(xpath)
            if html_only:
                assert items == []
                xpath = html_css_to_xpath(selector)
                items = document.xpath(xpath)
            items.sort(key=sort_key)
            return [element.get('id', 'nil') for element in items]

        def pcss(main, *selectors, **kwargs):
            """Assert every selector in ``selectors`` matches the same ids as ``main``."""
            html_only = kwargs.pop('html_only', False)
            result = select_ids(main, html_only)
            for selector in selectors:
                assert select_ids(selector, html_only) == result
github NikolaiT / GoogleScraper / GoogleScraper / parsing.py View on Github external
self.query = query
        self.html = html
        self.dom = None
        self.search_results = {}
        self.num_results_for_query = ''
        self.num_results = 0
        self.effective_query = ''
        self.page_number = -1
        self.no_results = False

        # to be set by the implementing sub classes
        self.search_engine = ''

        # short alias because we use it so extensively
        self.css_to_xpath = HTMLTranslator().css_to_xpath

        if self.html:
            self.parse()
github jurismarches / chopper / chopper / css / translator.py View on Github external
from cssselect import HTMLTranslator


class XpathTranslator(HTMLTranslator):
    """
    HTMLTranslator subclass with pass-through pseudo-class handlers
    """
    def pseudo_matches_if_exists(self, xpath):
        """
        Hands back ``xpath`` exactly as received, adding no condition
        """
        return xpath

    # Each name below is bound to the same pass-through function, so the
    # corresponding handler leaves the incoming xpath untouched.
    xpath_link_pseudo = xpath_visited_pseudo = pseudo_matches_if_exists
    xpath_hover_pseudo = xpath_active_pseudo = pseudo_matches_if_exists
    xpath_focus_pseudo = xpath_target_pseudo = pseudo_matches_if_exists
    xpath_enabled_pseudo = pseudo_matches_if_exists
github fated / calibre_amazon_cn / worker.py View on Github external
def CSSSelect(expr):
    """Compile the CSS selector ``expr`` into an ``lxml.etree.XPath`` object.

    The selector is translated to an XPath string with a fresh
    ``HTMLTranslator`` and that string is passed to ``XPath``.
    """
    from cssselect import HTMLTranslator
    from lxml.etree import XPath
    translator = HTMLTranslator()
    xpath_source = translator.css_to_xpath(expr)
    return XPath(xpath_source)
github scrapy / scrapy / scrapy / selector / csssel.py View on Github external
raise ExpressionError(
                "Expected a single string or ident for ::attr(), got %r"
                % function.arguments)
        return ScrapyXPathExpr.from_xpath(xpath,
            attribute=function.arguments[0].value)

    def xpath_text_simple_pseudo_element(self, xpath):
        """Handle the ``::text`` pseudo-element (text node selection).

        Returns ``ScrapyXPathExpr.from_xpath(xpath, textnode=True)``.
        """
        text_expr = ScrapyXPathExpr.from_xpath(xpath, textnode=True)
        return text_expr


class ScrapyGenericTranslator(TranslatorMixin, GenericTranslator):
    """GenericTranslator combined with TranslatorMixin; adds nothing of its own."""


class ScrapyHTMLTranslator(TranslatorMixin, HTMLTranslator):
    """HTMLTranslator combined with TranslatorMixin; adds nothing of its own."""


class CSSSelectorMixin(object):
    """Mixin whose ``select`` takes a CSS selector instead of an XPath query.

    Expects a ``translator`` attribute exposing ``css_to_xpath`` and a later
    class in the MRO that provides ``select(xpath)``.
    """

    def _css2xpath(self, css):
        """Translate ``css`` to XPath using ``self.translator``."""
        translate = self.translator.css_to_xpath
        return translate(css)

    def select(self, css):
        """Translate ``css`` to XPath and delegate to the next ``select`` in the MRO."""
        return super(CSSSelectorMixin, self).select(self._css2xpath(css))


class CSSSelector(CSSSelectorMixin, XPathSelector):
    """XPathSelector combined with CSSSelectorMixin, using an HTML-aware translator."""
    # One translator instance, created when the class body runs and shared
    # by every instance through the class attribute.
    translator = ScrapyHTMLTranslator()
github palexu / send2kindle / calibre / ebooks / oeb / transforms / split.py View on Github external
def find_page_breaks(self, item):
        """Lazily build ``self.page_break_selectors`` from the book's stylesheets.

        On first use (``self.page_break_selectors is None``) every CSS rule
        in the manifest's ``OEB_STYLES`` items is scanned; a rule whose
        ``page-break-before`` value is set and is not ``avoid``/``auto``/
        ``inherit`` contributes an ``(XPath, True)`` pair to the set.  When
        ``self.remove_css_pagebreaks`` is true the property is removed from
        the rule afterwards.

        NOTE(review): ``item`` and ``after`` are not used in the visible
        portion; the snippet appears truncated -- confirm against the full
        source before removing them.
        """
        if self.page_break_selectors is None:
            from calibre.ebooks.oeb.stylizer import fix_namespace
            css_to_xpath = HTMLTranslator().css_to_xpath
            self.page_break_selectors = set()
            stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
                    OEB_STYLES]
            for rule in rules(stylesheets):
                before = getattr(rule.style.getPropertyCSSValue(
                    'page-break-before'), 'cssText', '').strip().lower()
                after  = getattr(rule.style.getPropertyCSSValue(
                    'page-break-after'), 'cssText', '').strip().lower()
                try:
                    if before and before not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add((XPath(fix_namespace(css_to_xpath(rule.selectorText))),
                            True))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-before')
                except Exception:
                    # Best effort: a selector that cannot be translated or
                    # compiled is skipped.  Catching Exception (not a bare
                    # except) lets KeyboardInterrupt/SystemExit propagate.
                    pass
github pmyteh / RISJbot / RISJbot / utils.py View on Github external
def mutate_selector_del(selector, method, expression):
    """Delete every node matched by ``expression`` from ``selector.root``.

       ``method`` picks how ``expression`` is read: 'xpath' uses it as is,
       'css' first translates it to XPath with HTMLTranslator. Any other
       value raises NotImplementedError inside the guarded section, so it
       ends up logged like every other failure rather than propagating.

       Selectors wrap an lxml.etree.Element document root which the Selector
       interface does not expose; it can be mutated by calling .remove on
       parts of the selector.root tree. Scrapy itself offers no native
       content removal interface.

       Because this bypasses the published Selector interface it must be
       considered risky: should scrapy ever move its selectors to another
       HTML/XML parsing library (feasible, though not likely), this would
       fail.
    """
    try:
        if method == 'css':
            query = HTMLTranslator().css_to_xpath(expression)
        elif method == 'xpath':
            query = expression
        else:
            raise NotImplementedError

        for match in selector.root.xpath(query):
            match.getparent().remove(match)
    except Exception as err:
        # Failures are reported through the logger and never re-raised.
        message = 'mutate_selector_del({}, {}, {},) failed: {}'.format(
            selector, method, expression, err)
        logger.error(message)
github scrapy / scrapy / scrapy / selector / csstranslator.py View on Github external
raise ExpressionError(
                "Expected a single string or ident for ::attr(), got %r"
                % function.arguments)
        return ScrapyXPathExpr.from_xpath(xpath,
            attribute=function.arguments[0].value)

    def xpath_text_simple_pseudo_element(self, xpath):
        """Handle the ``::text`` pseudo-element (text node selection).

        Returns ``ScrapyXPathExpr.from_xpath(xpath, textnode=True)``.
        """
        text_expr = ScrapyXPathExpr.from_xpath(xpath, textnode=True)
        return text_expr


class ScrapyGenericTranslator(TranslatorMixin, GenericTranslator):
    """GenericTranslator combined with TranslatorMixin; adds nothing of its own."""


class ScrapyHTMLTranslator(TranslatorMixin, HTMLTranslator):
    """HTMLTranslator combined with TranslatorMixin; adds nothing of its own."""
github lorien / ioweb / ioweb / response.py View on Github external
def process_query(self, query):
        """Translate the CSS selector ``query`` to XPath, then defer to the parent ``process_query``."""
        translator = HTMLTranslator()
        as_xpath = translator.css_to_xpath(query)
        return super(CssSelector, self).process_query(as_xpath)