How to use the parsel.SelectorList class in parsel

To help you get started, we’ve selected a few parsel examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapinghub / portia / slybot / slybot / plugins / scrapely_annotations / processors.py View on Github external
def _pick_elems(self, elements, parents, containers):
        closest_elements, closest_set = SelectorList(), set()
        other_elements = SelectorList()
        for element in elements:
            try:
                element_parents = element._root.iterancestors()
            except AttributeError:
                continue
            for parent in element_parents:
                if parent in parents:
                    closest_elements.append(element)
                    closest_set.add(element)

                if parent in containers and element not in closest_set:
                    break
            else:
                other_elements.append(element)
        if closest_elements:
github scrapy / scrapy / scrapy / http / response / text.py View on Github external
It accepts the same arguments as ``Request.__init__`` method,
        but ``url`` can be not only an absolute URL, but also
        
        * a relative URL;
        * a scrapy.link.Link object (e.g. a link extractor result);
        * an attribute Selector (not SelectorList) - e.g.
          ``response.css('a::attr(href)')[0]`` or
          ``response.xpath('//img/@src')[0]``.
        * a Selector for ``<a>`` or ``<link>`` element, e.g.
          ``response.css('a.my_link')[0]``.
          
        See :ref:`response-follow-example` for usage examples.
        """
        if isinstance(url, parsel.Selector):
            url = _url_from_selector(url)
        elif isinstance(url, parsel.SelectorList):
            raise ValueError("SelectorList is not supported")
        encoding = self.encoding if encoding is None else encoding
        return super(TextResponse, self).follow(url, callback,
            method=method,
            headers=headers,
            body=body,
            cookies=cookies,
            meta=meta,
            encoding=encoding,
            priority=priority,
            dont_filter=dont_filter,
            errback=errback
        )
github scrapinghub / portia / slybot / slybot / plugins / scrapely_annotations / processors.py View on Github external
def _pick_elems(self, elements, parents, containers):
        """Sort ``elements`` by whether one of their ancestors is in ``parents``.

        For every element the ancestor chain obtained from
        ``element._root.iterancestors()`` is walked:

        * an element with an ancestor found in ``parents`` is collected in
          ``closest_elements`` (and remembered in ``closest_set``);
        * an element whose ancestor walk ends without hitting the ``break``
          below is collected in ``other_elements``.

        Returns ``closest_elements`` when it is non-empty.

        NOTE(review): this excerpt appears to be cut off after the first
        ``return``; what is returned when ``closest_elements`` is empty is
        not visible here (as written the function falls through and returns
        ``None``) -- confirm against the full source.
        """
        closest_elements, closest_set = SelectorList(), set()
        other_elements = SelectorList()
        for element in elements:
            try:
                element_parents = element._root.iterancestors()
            except AttributeError:
                # No usable ``_root.iterancestors`` on this element: it is
                # skipped entirely and ends up in neither list.
                continue
            for parent in element_parents:
                if parent in parents:
                    # No ``break`` here: the walk continues, so an element
                    # with several ancestors in ``parents`` is appended to
                    # ``closest_elements`` once per matching ancestor.
                    closest_elements.append(element)
                    closest_set.add(element)

                # Reaching a container before any ``parents`` match stops the
                # walk; the ``else`` clause below is then skipped, so the
                # element is added to neither list.
                if parent in containers and element not in closest_set:
                    break
            else:
                # Runs whenever the ancestor walk finished without ``break``,
                # which includes elements already added to
                # ``closest_elements`` above.
                other_elements.append(element)
        if closest_elements:
            return closest_elements
github kevinheavey / fifa18-even-more-player-data / crawler / player_data.py View on Github external
def parse_single_player_page(url, html_dict, constants):
    player_id = id_from_url(url)
    headline_attributes_selector = parsel.Selector(html_dict['headline_attributes'])
    # actually the line below is the first three divs under the main article
    main_article_selector_list = parsel.SelectorList(parsel.Selector(item) for item in html_dict['main'])
    metadata_selector = main_article_selector_list[0]
    main_rectangle_selector_list = main_article_selector_list[1:]
    position_ratings_selector = parsel.Selector(html_dict['position_ratings'])

    all_traits = constants['traits']
    all_specialities = constants['specialities']
    all_positions = constants['positions']

    main_attributes = parse_main_attributes(main_rectangle_selector_list)
    headline_attributes = parse_headline_attributes(headline_attributes_selector)
    metadata = parse_player_metadata(metadata_selector)
    _preferred_positions = metadata.pop('preferred_positions')
    traits_and_specialities = parse_traits_and_specialities(main_rectangle_selector_list, all_traits, all_specialities)
    miscellaneous_data = parse_player_miscellaneous_data(metadata_selector)
    position_ratings = get_position_ratings(position_ratings_selector, metadata_selector, all_positions)
    position_preferences = get_full_position_preferences(_preferred_positions, all_positions)