# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_smart_strings(self):
    """Lxml smart strings return values"""
    # Selector subclass that opts into lxml "smart strings": XPath string
    # results then expose .getparent() linking back to the source element.
    class SmartStringsSelector(Selector):
        _lxml_smart_strings = True
    # Fixture: two <div>s with nested <ul>/<li> text nodes to select against.
    body = u"""
<div class="one">
<ul>
<li>one</li><li>two</li>
</ul>
</div>
<div class="two">
<ul>
<li>four</li><li>five</li><li>six</li>
</ul>
</div>
"""
    # .getparent() is available for text nodes and attributes
    # NOTE(review): the assertions for this test appear to be missing from
    # this chunk — confirm against the full file before relying on it.
def test_has_class_error_no_args(self):
    """has-class() called with no arguments must raise ValueError."""
    body = u"""
<p class="foo">First</p>
"""
    sel = Selector(text=body)
    # assertRaisesRegex: the assertRaisesRegexp alias was deprecated in
    # Python 3.2 and removed in 3.12.
    self.assertRaisesRegex(
        ValueError, 'has-class must have at least 1 argument',
        sel.xpath, 'has-class()')
def _use_page(self, response):
    """Remember *response* and build a parsel Selector over its body."""
    # The two assignments are independent; the selector is derived purely
    # from the response text.
    self.selector = parsel.Selector(text=response.text)
    self.response = response
def login(username, password, domain):
    """Open a requests session against *domain* and return a parsel Selector
    for the fetched landing page.

    NOTE(review): *username* and *password* are never used — no credentials
    are submitted, so despite its name this function does not actually log
    in. Confirm whether a POST to a login form was meant to follow the GET.
    """
    session = requests.Session()
    response = session.get(domain)
    return parsel.Selector(text=response.text)
def get_all_traits_and_specialities():
    """Scrape the trait and speciality option names from sofifa's top-players page.

    Returns a dict with keys 'traits' (deduplicated, '_trait'-suffixed) and
    'specialities' ('_speciality'-suffixed).
    """
    page = requests.get('https://sofifa.com/players/top').text
    doc = parsel.Selector(page)
    # The last few filter dropdowns of the sidebar form hold the options.
    selects = doc.xpath('./body/section[1]/section[1]/aside[1]/form[1]/div[last()]/div[position() >= last() - 2]/select')
    option_text = 'option/text()'
    # First two <select>s are traits; np.unique collapses the duplicates.
    raw_traits = selects[:2].xpath(option_text).extract()
    deduped = list(np.unique([entry.strip() for entry in raw_traits if entry != 'trait.']))
    # Third <select> is specialities; the suffix disambiguates them from
    # same-named attributes (there is both a strength speciality and a
    # strength attribute).
    raw_specs = selects[2].xpath(option_text).extract()
    return {
        'traits': [name + '_trait' for name in deduped],
        'specialities': [entry.strip() + '_speciality' for entry in raw_specs],
    }
def parse_single_overview_page(html):
    """Parse every table row of the overview page *html* into a list of dicts."""
    rows = parsel.Selector(html).xpath('./body/table/tbody/tr')
    return [parse_single_row(row) for row in rows]
# Get the op named "conv_filter'
subgraph.select("#conv_filter")
# Get the "bias" ops within Affine layers
subgraph.select("Affine .bias")
# Get all TensorValueOps
subgraph.select("TensorValueOp")
# Get all ops from timestep 3 in an RNN (ie with metadata "recurrent_step=3")
subgraph.select("[recurrent_step=3]")
"""
# NOTE(review): the enclosing `def` line is not visible in this chunk; the
# lines above are the tail of its docstring and are left byte-identical.
# Serialize the graph to XML, run the CSS selector over it, and map each
# matched element back to an op; matches that resolve to no op are dropped.
ops = list()
for selected in parsel.Selector(self._to_xml()).css(css):
    op = self._selector_to_op(selected)
    if op is not None:
        ops.append(op)
return ops
def parse(html):
    """Build a Page (title, memes, next-page URL) from a listing page's HTML."""
    doc = Selector(text=html)
    # Parse each picture element, dropping entries where parse_meme failed.
    parsed = (catch_errors(parse_meme, node) for node in doc.css("div.pic"))
    memes = [meme for meme in parsed if meme is not None]
    title = doc.css("title::text").get()
    next_href = doc.css(".list_next_page_button::attr(href)").get()
    next_page_url = "/mistrzowie/page/" + get_last_part_url(next_href)
    return Page(title, memes, next_page_url)
def crawl(url):
    """Yield one dict per thread on the Tieba forum listing page at *url*.

    Each dict carries the thread's title, abstract, absolute url, author,
    reply count and last-reply date.
    """
    text = requests.get(url, headers=lt.DEFAULT_HEADERS).text
    # NOTE(review): the original `text.replace('', '')` was a no-op — it
    # presumably once stripped markers hiding the listing in HTML comments;
    # dropped here as dead code. Confirm against the live page.
    tree = Selector(text=text)
    for item in tree.css('ul#thread_list li.j_thread_list'):
        data = {}
        data['title'] = item.css('a.j_th_tit::text').extract_first()
        # Supply defaults so a malformed list item cannot crash the generator
        # (the original raised AttributeError on .strip() / TypeError on int()
        # whenever extract_first() returned None).
        data['abstract'] = item.css('.threadlist_abs::text').extract_first(default='').strip()
        data['url'] = f"{domain}{item.css('a.j_th_tit::attr(href)').extract_first()}"
        data['author'] = item.css('a.frs-author-name::text').extract_first()
        data['reply'] = int(item.css('span.threadlist_rep_num::text').extract_first(default='0') or '0')
        data['date'] = item.css('.threadlist_reply_date::text').extract_first(default='').strip()
        yield data
def _get_relevant_league_overview_html(league_overview_html):
    """Return the serialized league-overview <table> found in the page HTML."""
    doc = parsel.Selector(text=league_overview_html)
    return doc.xpath('/html/body/section/section/article/table').extract_first()