def start_requests(self):
    start_url = urltemplate.format(min_weight, max_weight)
    # Bind the current bounds into the callback so parse_range
    # knows which weight window this response covers.
    yield scrapy.Request(
        url=start_url,
        callback=lambda x: self.parse_range(x, min_weight, max_weight))
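# parse_range is not shown above; a minimal sketch of one plausible shape,
# assuming the site caps results per query (RESULT_CAP, the '.result'
# selector, and parse_result are all hypothetical, not from the source):
RESULT_CAP = 100  # hypothetical per-query result limit

def parse_range(self, response, min_weight, max_weight):
    results = response.css('.result')  # hypothetical result selector
    if len(results) < RESULT_CAP or min_weight == max_weight:
        # The window fits in one page of results: parse it directly.
        for result in results:
            yield self.parse_result(result)  # hypothetical item parser
    else:
        # Too many results for one query: halve the window and recurse.
        mid = (min_weight + max_weight) // 2
        for lo, hi in ((min_weight, mid), (mid + 1, max_weight)):
            yield scrapy.Request(
                url=urltemplate.format(lo, hi),
                callback=lambda r, a=lo, b=hi: self.parse_range(r, a, b))
# The default arguments (a=lo, b=hi) pin each lambda to its own bounds;
# a bare closure over lo and hi would see only the last iteration's values.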
def setUp(self):
    self.spider = Spider("foo")
    self.settings = Settings()
    self.settings.setmodule(default_settings)
    self.settings.setdict(self.local_settings)
    self.storage = MongoStorage(self.settings)
    self.storage.open_spider(self.spider)
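# A matching tearDown usually undoes open_spider so each test starts clean;
# a minimal sketch, assuming MongoStorage exposes a symmetric close_spider
# (that method name is an assumption about its interface):
def tearDown(self):
    self.storage.close_spider(self.spider)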
def _assert_request_no3xx(self, pipeline_class, settings):
    pipe = pipeline_class(settings=Settings(settings))
    request = Request('http://url')
    pipe._modify_media_request(request)
    self.assertIn('handle_httpstatus_list', request.meta)
    for status, check in [
            (200, True),
            # These are the status codes we want
            # the downloader to handle itself
            (301, False),
            (302, False),
            (303, False),
            (307, False),
            (308, False),
            # we still want to get 4xx and 5xx
            (400, True)]:
        if check:
            self.assertIn(status, request.meta['handle_httpstatus_list'])
        else:
            self.assertNotIn(status, request.meta['handle_httpstatus_list'])
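# A test would drive this helper once per media pipeline; a hedged usage
# sketch (the pipeline class and the FILES_STORE value are assumptions):
from scrapy.pipelines.files import FilesPipeline

def test_files_pipeline_requests_skip_3xx(self):
    self._assert_request_no3xx(FilesPipeline, {'FILES_STORE': 's3://example/files/'})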
# embed C1 and C3 for scrapytest.org/foo
req = Request("http://scrapytest.org/foo")
self.mw.process_request(req, self.spider)
assert req.headers.get("Cookie") in (
b"C1=value1; C3=value3",
b"C3=value3; C1=value1",
)
# embed C2 for scrapytest.org/bar
req = Request("http://scrapytest.org/bar")
self.mw.process_request(req, self.spider)
self.assertEqual(req.headers.get("Cookie"), b"C2=value2")
# embed nothing for scrapytest.org/baz
req = Request("http://scrapytest.org/baz")
self.mw.process_request(req, self.spider)
assert "Cookie" not in req.headers
def start_requests(self):
    for url in self.start_urls:
        yield Request(url, self.parse, errback=self.on_error)
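# The errback is not shown above; a minimal sketch, assuming it only logs
# the failure (failure is a twisted Failure wrapping the original error):
def on_error(self, failure):
    self.logger.error('Request failed: %r', failure)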
# -*- coding: utf-8 -*-
"""Stuff to pull from a New Yorker article."""
import scrapy


class NewYorkerItem(scrapy.Item):
    """Pull the title, author, text, and link."""

    title = scrapy.Field()
    author = scrapy.Field()
    text = scrapy.Field()
    link = scrapy.Field()
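# A spider callback would populate the item field by field; a hedged sketch
# (the CSS selectors are assumptions about the article markup):
def parse_article(self, response):
    item = NewYorkerItem()
    item['title'] = response.css('h1::text').get()
    item['author'] = response.css('.byline a::text').get()  # assumed selector
    item['text'] = ' '.join(response.css('article p::text').getall())
    item['link'] = response.url
    yield item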
def test_writer_closed_on_spider_closed_signal(self):
    self.crawler_mock.signals.connect.assert_called_once_with(
        self.instance.spider_closed,
        signal=signals.spider_closed
    )
    with mock.patch.object(self.instance, '_writer') as writer_mock:
        self.instance.spider_closed(self.spider)
        writer_mock.close.assert_called_once_with()
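# The object under test (self.instance) is not shown; a hedged sketch of
# its assumed shape, inferred from the mocks (class name, constructor, and
# output target are guesses):
from scrapy import signals

class WriterExtension:
    def __init__(self, crawler, path='items.jl'):  # hypothetical output path
        self._writer = open(path, 'w')
        # Close the writer when the spider finishes.
        crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)

    def spider_closed(self, spider):
        self._writer.close()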
def record():
    """Return results from the T2K spider."""
    spider = t2k_spider.T2kSpider()
    response = fake_response_from_file('t2k/test_1.html')
    selector = Selector(response, type='html')
    nodes = selector.xpath('//%s' % spider.itertag)
    spider.domain = "file:///tests/responses/t2k/"
    parsed_node = spider.parse_node(response, nodes[0])
    splash_response = fake_response_from_file('t2k/001.html')
    splash_response.meta["date"] = parsed_node.meta["date"]
    splash_response.meta["title"] = parsed_node.meta["title"]
    splash_response.meta["urls"] = parsed_node.meta["urls"]
    splash_response.meta["authors"] = parsed_node.meta["authors"]
    # scrape_for_pdf is a generator; pull its first yielded item
    # (next() replaces the Python 2-only .next() method).
    parsed_item = next(spider.scrape_for_pdf(splash_response))
    assert parsed_item
    assert parsed_item.record
    return parsed_item.record
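# fake_response_from_file is a common test helper rather than a Scrapy API;
# a minimal sketch, assuming fixtures live in a responses/ directory next to
# the tests. Attaching a Request is what makes response.meta usable above:
import os
from scrapy.http import HtmlResponse, Request

def fake_response_from_file(file_name, url='http://www.example.com'):
    path = os.path.join(os.path.dirname(__file__), 'responses', file_name)
    with open(path, 'rb') as f:
        body = f.read()
    return HtmlResponse(url=url, request=Request(url=url), body=body)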
def test_badly_encoded_body(self):
    # \xe9 alone isn't a valid UTF-8 sequence
    r1 = TextResponse('http://www.example.com',
                      body=b'<p>an Jos\xe9 de</p>',
                      encoding='utf-8')
    Selector(r1).xpath('//text()').getall()
def urls():
    spider = base_spider.BaseSpider()
    response = fake_response_from_file('base/test_1.xml')
    selector = Selector(response, type='xml')
    spider._register_namespaces(selector)
    nodes = selector.xpath('.//%s' % spider.itertag)
    return spider.get_urls_in_record(nodes[0])
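# _register_namespaces comes from XMLFeedSpider: it registers each
# (prefix, uri) pair from spider.namespaces on the selector. A hedged sketch
# of the assumed spider attributes (the OAI-PMH namespace is illustrative,
# not from the source):
from scrapy.spiders import XMLFeedSpider

class BaseSpider(XMLFeedSpider):
    itertag = 'record'  # assumed tag name
    namespaces = [('oai', 'http://www.openarchives.org/OAI/2.0/')]  # assumed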