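# Imports these excerpts rely on (queuelib's PriorityQueue plus, in legacy
# Scrapy, the scrapy.log and scrapy.utils.reqser modules):
import json
from os.path import exists, join

from queuelib import PriorityQueue
from scrapy import log
from scrapy.utils.misc import load_object
from scrapy.utils.reqser import request_to_dict, request_from_dict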
def open(self, spider):
    self.spider = spider
    self.mqs = PriorityQueue(self._newmq)  # in-memory request queue
    self.dqs = self._dq() if self.dqdir else None  # disk queue, if a job dir is set
    return self.df.open()  # open the duplicate-request filter
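# The two queue factories referenced by open(); a minimal sketch in the style
# of the same Scheduler, assuming mq_class/dq_class hold the queue classes
# resolved via load_object (the 'p%s' per-priority file naming is assumed):
def _newmq(self, priority):
    return self.mq_class()

def _newdq(self, priority):
    return self.dq_class(join(self.dqdir, 'p%s' % priority))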
def _dq(self):
    activef = join(self.dqdir, 'active.json')
    if exists(activef):
        # Restore the priority levels saved when a previous crawl was paused
        with open(activef) as f:
            prios = json.load(f)
    else:
        prios = ()
    q = PriorityQueue(self._newdq, startprios=prios)
    if q:
        log.msg(format="Resuming crawl (%(queuesize)d requests scheduled)",
                spider=self.spider, queuesize=len(q))
    return q
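# Counterpart at shutdown: a minimal sketch (following the same Scheduler
# excerpted above) of how the 'active.json' consumed by _dq() gets written,
# so a paused crawl can be resumed later.
def close(self, reason):
    if self.dqs:
        prios = self.dqs.close()  # remaining priority levels in the disk queue
        with open(join(self.dqdir, 'active.json'), 'w') as f:
            json.dump(prios, f)
    return self.df.close(reason)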
# Tail of a slot-keyed push(obj, priority): 'slot' is assumed to be derived
# from the request (e.g. its download slot) earlier in the method.
if slot not in self.pqueues:
    self.pqueues[slot] = self.pqfactory()  # lazily create one sub-queue per slot
queue = self.pqueues[slot]
queue.push(obj, priority)
def close(self):
    # Close every per-slot queue, collecting whatever each reports as pending
    active = {slot: queue.close()
              for slot, queue in self.pqueues.items()}
    self.pqueues.clear()
    return active
def __len__(self):
    return sum(len(x) for x in self.pqueues.values()) if self.pqueues else 0
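# Hypothetical, self-contained sketch of the contract the three methods above
# implement: one sub-queue per slot (created lazily on push), close() returns
# per-slot leftover state, and len() sums across slots. SlotPriorityQueue and
# _ListQueue are illustrative names, not Scrapy's API.
class _ListQueue:
    def __init__(self):
        self._items = []

    def push(self, obj, priority):
        self._items.append((priority, obj))

    def close(self):
        return sorted({p for p, _ in self._items})

    def __len__(self):
        return len(self._items)

class SlotPriorityQueue:
    def __init__(self):
        self.pqueues = {}
        self.pqfactory = _ListQueue

    def push(self, obj, priority, slot):
        if slot not in self.pqueues:
            self.pqueues[slot] = self.pqfactory()
        self.pqueues[slot].push(obj, priority)

    def close(self):
        active = {slot: q.close() for slot, q in self.pqueues.items()}
        self.pqueues.clear()
        return active

    def __len__(self):
        return sum(len(q) for q in self.pqueues.values()) if self.pqueues else 0

q = SlotPriorityQueue()
q.push('req-a', 0, slot='example.com')
q.push('req-b', 1, slot='example.org')
assert len(q) == 2
assert q.close() == {'example.com': [0], 'example.org': [1]}
assert len(q) == 0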
class ScrapyPriorityQueue(PriorityQueue):
    """
    PriorityQueue which works with scrapy.Request instances and
    can optionally convert them to/from dicts before/after putting to a queue.
    """

    def __init__(self, crawler, qfactory, startprios=(), serialize=False):
        super(ScrapyPriorityQueue, self).__init__(qfactory, startprios)
        self.serialize = serialize
        self.spider = crawler.spider

    @classmethod
    def from_crawler(cls, crawler, qfactory, startprios=(), serialize=False):
        return cls(crawler, qfactory, startprios, serialize)

    def push(self, request, priority=0):
        if self.serialize:
            request = request_to_dict(request, self.spider)
        super(ScrapyPriorityQueue, self).push(request, priority)
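    # Matching pop() for the "from dicts" direction the docstring mentions; a
    # short sketch assuming request_from_dict, the inverse of request_to_dict:
    def pop(self):
        request = super(ScrapyPriorityQueue, self).pop()
        if request and self.serialize:
            request = request_from_dict(request, self.spider)
        return request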
def __init__(self, settings):
    self.settings = settings
    # Resolve the queue class named by the SCHEDULER_MEMORY_QUEUE setting
    self.mq_class = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
    self.mqs = PriorityQueue(self.priority)
    self.status = ScheduleStatus()
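# load_object resolves a dotted path to the object it names, which is the
# pattern used in __init__ above; a small self-contained sketch (the setting
# value shown here is just an example):
from scrapy.utils.misc import load_object

mq_class = load_object('scrapy.squeues.FifoMemoryQueue')
mq = mq_class()  # instantiate the resolved in-memory queue class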