def handle_error(self, error):
    """Send error to error handler."""
    handler = getattr(self.mod, "errorhandler", None)
    if not handler:
        return
    try:
        handler(error, self)
    except Exception as err:  # pylint: disable=broad-except
        print("[Crawler] Failed to handle error: {}".format(err))
# Fragment of the per-episode download loop; mission and savepath come from the
# enclosing scope (compare crawl(mission, savepath) in download() below).
for ep in mission.episodes:
    if ep.skip or ep.complete:
        continue
    print("Downloading ep {}".format(ep.title))
    try:
        crawler = Crawler(mission, ep, savepath)
        crawlpage(crawler)
    except LastPageError:
        print("Episode download complete!")
        ep.complete = True
        download_ch.pub("DOWNLOAD_EP_COMPLETE", (mission, ep))
    except SkipEpisodeError as err:
        print("Something bad happened, skip the episode.")
        if err.always:
            ep.skip = True
# GUI callback: add the selected missions to the library.
def _():
    missions = table.selected()
    titles = [m.title for m in missions]
    mission_manager.add("library", *missions)
    print("Added to library: {}".format(", ".join(titles)))
def do_analyze(self):
    """Analyze the mission and update its state."""
    print("Start analyzing {}".format(self.mission.url))
    self.mission.state = "ANALYZING"

    # one-time mission
    if self.is_onetime():
        print("It's a one-time mission")
        ep = self.mission.episodes[0]
        if ep.skip or ep.complete:
            self.mission.state = "FINISHED"
        else:
            self.mission.state = "UPDATE"
        print("Analysis succeeded!")
        return

    self.html = self.downloader.html(self.mission.url, retry=True)

    if not self.mission.title:
        self.mission.title = self.mission.module.get_title(
            self.html, self.mission.url)

    self.analyze_pages()

    if self.is_new:
        self.mission.state = "ANALYZED"
    elif all(e.complete or e.skip for e in self.mission.episodes):
        self.mission.state = "FINISHED"
def download(mission, savepath):
    """Download mission to savepath."""
    # Warning: possible deadlock here.
    # Never call mission.lock.acquire inside a callback...
    print("Start downloading " + mission.title)
    mission.state = "DOWNLOADING"
    try:
        crawl(mission, savepath)

        # Check whether every episode is complete
        for ep in mission.episodes:
            if not ep.complete and not ep.skip:
                raise Exception("Mission is not completed")
    except WorkerExit:
        mission.state = "PAUSE"
        download_ch.pub('DOWNLOAD_PAUSE', mission)
        raise
    except PauseDownloadError as err:
        mission.state = "ERROR"
def analyze_pages(self):
    """Crawl each page and collect the episodes."""
    url = self.mission.url
    old_eps = EpisodeList(self.mission.episodes or ())
    new_eps = EpisodeList()

    while True:
        try:
            eps = self.mission.module.get_episodes(self.html, url)
        except SkipPageError:
            pass
        except LastPageError:
            break
        else:
            if not eps:
                print("Warning: get_episodes returns an empty list")

            self.transform_title(eps)
            eps = EpisodeList(eps)

            # add the resulting episodes to new_eps in new-to-old order.
            for ep in reversed(eps):
                new_eps.add(ep)

            # FIXME: do we really need this check?
            # one-time mission?
            if self.is_onetime(new_eps):
                break

            # duplicate with old_eps
            if any(e in old_eps for e in eps):
                break
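# analyze_pages() expects each site module to expose get_episodes(html, url) and to
# raise SkipPageError / LastPageError for paging control.  A minimal sketch of such
# a module function; the regex, the markup it matches, and the Episode stand-in are
# assumptions for illustration, not the project's actual API:
import re
from collections import namedtuple

Episode = namedtuple("Episode", ["title", "url"])  # stand-in; real Episode type not shown

def get_episodes(html, url):
    """Return the episodes found on one overview page."""
    return [
        Episode(title=m.group(2), url=m.group(1))
        for m in re.finditer(r'<a class="ep" href="([^"]+)">([^<]+)</a>', html)
    ]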
# One per-page download step, presumably driven by crawlpage() above.  The wrapper
# name download_step is illustrative, added so the fragment is callable as written.
def download_step(crawler):
    crawler.init_images()

    if not crawler.image:
        debug_log("D_NEXT_PAGE")
        crawler.next_page()
        return

    if crawler.page_exists():
        debug_log("D_NEXT_IMAGE")
        print("page {} already exists".format(crawler.ep.total + 1))
        crawler.next_image()
        return

    debug_log("D_RESOLVE")
    crawler.resolve_image()
    print("Downloading {} page {}: {}\n".format(
        crawler.ep.title, crawler.ep.total + 1, crawler.image.url))

    debug_log("D_DOWNLOAD")
    crawler.download_image()

    debug_log("D_HANDLE")
    crawler.handle_image()

    debug_log("D_SAVE")
    crawler.save_image()

    debug_log("D_PUB")
    mission_ch.pub("MISSION_PROPERTY_CHANGED", crawler.mission)

    debug_log("D_REST")
    crawler.rest()

    debug_log("D_NEXT_IMAGE")
    crawler.next_image()
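# A minimal sketch of a driver loop for the step above, assuming crawlpage() simply
# repeats the step until LastPageError propagates out of next_page(), which the
# episode loop near the top treats as "episode finished".  This shape is an
# assumption; the real crawlpage() is not shown in these snippets.
def crawlpage(crawler):
    while True:
        download_step(crawler)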