def get_images(html, url):
    """Collect video and image URLs from the page; skip the episode when none are found."""
    images = []
    images.extend(try_get_videos(html, url))
    images.extend(try_get_images(html))
    if not images:
        raise SkipEpisodeError
    return images
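
Every snippet on this page signals an unrecoverable episode with SkipEpisodeError. The class itself is not shown here; a minimal sketch consistent with how it is used (raised bare, raised as SkipEpisodeError(always=False), and inspected via err.always further down) might look like the following. The default of always=True is an assumption inferred from the bare raises:

class SkipEpisodeError(Exception):
    """Ask the downloader to skip the current episode.

    always=True marks the episode as permanently skipped (the caller sets
    ep.skip); always=False skips it for the current run only.
    """
    def __init__(self, always=True):  # default value is an assumption
        super().__init__()
        self.always = always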
import traceback

def error_loop(process, handle_error=None, limit=10):
    """Run process in a loop; raise SkipEpisodeError after `limit` consecutive errors."""
    errorcount = 0
    while True:
        try:
            process()
        except Exception as er:  # pylint: disable=broad-except
            traceback.print_exc()
            errorcount += 1
            if errorcount >= limit:
                raise SkipEpisodeError(always=False)
            if handle_error:
                handle_error(er)
        # except ExitErrorLoop:
        #     break
        else:
            errorcount = 0
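
A short usage sketch of error_loop; the fetch job and the five-second back-off are hypothetical, but they show the intended contract: handle_error runs after each failure, the consecutive-error counter resets whenever a call succeeds, and the loop escapes by raising SkipEpisodeError(always=False) once `limit` failures occur in a row:

import time

def fetch():
    """Hypothetical job that may fail transiently, e.g. a flaky download."""
    ...

# Sleep 5 seconds after each failure; after 10 consecutive failures the loop
# raises SkipEpisodeError(always=False), which the episode loop below handles.
error_loop(fetch, handle_error=lambda er: time.sleep(5), limit=10)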
def get_images(html, url):
    imgs = xznj120.get_images(html, url)
    if imgs:
        return imgs
    raise SkipEpisodeError

for ep in mission.episodes:
    if ep.skip or ep.complete:
        continue
    print("Downloading ep {}".format(ep.title))
    try:
        crawler = Crawler(mission, ep, savepath)
        crawlpage(crawler)
    except LastPageError:
        print("Episode download complete!")
        ep.complete = True
        download_ch.pub("DOWNLOAD_EP_COMPLETE", (mission, ep))
    except SkipEpisodeError as err:
        print("Something bad happened, skip the episode.")
        if err.always:
            ep.skip = True
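
crawlpage and LastPageError are defined elsewhere in the project; the sketch below only illustrates the contract the loop above relies on: completion is signaled by raising LastPageError rather than by a return value. The page-iteration helpers are hypothetical:

class LastPageError(Exception):
    """Raised when the crawler has moved past the episode's last page."""

def crawlpage(crawler):
    # Hypothetical page walk; the real implementation lives in the project.
    while True:
        if not crawler.has_next_page():   # hypothetical helper
            raise LastPageError
        crawler.download_current_page()   # hypothetical helper
        crawler.next_page()               # hypothetical helper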
import re
from urllib.parse import urljoin

def get_images(html, url):
    try:
        img = re.search('href="([^"]+)" id="highres"', html).group(1)
    except AttributeError:
        # re.search returned None: the page has no highres link
        if "This post was deleted" in html:
            raise SkipEpisodeError
        raise
    return urljoin(url, img)

def errorhandler(err, crawler):
    if is_http(err, 410) or is_http(err, 404):
        if (re.match(r"https://(live|farm\d+)\.staticflickr\.com/\d+/\d+_[a-z0-9]+_[a-z0-9]{1,2}\.\w+",
                     err.response.url) and crawler.ep.image):
            # a specific size is deleted?
            crawler.ep.image = None
            # clear html to refetch the page
            crawler.html = None
            return
        if re.match(r"https://www\.flickr\.com/photos/[^/]+/\d+/", err.response.url):
            raise SkipEpisodeError
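
is_http is a project helper not shown on this page. Since the handler above reads err.response.url, err is presumably a requests-style HTTPError carrying a response object; a minimal sketch of the check under that assumption might be:

def is_http(err, code):
    """Return True if err carries an HTTP response with the given status code."""
    response = getattr(err, "response", None)
    return response is not None and response.status_code == code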