How to use the comiccrawler.core.grabhtml function in comiccrawler

To help you get started, we’ve selected a few comiccrawler examples based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github eight04 / ComicCrawler / comiccrawler / mods / xznj120.py View on Github external
def get_images(html, url):
	# Extract image URLs by re-running the site's own JS decoding routine
	# inside an assembled script.
	# NOTE(review): the pattern below is empty (r'') yet calls .group(1) —
	# the original regex (presumably matching an inline <script> block that
	# defines qTcms_S_m_murl_e) appears to have been stripped when this
	# snippet was extracted; as written this raises IndexError.
	# TODO: restore the original pattern.
	script = re.search(r'', html).group(1)
	# Locate the versioned show.*.js asset referenced by the page.
	show_js_src = re.search(r'src="([^"]+?show\.\d+\.js[^"]*)', html).group(1)
	show_js = grabhtml(urljoin(url, show_js_src))
	# Pull out only the f_qTcms_Pic_curUrl_realpic function from show.js,
	# stopping at the next `function` keyword.
	real_pic_fn = re.search(r'(function f_qTcms_Pic_curUrl_realpic[\s\S]+?)function', show_js).group(1)
	code = """
	{script}
	{real_pic_fn}
	function base64_decode(data) {{
		return Buffer.from(data, "base64").toString();
	}}
	// m.wuyouhui.net/template/wap1/css/d7s/js/show.20170501.js?20190506201115
	Buffer.from(qTcms_S_m_murl_e, "base64")
		.toString()
		.split("$qingtiandy$")
		.filter(u => !/^(--|\+)/.test(u))
		.map(f_qTcms_Pic_curUrl_realpic);
	""".format(script=script, real_pic_fn=real_pic_fn)
	# NOTE(review): `eval` here presumably runs the assembled JavaScript in a
	# project-provided JS sandbox (not Python's builtin eval) — verify; the
	# builtin eval on remote content would be unsafe.
	return [urljoin(url, i) for i in eval(code)]
github eight04 / ComicCrawler / comiccrawler / mods / yoedge.py View on Github external
def get_images(html, url):
	"""Return absolute image URLs for the chapter, read from smp_cfg.json.

	The config lives next to the chapter URL; its "order" list indexes into
	the "page" mapping to give the display sequence.
	"""
	config = json.loads(grabhtml(urljoin(url, "smp_cfg.json")))

	page_map = config["pages"]["page"]
	ordered_pages = (page_map[index] for index in config["pages"]["order"])
	return [urljoin(url, page) for page in ordered_pages]
github eight04 / ComicCrawler / comiccrawler / mods / wix.py View on Github external
def get_episodes(html, url):
	"""Collect one Episode per Image item found across every page of a Wix site.

	Reads the inline `publicModel` JSON, fetches each page's document data,
	and turns every "Image" entry into an Episode pointing at its media URL.
	"""
	public_model = re.search("var publicModel = ({.+})", html).group(1)
	public_model = json.loads(public_model)

	episodes = []

	for page in public_model["pageList"]["pages"]:
		# Prefer the page's own URL list; fall back to the static JSON blob.
		try:
			raw = grabhtml(page["urls"][0])
		except KeyError:
			raw = grabhtml("https://static.wixstatic.com/sites/" + page["pageJsonFileName"] + ".z?v=3")

		document_data = json.loads(raw)["data"]["document_data"]

		for item in document_data.values():
			if item["type"] != "Image":
				continue
			# Use the trimmed title when present, otherwise the item's id.
			label = trim_ext(item.get("title", "")) or item["id"]
			media_url = "https://static.wixstatic.com/media/" + item["uri"]
			episodes.append(Episode(
				"{} - {}".format(page["title"], label),
				media_url,
				image=media_url
			))

	return episodes
github eight04 / ComicCrawler / comiccrawler / mods / toho.py View on Github external
def get():
		# Deferred fetcher: `url`, `did`, `sid` and `iid` come from the
		# enclosing scope, which is not visible in this snippet.
		content = grabhtml(url, params={
			"did": did,
			"sid": sid,
			"iid": str(iid)
		})
		# The endpoint returns JSON; the "Code" field holds the payload.
		return json.loads(content)["Code"]
	return get
github eight04 / ComicCrawler / comiccrawler / mods / nico.py View on Github external
def get_images(html, url):
	# NOTE(review): the membership test below checks for the empty string,
	# which is always true — a login-marker literal appears to have been
	# stripped from this snippet during extraction, so as written this
	# function always raises. TODO: restore the original marker string.
	if "" in html:
		raise PauseDownloadError("You didn't login!")

	# Prefer the full-size "source" page when the HTML links to one.
	source_url = re.search(r'href="(/image/source/\d+)', html)
	if source_url:
		# FIXME: The image is downloaded twice within redirect!
		source_url = urljoin(url, source_url.group(1))
		source_html = grabhtml(source_url)
		image = re.search(r'src="(/priv/[^"]+)', source_html)
		if image:
			image = "http://lohas.nicoseiga.jp" + image.group(1)
		else:
			# No /priv/ link found — fall back to the source page URL itself.
			image = source_url
		return [image]

	# Otherwise use the id-style source link present on the page.
	img = re.search(r'href="(/image/source\?id=\d+)', html).group(1)
	return [urljoin(url, img)]
github eight04 / ComicCrawler / comiccrawler / mods / flickr.py View on Github external
def query_video(id, secret, key):
	"""Ask the Flickr API for a video's streams and return the best one's URL.

	Streams are ranked with `key_func`; the highest-ranked stream's
	"_content" field is the direct media URL.
	"""
	response = grabhtml("https://api.flickr.com/services/rest", params={
		"photo_id": id,
		"secret": secret,
		"method": "flickr.video.getStreamInfo",
		"api_key": key,
		"format": "json",
		"nojsoncallback": "1"
	})
	streams = json.loads(response)["streams"]["stream"]
	# Stable sort + last element: on ties this keeps the original's choice
	# (the latest-listed maximum).
	ranked = sorted(streams, key=key_func)
	return ranked[-1]["_content"]
github eight04 / ComicCrawler / comiccrawler / mods / dm5.py View on Github external
def grab_page(page):
		# Fetch one chunk of image URLs from chapterfun.ashx and store them
		# into the shared `pages` list (closure state). `cid`, `mid`, `dt`,
		# `sign`, `key` and `url` come from the enclosing scope, which is
		# not visible in this snippet.
		params = {
			"cid": cid,
			"page": page + 1,  # request is 1-based; local `page` is a 0-based offset
			"language": 1,
			"key": key,
			"gtk": 6,
			"_cid": cid,
			"_mid": mid,
			"_dt": dt,
			"_sign": sign
		}
		fun_url = urljoin(url, "chapterfun.ashx")
		text = grabhtml(fun_url, referer=url, params=params)
		# NOTE(review): `eval` presumably executes the returned script via a
		# project JS-sandbox helper and yields an iterable of image URLs —
		# confirm; Python's builtin eval on remote content would be unsafe.
		d = eval(text)
		for i, image in enumerate(d):
			pages[i + page] = image
github eight04 / ComicCrawler / comiccrawler / mods / sfacg.py View on Github external
def get_images(html, url):
	"""Return absolute image URLs listed in the chapter's /Utility/*.js script."""
	# Path of the per-chapter script, and the scheme+host of the page URL.
	js_path = re.search("src=\"(/Utility/.+?\.js)\"", html).group(1)
	origin = re.search("(https?://[^/]+)", url).group(1)

	script = grabhtml(origin + js_path)
	# host = "http://coldpic.sfacg.com"
	# Each picAy[i] assignment in the script carries one relative image path.
	relative_paths = re.findall("picAy\[\d+\] = \"(.+?)\"", script)
	return [origin + path for path in relative_paths]
github eight04 / ComicCrawler / comiccrawler / mods / flickr.py View on Github external
"page": page,
		"extras": "media,url_sq,url_q,url_t,url_s,url_n,url_w,url_m,url_z,url_c,url_l,url_h,url_k,url_3k,url_4k,url_f,url_5k,url_6k,url_o",
		"api_key": key,
		"format": "json",
		"nojsoncallback": 1
	}
	match = re.search("/(?:sets|albums)/([^/]+)", url)
	if match:
		set_id = match.group(1)
		params["method"] = "flickr.photosets.getPhotos"
		params["photoset_id"] = set_id
	else:
		params["method"] = "flickr.people.getPhotos"
		params["user_id"] = nsid
		
	rs = grabhtml("https://api.flickr.com/services/rest", params=params)
	# pylint: disable=no-member
	# https://github.com/PyCQA/pylint/issues/922
	rs = json.loads(rs)
	return (rs.get("photos") or rs.get("photoset"))["photo"]