# Fragment of a get_images-style parser. The original snippet starts
# mid-function: the header below is assumed, and the earlier lines that
# extract `show_js_src` and `script` from the page are not shown.
def get_images(html, url):
    show_js = grabhtml(urljoin(url, show_js_src))
    real_pic_fn = re.search(r"(function f_qTcms_Pic_curUrl_realpic[\s\S]+?)function", show_js).group(1)
    code = """
    {script}
    {real_pic_fn}
    function base64_decode(data) {{
        return Buffer.from(data, "base64").toString();
    }}
    // m.wuyouhui.net/template/wap1/css/d7s/js/show.20170501.js?20190506201115
    Buffer.from(qTcms_S_m_murl_e, "base64")
        .toString()
        .split("$qingtiandy$")
        .filter(u => !/^(--|\+)/.test(u))
        .map(f_qTcms_Pic_curUrl_realpic);
    """.format(script=script, real_pic_fn=real_pic_fn)
    return [urljoin(url, i) for i in eval(code)]
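
The `eval(code)` call hands the assembled JavaScript to a Node bridge rather than Python's builtin. A minimal sketch of how that helper might be wired up, assuming the node_vm2 package (the alias and sample input are illustrative):

# Sketch only: node_vm2 exposes a module-level eval() that runs code in a
# sandboxed Node VM and returns the JSON-serializable result.
from node_vm2 import eval  # shadows the builtin, matching the snippet's usage

urls = eval('["a.jpg", "b.jpg"].map(u => "https://example.com/" + u)')
print(urls)  # ['https://example.com/a.jpg', 'https://example.com/b.jpg']
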
def get_next_page(html, url):
    # the pattern was truncated in the original snippet; the next-page link is
    # assumed to sit just after the "pgNext" element
    match = re.search(r'class="pgNext"><a href="([^"]+)', html)
    if match:
        return urljoin(url, match.group(1))
def errorhandler(err, crawler):
    if not is_http(err, 403) or not crawler.ep.current_url:
        return
    match = re.search("artwork/([^/]+)", crawler.ep.current_url)
    if not match:
        return
    artwork = match.group(1)
    crawler.ep.current_url = urljoin(crawler.ep.current_url, "/projects/{}.json".format(artwork))
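
The is_http helper is not shown in the snippet. A plausible implementation (an assumption, not the project's actual helper) checks the wrapped HTTP status code:

from requests import HTTPError

def is_http(err, code):
    # Assumed helper: true if err is an HTTPError carrying the given status.
    return (isinstance(err, HTTPError)
            and err.response is not None
            and err.response.status_code == code)
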
def get_episodes(html, url):
    s = []
    for match in re.finditer(r'<li><a href="([^">]+)"><p>([^<]+)</p></a></li>', html):
        ep_url, title = [unescape(t) for t in match.groups()]
        s.append(Episode(title, urljoin(url, ep_url)))
    return s[::-1]
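
These parsers only need Episode to behave like a small record. A stand-in for trying them outside the project (the real class may carry more fields):

from collections import namedtuple

# Minimal stand-in for the project's Episode record (assumed shape).
Episode = namedtuple("Episode", ["title", "url"])
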
def get_episodes(html, url):
    s = []
    ep_set = set()
    # original regex truncated in the snippet; a generic link pattern is
    # assumed, with ep_set deduplicating repeated links
    for m in re.finditer(r'<a href="([^">]+)">([^<]+)', html):
        ep_url, title = [unescape(t) for t in m.groups()]
        if ep_url not in ep_set:
            ep_set.add(ep_url)
            s.append(Episode(title, urljoin(url, ep_url)))
    return s[::-1]
def get_images(html, url):
    data = re.search(r"var DATA\s*=\s*'[^']+'", html).group()
    nonce = re.search(r"window\.nonce = (.+)", html).group(1)
    nonce2 = re.search(r"window\[.+?=(.+)", html)
    nonce2 = nonce2.group(1) if nonce2 else None
    view_js = re.search(r'src="([^"]+?page\.chapter\.view[^"]+?\.js[^"]*)', html).group(1)
    view_js = grabhtml(urljoin(url, view_js))
    view_js = re.search(r"(eval\(.+?)\}\(\)", view_js, re.DOTALL).group(1)
    code = "\n".join([
        data,
        """
        function createDummy() {
            return new Proxy(() => true, {
                get: () => createDummy()
            });
        }
        const window = document = createDummy();
        """,
        "const nonce = {};".format(nonce2 or nonce),
        "const W = {DATA, nonce};",
        view_js
    ])
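    # The snippet ends before the assembled script is executed. A hedged
    # sketch of the likely final step, assuming the same node_vm2-style eval()
    # as above and that the viewer script leaves the decoded picture list on
    # W.DATA.picture (both are assumptions):
    pics = json.loads(eval(code + "\nJSON.stringify(W.DATA.picture)"))
    return [urljoin(url, p["url"]) for p in pics]
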
def get_next_page(html, url):
    if is_project(url):
        page = int(parse_qs(urlparse(url).query)["page"][0])
        total_page = math.ceil(json.loads(html)["total_count"] / EP_PER_PAGE)
        return update_qs(url, {"page": page + 1}) if page < total_page else None
    if is_user_home(url):
        user = re.search(r"www\.artstation\.com/([^/]+)", url).group(1)
        return urljoin(url, "/users/{user}/projects.json?page=1".format(user=user))
def get_episodes(html, url):
    prefix = re.escape(url)
    s = []
    for m in re.finditer(r'<a href="({}/[^/">]*)">([^<]+)</a>'.format(prefix), html):
        ep_url, title = m.groups()
        s.append(Episode(title, urljoin(url, ep_url)))
    return s[::-1]
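
Anchoring the pattern on the page's own URL keeps the matcher from picking up unrelated links. For example, with illustrative input and the Episode stand-in above:

html = '<a href="https://example.com/gallery/ch1">Ch. 1</a>'
print(get_episodes(html, "https://example.com/gallery"))
# [Episode(title='Ch. 1', url='https://example.com/gallery/ch1')]
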
def get_next_page(html, url):
    # original pattern truncated in the snippet; a rel="next" link is assumed
    match = re.search(r'<a[^>]*rel="next"[^>]*href="([^"]+)', html)
    if match:
        return urljoin(url, match.group(1))
def get_images(html, url):
    fbset, fbid = get_url_info(url)
    fb_dtsg = re.search('name="fb_dtsg" value="([^"]+)', html).group(1)
    # fb_dtsg = re.search('"DTSGInitialData".*?"token":"([^"]+?)', html).group(1)
    response = grabhtml(
        "https://www.facebook.com/ajax/photos/snowlift/menu/",
        params={"fbid": fbid, "set": fbset},
        method="POST",
        data={"__a": 1, "fb_dtsg": fb_dtsg}
    )
    # with open("test.js", "w") as f:
    #     f.write(response)
    download_url = re.search('"download_photo","href":(.+?),"', response).group(1)
    download_url = json.loads(download_url)
    return urljoin(url, download_url)
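
get_url_info is defined elsewhere in the module. A hedged sketch of what it plausibly extracts from a photo URL (the pattern and return order are assumptions based on how fbset and fbid are used above):

import re

def get_url_info(url):
    # Assumed: pull the photo id and set id out of a classic
    # photo.php?fbid=...&set=... style URL.
    match = re.search(r"fbid=(\d+).*?\bset=([^&]+)", url)
    fbid, fbset = match.groups()
    return fbset, fbid
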