How to use the cloudscraper.create_scraper function in cloudscraper

To help you get started, we’ve selected a few cloudscraper examples based on popular ways it is used in public projects.
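Before the project snippets, here is a minimal sketch of the basic pattern. create_scraper() returns a drop-in replacement for a requests session that solves Cloudflare's anti-bot challenge transparently; the target URL below is a placeholder.

import cloudscraper

# Behaves like requests.Session(); the Cloudflare IUAM challenge,
# when present, is solved before the response is returned.
scraper = cloudscraper.create_scraper()
response = scraper.get("https://example.com")  # placeholder URL
print(response.status_code)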


github VeNoMouS / cloudscraper / tests / test_cloudscraper.py
def test_bad_reCaptcha_challenge_12_12_2019(self, **kwargs):
        # test bad reCaptcha extraction.
        with pytest.raises(RuntimeError, match=r".*?we can't extract the parameters correctly.*?"):
            scraper = cloudscraper.create_scraper(**kwargs)
            scraper.reCaptcha_Challenge_Response(None, None, '', '')

github VeNoMouS / cloudscraper / tests / test_cloudscraper.py
def test_bad_interpreter_js_challenge_11_12_2019(self, **kwargs):
        # test bad interpreter
        with pytest.raises(RuntimeError, match=r"Unable to parse Cloudflare anti-bots page: No module named*?"):
            scraper = cloudscraper.create_scraper(interpreter='badInterpreter', **kwargs)
            scraper.get(url)  # 'url' is a test target defined elsewhere in the test module

github VeNoMouS / cloudscraper / tests / test_cloudscraper.py
def test_js_challenge_11_12_2019(self, **kwargs):
        # test interpreters
        for interpreter in ['native', 'js2py', 'nodejs']:
            scraper = cloudscraper.create_scraper(interpreter=interpreter, **kwargs)
            scraper.get(url)
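As the loop above shows, create_scraper() accepts an interpreter argument that selects the JavaScript engine used to solve the challenge; the previous test confirms that an unknown name raises RuntimeError. A minimal sketch with a placeholder URL:

import cloudscraper

# 'native' is cloudscraper's built-in solver; 'js2py' is a pure-Python
# engine; 'nodejs' shells out to a Node.js installation.
scraper = cloudscraper.create_scraper(interpreter='js2py')
scraper.get("https://example.com")  # placeholder URL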

github Xonshiz / comic-dl / comic_dl / sites / japscan.py
def __init__(self, manga_url, download_directory, chapter_range, **kwargs):
        self.scraper = cloudscraper.create_scraper()
        conversion = kwargs.get("conversion")
        keep_files = kwargs.get("keep_files")
        self.logging = kwargs.get("log_flag")
        self.sorting = kwargs.get("sorting_order")
        self.manga_url = manga_url + '/'
        self.print_index = kwargs.get("print_index")

        if 'manga' in manga_url:
            self.comic_id = str(str(manga_url).split("/")[-1])
            self.full_series(comic_id=self.comic_id, sorting=self.sorting, download_directory=download_directory,
                             chapter_range=chapter_range, conversion=conversion, keep_files=keep_files)

        if 'lecture-en-ligne' in manga_url:
            self.comic_id = str(str(manga_url).split("/")[-2])
            chapter_path = re.sub(re.compile(r'.*japscan.to'), '', str(self.manga_url))
        self.single_chapter(chapter_path, comic_id=self.comic_id, download_directory=download_directory,
                            conversion=conversion, keep_files=keep_files)
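Note the pattern: the scraper is created once in __init__ and reused for every request in the run, so the Cloudflare clearance cookie is obtained once rather than re-solved per request.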

github TheodoreKrypton / JavPy / JavPy / sources / javlibrary_com.py
from JavPy.sources.BaseSource import INewlyReleased, IGetBrief
import cloudscraper
from JavPy.utils.requester import submit, wait_until
import re
from JavPy.functions.datastructure import AV, Brief
import datetime
import bs4
from JavPy.utils.config import proxy


class JavLibraryCom(INewlyReleased, IGetBrief):

    __client = cloudscraper.create_scraper()

    @classmethod
    def priority(mcs):
        return 1

    @classmethod
    def get_newly_released(mcs, page):
        major_info_req = submit(
            mcs.__client.get,
            "http://www.javlibrary.com/cn/vl_newrelease.php?mode=2&page=%d" % page,
            proxies=proxy
        )
        dates_req = submit(
            mcs.__client.get,
            "http://www.javlibrary.com/cn/vl_newrelease.php?list&mode=2&page=%d" % page,
            proxies=proxy
        )
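Here the scraper is created once as a class attribute and shared by every request the class issues; because it behaves like a requests session, per-request options such as proxies= are passed straight through to the underlying get().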

github Xonshiz / comic-dl / comic_dl / readcomiconline / dataUpdate.py
def __init__ (self, link=None, name=None):
        self.BASE = "https://readcomiconline.to/Comic/"
        self.link = link

        if name:
            self.link = self.BASE + self.nameLink(str(name).strip())

        self.session = requests.session()
        self.scraper = cloudscraper.create_scraper(sess=self.session)

        self.data = ""
        try:
            self.data = json.load(open("rco-data.json", "r"))
        except Exception as e:
            print("An error occurred : {}".format(repr(e)))
            print("Download the data from {}".format("https://drive.google.com/open?id=1eOjwOQx_LHericcowyBRNIJtZZGMKlp6"))
            print("And paste it inside the comic-dl/comic_dl directory")
            sys.exit()
        
        if self.link:
            try:
                soup = BeautifulSoup(
                    self.scraper.get(self.link).content,
                    "html.parser"
                )
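The sess= argument above wraps an existing requests session, so any cookies or headers configured on it beforehand carry over to the scraper. A minimal sketch; the header value is illustrative:

import requests
import cloudscraper

session = requests.session()
session.headers.update({'Referer': 'https://example.com'})  # illustrative header
scraper = cloudscraper.create_scraper(sess=session)  # inherits the session's state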

github rix1337 / RSScrawler / rsscrawler / search.py
    specific_season = re.match(r'^(.*),(s\d{1,3})$', title.lower())
    specific_episode = re.match(r'^(.*),(s\d{1,3}e\d{1,3})$', title.lower())
    if specific_season:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    elif specific_episode:
        split = title.split(",")
        title = split[0]
        special = split[1].upper()
    else:
        special = None

    bl_final = {}
    sj_final = {}
    scraper = cloudscraper.create_scraper(browser={'browser': 'chrome', 'mobile': False})

    if not sj_only:
        mb_query = sanitize(title).replace(" ", "+")
        if special:
            bl_query = mb_query + "+" + special
        else:
            bl_query = mb_query

        unrated = []

        config = RssConfig('MB', configfile)
        quality = config.get('quality')
        ignore = config.get('ignore')

        if "480p" not in quality:
            search_quality = "+" + quality
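The browser={'browser': 'chrome', 'mobile': False} argument restricts which User-Agent strings cloudscraper will impersonate, here to desktop Chrome. A sketch of another combination, assuming the 'platform' key the project README describes:

import cloudscraper

scraper = cloudscraper.create_scraper(
    browser={'browser': 'firefox', 'platform': 'windows', 'mobile': False}
)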

github shmilylty / OneForAll / oneforall / modules / datasets / dnsdb.py
def get_tokens(self):
        """
        绕过cloudFlare验证并获取taken

        :return: 绕过失败返回None 成功返回tokens
        """
        scraper = cloudscraper.create_scraper()
        scraper.interpreter = 'js2py'
        scraper.proxies = self.get_proxy(self.source)
        scraper.timeout = 10
        try:
            tokens = scraper.get_tokens(self.url)
        except Exception as e:
            logger.log('ERROR', e.args)
            return None
        if len(tokens) != 2:
            return None
        return tokens
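get_tokens() returns a pair, a dict of Cloudflare clearance cookies and the User-Agent string that earned them, which is why the code above checks len(tokens) != 2 before handing them to another HTTP client.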

github TheodoreKrypton / JavPy / JavPy / sources / javfull_net.py
from JavPy.sources.BaseSource import ISearchByCode
import bs4
from JavPy.functions.datastructure import AV
from JavPy.utils.config import proxy
import cloudscraper


class JavFullNet(ISearchByCode):
    __client = cloudscraper.create_scraper()

    @classmethod
    def search_by_code(mcs, code):
        url = "https://javfull.net/?s=" + code
        html = mcs.__client.get(url, proxies=proxy).text
        bs = bs4.BeautifulSoup(html, "lxml")
        item = bs.select(".item")[0]

        av = AV()
        av.code = code
        av.preview_img_url = item.find(name="img").attrs["src"]
        av.video_url = item.find(name="a").attrs["href"]

        return av

github ozmartian / tvlinker / tvlinker / threads.py
def __init__(self, source_url: str, useragent: str, maxpages: int):
        super(ScrapeWorker, self).__init__()
        self.maxpages = maxpages
        self.source_url = source_url
        self.user_agent = useragent
        self.scraper = cloudscraper.create_scraper()
        self.scraper.proxies = ShadowSocks.proxies()
        self.tz_format = '%b %d %Y %H:%M'
        self.tz_local = get_localzone()
        self.complete = False
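As in the OneForAll example, the object create_scraper() returns is a requests-compatible session, so session-level attributes such as .proxies can be assigned after construction and apply to every subsequent request.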