How to use the flexget.utils.soup.get_soup function in FlexGet

To help you get started, we've selected a few FlexGet examples showing popular ways get_soup is used in public projects.

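If you just want to see the function in isolation first: get_soup wraps BeautifulSoup and returns a parsed document you can query. A minimal sketch (the HTML string is made up for illustration):

from flexget.utils.soup import get_soup

html = '<div class="entry"><h3 class="entrytitle"><a href="/x">Some Release</a></h3></div>'
soup = get_soup(html)
h3 = soup.find('h3', attrs={'class': 'entrytitle'})
print(h3.a.text)  # -> Some Release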

Flexget/Flexget: flexget/plugins/input/rlslog.py (view on GitHub)
def parse_rlslog(self, rlslog_url, task):
        """
        :param rlslog_url: Url to parse from
        :param task: Task instance
        :return: List of release dictionaries
        """

        # BeautifulSoup doesn't seem to work if data is already decoded to unicode :/
        soup = get_soup(task.requests.get(rlslog_url, timeout=25).content)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            h3 = entry.find('h3', attrs={'class': 'entrytitle'})
            if not h3:
                log.debug('FAIL: No h3 entrytitle')
                continue
            release['title'] = h3.a.contents[0].strip()
            entrybody = entry.find('div', attrs={'class': 'entrybody'})
            if not entrybody:
                log.debug('FAIL: No entrybody')
                continue

            log.trace('Processing title %s' % (release['title']))
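
As the comment in this excerpt notes, the raw response.content (bytes) is passed to get_soup rather than the decoded response.text, leaving BeautifulSoup to work out the encoding itself. A minimal sketch of the same pattern, assuming a reachable URL (the one below is hypothetical):

import requests
from flexget.utils.soup import get_soup

resp = requests.get('https://example.com/releases', timeout=25)  # hypothetical URL
soup = get_soup(resp.content)  # raw bytes, not resp.text
titles = [h3.a.text.strip() for h3 in soup.find_all('h3', attrs={'class': 'entrytitle'})]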

Flexget/Flexget: flexget/components/sites/sites/hliang.py (view on GitHub)
def parse_download_page(self, url, requests):
        txheaders = {'User-agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        try:
            page = requests.get(url, headers=txheaders)
        except requests.exceptions.RequestException as e:
            msg = 'Cannot open "%s" : %s' % (url, str(e))
            log.error(msg)
            raise UrlRewritingError(msg)

        try:
            soup = get_soup(page.text)
        except Exception as e:
            raise UrlRewritingError(str(e))

        down_link = soup.find('a', attrs={'href': re.compile(r"down\.php\?.*")})
        if not down_link:
            raise UrlRewritingError('Unable to locate download link from url "%s"' % url)
        return 'http://bt.hliang.com/' + down_link.get('href')
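
The two moves in this excerpt, matching a link by href regex and prefixing the site root to make the URL absolute, recur throughout these examples. Condensed into a self-contained sketch (the page snippet is invented):

import re
from flexget.utils.soup import get_soup

soup = get_soup('<a href="down.php?id=42">download</a>')
link = soup.find('a', attrs={'href': re.compile(r'down\.php\?.*')})
if link:
    download_url = 'http://bt.hliang.com/' + link.get('href')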

Flexget/Flexget: flexget/components/sites/sites/piratebay.py (view on GitHub)
else:
            category = CATEGORIES.get(config.get('category', 'all'))
        filter_url = '/0/%d/%d' % (sort, category)

        entries = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)

            # TPB search doesn't like dashes or quotes
            query = query.replace('-', ' ').replace("'", " ")

            # urllib.quote will crash if the unicode string has non ascii characters, so encode in utf-8 beforehand
            url = '%s/search/%s%s' % (self.url, quote(query.encode('utf-8')), filter_url)
            log.debug('Using %s as piratebay search url' % url)
            page = task.requests.get(url).content
            soup = get_soup(page)
            for link in soup.find_all('a', attrs={'class': 'detLink'}):
                entry = Entry()
                entry['title'] = self.extract_title(link)
                if not entry['title']:
                    log.error('Malformed search result. No title or url found. Skipping.')
                    continue
                href = link.get('href')
                if href.startswith('/'):  # relative link?
                    href = self.url + href
                entry['url'] = href
                tds = link.parent.parent.parent.find_all('td')
                entry['torrent_seeds'] = int(tds[-2].contents[0])
                entry['torrent_leeches'] = int(tds[-1].contents[0])
                entry['torrent_availability'] = torrent_availability(
                    entry['torrent_seeds'], entry['torrent_leeches']
                )
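
The seed/leech extraction shows a handy navigation trick: once a link is matched, .parent hops walk up to the enclosing row, and the row's cells are read positionally. A reduced sketch on a made-up row (only two .parent hops are needed here; piratebay's deeper markup needs three):

from flexget.utils.soup import get_soup

row = '<table><tr><td><a class="detLink" href="/t/1">x</a></td><td>10</td><td>2</td></tr></table>'
soup = get_soup(row)
link = soup.find('a', attrs={'class': 'detLink'})
tds = link.parent.parent.find_all('td')  # a -> td -> tr, then all cells in the row
seeds, leeches = int(tds[-2].contents[0]), int(tds[-1].contents[0])  # 10, 2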

Flexget/Flexget: flexget/plugins/sites/torrent411.py (view on GitHub)
def _solveCaptcha(self, output, url_auth, params, opener):
        """
        When trying to connect too many times with wrong password, a captcha can be requested.
        This captcha is really simple and can be solved by the provider.

        <label for="pass">204 + 65 = </label>
            <input value="" id="lgn" name="captchaAnswer" size="40" type="text">
            <input value="204 + 65 = " name="captchaQuery" type="hidden">
            <input value="005d54a7428aaf587460207408e92145" name="captchaToken" type="hidden">
        <br>

        :param output: initial login output
        :return: output after captcha resolution
        """
        html = get_soup(output)

        query = html.find('input', {'name': 'captchaQuery'})
        token = html.find('input', {'name': 'captchaToken'})
        if not query or not token:
            log.error('Unable to solve login captcha.')
            return output

        query_expr = query.attrs['value'].strip('= ')
        log.debug('Captcha query: ' + query_expr)
        answer = arithmeticEval(query_expr)

        log.debug('Captcha answer: %s' % answer)

        params['captchaAnswer'] = answer
        params['captchaQuery'] = query.attrs['value']
        params['captchaToken'] = token.attrs['value']
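
The captcha handler boils down to scraping hidden form fields and echoing them back with the computed answer. A stripped-down sketch of just the extraction, run against the HTML shown in the docstring:

from flexget.utils.soup import get_soup

html = ('<input value="204 + 65 = " name="captchaQuery" type="hidden">'
        '<input value="005d54a7428aaf587460207408e92145" name="captchaToken" type="hidden">')
soup = get_soup(html)
query = soup.find('input', {'name': 'captchaQuery'})
token = soup.find('input', {'name': 'captchaToken'})
expr = query.attrs['value'].strip('= ')  # -> '204 + 65'
print(expr, token.attrs['value'])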

Flexget/Flexget: flexget/plugins/sites/site_1337x.py (view on GitHub)
"""
            Gets the download information for 1337x result
        """

        url = entry['url']

        log.info('1337x rewriting download url: %s' % url)

        try:
            page = task.requests.get(url)
            log.debug('requesting: %s', page.url)
        except RequestException as e:
            log.error('1337x request failed: %s', e)
            raise UrlRewritingError('1337x request failed: %s' % e)

        soup = get_soup(page.content)

        magnet_url = str(soup.find('a', href=re.compile(r'^magnet:\?')).get('href')).lower()
        torrent_url = str(soup.find('a', href=re.compile(r'\.torrent$')).get('href')).lower()

        entry['url'] = torrent_url
        entry.setdefault('urls', []).append(torrent_url)
        entry['urls'].append(magnet_url)
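
One caveat with this excerpt: soup.find() returns None when nothing matches, so chaining .get('href') straight onto it raises AttributeError on an unexpected page. A slightly more defensive variant of the two lookups, reusing soup, url, and UrlRewritingError from the excerpt above:

magnet_link = soup.find('a', href=re.compile(r'^magnet:\?'))
torrent_link = soup.find('a', href=re.compile(r'\.torrent$'))
if not magnet_link or not torrent_link:
    raise UrlRewritingError('Unable to locate magnet/torrent links on %s' % url)
magnet_url = str(magnet_link.get('href')).lower()
torrent_url = str(torrent_link.get('href')).lower()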

Flexget/Flexget: flexget/plugins/input/sceper.py (view on GitHub)
def parse_site(self, url, task):
        """Parse configured url and return releases array"""

        try:
            page = task.requests.get(url).content
        except RequestException as e:
            raise plugin.PluginError('Error getting input page: %s' % e)
        soup = get_soup(page)

        releases = []
        for entry in soup.find_all('div', attrs={'class': 'entry'}):
            release = {}
            title = entry.find('h2')
            if not title:
                log.debug('No h2 entrytitle')
                continue
            release['title'] = title.a.contents[0].strip()

            log.debug('Processing title %s' % (release['title']))

            for link in entry.find_all('a'):
                # no content in the link
                if not link.contents:
                    continue

Flexget/Flexget: flexget/components/sites/sites/newtorrents.py (view on GitHub)
def entries_from_search(self, name, url=None):
        """Parses torrent download url from search results"""
        name = normalize_unicode(name)
        if not url:
            url = 'http://www.newtorrents.info/search/%s' % quote(
                name.encode('utf-8'), safe=b':/~?=&%'
            )

        log.debug('search url: %s' % url)

        html = requests.get(url).text
        # fix </SCR'+'IPT> so that BS does not crash
        # TODO: should use beautifulsoup massage
        html = re.sub(r"(</SCR)'\+'(IPT>)", r'\1\2', html)

        soup = get_soup(html)
        # saving torrents in dict
        torrents = []
        for link in soup.find_all('a', attrs={'href': re.compile('down.php')}):
            torrent_url = 'http://www.newtorrents.info%s' % link.get('href')
            release_name = link.parent.next.get('title')
            # quick dirty hack
            seed = link.find_next('td', attrs={'class': re.compile('s')}).renderContents()
            if seed == 'n/a':
                seed = 0
            else:
                try:
                    seed = int(seed)
                except ValueError:
                    log.warning(
                        'Error converting seed value (%s) from newtorrents to integer.' % seed
                    )
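
The re.sub above is a pre-parse repair step: the site obfuscates its closing script tags, so the raw text is patched before it ever reaches get_soup. The same idea in isolation, using the breakage pattern this plugin targets:

import re
from flexget.utils.soup import get_soup

html = "<p>ok</p></SCR'+'IPT>"                      # obfuscated closing tag
html = re.sub(r"(</SCR)'\+'(IPT>)", r'\1\2', html)  # stitch it back to </SCRIPT>
soup = get_soup(html)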

Juszoe/flexget-nexusphp: nexusphp.py (view on GitHub)
except Exception:
            discount = expired_time = None  # no discount

        try:
            if hr_fn:
                hr = hr_fn(detail_page)
            else:
                hr = False
                for item in ['hitandrun', 'hit_run.gif', 'Hit and Run', 'Hit & Run']:
                    if item in detail_page.text:
                        hr = True
                        break
        except Exception:
            hr = False  # no hit-and-run (HR)

        soup = get_soup(peer_page.replace('\n', ''), 'html5lib')
        seeders = leechers = []
        tables = soup.find_all('table', limit=2)
        if len(tables) == 2:                                     # 1. both seeder and leecher tables present
            seeders = NexusPHP.get_peers(tables[0])
            leechers = NexusPHP.get_peers(tables[1])
        elif len(tables) == 1 and len(soup.body.contents) == 3:  # 2. only one of the two is present
            nodes = soup.body.contents
            if nodes[1].name == 'table':                    # 2.1 only seeders, in the second node
                seeders = NexusPHP.get_peers(nodes[1])
            else:                                           # 2.2 only leechers, in the third node
                leechers = NexusPHP.get_peers(nodes[2])
        else:                                                    # 3. neither seeders nor leechers
            seeders = leechers = []
        return discount, seeders, leechers, hr, expired_time
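
Two details in this excerpt are worth flagging: newlines are stripped before parsing, and 'html5lib' is passed explicitly as get_soup's parser argument, since html5lib is more forgiving of the malformed peer-list markup some trackers emit. Minimal form (the unclosed table is deliberate):

from flexget.utils.soup import get_soup

raw = '<table><tr><td>seeder</td>'  # unclosed markup; html5lib repairs it
soup = get_soup(raw.replace('\n', ''), 'html5lib')
tables = soup.find_all('table', limit=2)  # -> one repaired <table>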

Flexget/Flexget: flexget/components/sites/sites/descargas2020.py (view on GitHub)
log.debug('Search Descargas2020')
        url_search = 'https://descargas2020.org/buscar'
        results = set()
        for search_string in entry.get('search_strings', [entry['title']]):
            query = normalize_unicode(search_string)
            query = re.sub(r' \(\d\d\d\d\)$', '', query)
            log.debug('Searching Descargas2020 %s', query)
            query = unicodedata.normalize('NFD', query).encode('ascii', 'ignore')
            data = {'q': query}
            try:
                response = task.requests.post(url_search, data=data)
            except requests.RequestException as e:
                log.error('Error searching Descargas2020: %s', e)
                return results
            content = response.content
            soup = get_soup(content)
            soup2 = soup.find('ul', attrs={'class': 'buscar-list'})
            children = soup2.findAll('a', href=True)
            for child in children:
                entry = Entry()
                entry['url'] = child['href']
                entry_title = child.find('h2')
                if entry_title is None:
                    log.debug('Ignore empty entry')
                    continue
                entry_title = entry_title.text
                if not entry_title:
                    continue
                try:
                    entry_quality_lan = re.search(
                        r'.+ \[([^\]]+)\](\[[^\]]+\])+$', entry_title
                    ).group(1)

Flexget/Flexget: flexget/components/sites/sites/horriblesubs.py (view on GitHub)
def horrible_entries(requests, page_url):
        entries = []

        try:
            soup = get_soup(requests.get(page_url).content)
        except RequestException as e:
            log.error('HorribleSubs request failed: %s', e)
            return entries

        for li_label in soup.findAll('li'):
            title = '[HorribleSubs] {0}{1}'.format(
                str(li_label.find('span').next_sibling), str(li_label.find('strong').text)
            )
            log.debug('Found title `%s`', title)
            url = li_label.find('a')['href']
            episode = re.sub(r'.*#', '', url)
            # Get show ID
            try:
                soup = get_soup(requests.get('https://horriblesubs.info/{0}'.format(url)).content)
            except RequestException as e:
                log.error('HorribleSubs request failed: %s', e)