How to use the fanficfare.six.moves.urllib.parse.urlparse function in FanFicFare

To help you get started, we’ve selected a few FanFicFare examples based on popular ways this function is used in public projects.
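
Before the project examples, a note on the import path: fanficfare.six is the copy of the six compatibility library bundled with FanFicFare, so six.moves.urllib.parse exposes the same urlparse on Python 2 and Python 3. A minimal sketch of that import (not taken from the FanFicFare sources; the URL is invented for illustration):

# Minimal sketch, not from the FanFicFare sources; the URL is invented.
# fanficfare.six bundles `six`, so this resolves to urllib.parse.urlparse on
# Python 3 and to urlparse.urlparse on Python 2.
from fanficfare.six.moves.urllib.parse import urlparse

parsed = urlparse('https://www.example.com/stories/viewstory.php?sid=1234')
print(parsed.netloc)   # 'www.example.com'
print(parsed.path)     # '/stories/viewstory.php'
print(parsed.query)    # 'sid=1234'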

github JimmXinu / FanFicFare / fanficfare / adapters / base_adapter.py
def _setURL(self,url):
        self.url = url
        self.parsedUrl = urlparse(url)
        self.host = self.parsedUrl.netloc
        self.path = self.parsedUrl.path
        self.story.setMetadata('storyUrl',self.url,condremoveentities=False)
        self.story.setMetadata('sectionUrl',self._section_url(self.url),condremoveentities=False)
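
Here urlparse does the URL splitting once, up front: netloc is cached as self.host and path as self.path, and (as the _get_class_for example further down shows) the lower-cased host is what adapter lookup is matched against.
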
github JimmXinu / FanFicFare / fanficfare / adapters / adapter_spikeluvercom.py
# No additional login is required, just check for adult
        pagetitle_div = soup.find('div', id='pagetitle')
        if pagetitle_div.a['href'].startswith('javascript:'):
            if not(self.is_adult or self.getConfig('is_adult')):
                raise exceptions.AdultCheckRequired(self.url)

        url = ''.join([self.url, self.METADATA_URL_SUFFIX, self.AGE_CONSENT_URL_SUFFIX])
        soup = self._customized_fetch_url(url)

        pagetitle_div = soup.find('div', id='pagetitle')
        self.story.setMetadata('title', stripHTML(pagetitle_div.a))

        author_anchor = pagetitle_div.a.findNextSibling('a')
        url = urlparse.urljoin(self.BASE_URL, author_anchor['href'])
        components = urlparse.urlparse(url)
        query_data = urlparse.parse_qs(components.query)

        self.story.setMetadata('author', stripHTML(author_anchor))
        self.story.setMetadata('authorId', query_data['uid'][0])
        self.story.setMetadata('authorUrl', url)

        sort_div = soup.find('div', id='sort')
        self.story.setMetadata('reviews', stripHTML(sort_div('a')[1]))

        listbox_tag = soup.find('div', {'class': 'listbox'})
        for span_tag in listbox_tag('span'):
            key = span_tag.string
            if key:
                key = key.strip(' :')
            try:
                value = stripHTML(span_tag.nextSibling)
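
The author-metadata block above chains urljoin, urlparse, and parse_qs, reaching them through a module alias (urlparse.urljoin and so on). The same chain, sketched standalone with direct imports and invented values:

# Standalone sketch of the urljoin -> urlparse -> parse_qs chain used above.
# BASE_URL and the href are invented stand-ins, not the adapter's real values.
from fanficfare.six.moves.urllib.parse import urljoin, urlparse, parse_qs

BASE_URL = 'https://www.example-archive.com/'
author_href = 'viewuser.php?uid=1234'

author_url = urljoin(BASE_URL, author_href)   # 'https://www.example-archive.com/viewuser.php?uid=1234'
components = urlparse(author_url)
query_data = parse_qs(components.query)       # {'uid': ['1234']}
author_id = query_data['uid'][0]              # '1234'
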
github JimmXinu / FanFicFare / fanficfare / geturls.py
def form_url(parenturl,url):
     url = url.strip() # ran across an image with a space in the
                       # src. Browser handled it, so we'd better, too.

     if "//" in url or parenturl == None:
         returl = url
     else:
         parsedUrl = urlparse(parenturl)
         if url.startswith("/") :
             returl = urlunparse(
                 (parsedUrl.scheme,
                  parsedUrl.netloc,
                  url,
                  '','',''))
         else:
             toppath=""
             if parsedUrl.path.endswith("/"):
                 toppath = parsedUrl.path
             else:
                 toppath = parsedUrl.path[:parsedUrl.path.rindex('/')]
             returl = urlunparse(
                 (parsedUrl.scheme,
                  parsedUrl.netloc,
                 toppath + '/' + url,
                 '','',''))
     return returl
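
With the truncated urlunparse call closed out the same way as the absolute-path branch above it, and returl returned at the end, the helper behaves like this (hypothetical calls, URLs invented):

# Hypothetical calls showing how form_url resolves a reference found in a page
# against the URL of that page.
print(form_url('https://example.com/stories/view.php', 'img/cover.jpg'))
# -> 'https://example.com/stories/img/cover.jpg'  (relative to the parent's directory)
print(form_url('https://example.com/stories/view.php', '/img/cover.jpg'))
# -> 'https://example.com/img/cover.jpg'          (site-absolute path keeps scheme and host)
print(form_url('https://example.com/stories/view.php', 'https://cdn.example.com/cover.jpg'))
# -> returned unchanged, because the value already contains '//'
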
github JimmXinu / FanFicFare / fanficfare / adapters / __init__.py
def _get_class_for(url):
    ## fix up leading protocol.
    fixedurl = re.sub(r"(?i)^[htp]+(s?)[:/]+",r"http\1://",url.strip())
    if fixedurl.startswith("//"):
        fixedurl = "http:%s"%url
    if not fixedurl.startswith("http"):
        fixedurl = "http://%s"%url

    ## remove any trailing '#' locations, except for #post-12345 for
    ## XenForo
    if not "#post-" in fixedurl:
        fixedurl = re.sub(r"#.*$","",fixedurl)

    parsedUrl = urlparse(fixedurl)
    domain = parsedUrl.netloc.lower()
    if( domain != parsedUrl.netloc ):
        fixedurl = fixedurl.replace(parsedUrl.netloc,domain)

    clslst = _get_classlist_fromlist(domain)
    ## assumes all adapters for a domain will have www or not have www
    ## but not mixed.
    if not clslst and domain.startswith("www."):
        domain = domain.replace("www.","")
        #logger.debug("trying site:without www: "+domain)
        clslst = _get_classlist_fromlist(domain)
        fixedurl = re.sub(r"^http(s?)://www\.",r"http\1://",fixedurl)
    if not clslst:
        #logger.debug("trying site:www."+domain)
        clslst =_get_classlist_fromlist("www."+domain)
        fixedurl = re.sub(r"^http(s?)://",r"http\1://www.",fixedurl)
github JimmXinu / FanFicFare / fanficfare / adapters / adapter_fictionmaniatv.py
def _get_query_data(url):
    components = urlparse.urlparse(url)
    query_data = urlparse.parse_qs(components.query)
    return dict((key, data[0]) for key, data in query_data.items())
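
Given the helper above (and assuming urlparse is imported as a module alias, e.g. from fanficfare.six.moves.urllib import parse as urlparse, so that urlparse.urlparse and urlparse.parse_qs resolve), a hypothetical call flattens the query string to single values:

# Hypothetical input; parse_qs returns lists, the dict comprehension keeps the first value.
print(_get_query_data('https://example.com/readstory.php?storyID=12345&offset=0'))
# -> {'storyID': '12345', 'offset': '0'}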