# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment -- the matching 'if' branch and the enclosing login
# method lie outside this chunk, and the indentation appears flattened by
# extraction. Code left byte-identical; comments only.
else:
# Build the eFiction login form payload from the user's ini config.
params['penname'] = self.getConfig("username")
params['password'] = self.getConfig("password")
params['cookiecheck'] = '1'
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
# POST credentials; success is detected by the "Member Account" marker
# appearing in the returned page.
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
# NOTE(review): this raise makes the 'return False' below unreachable
# (presumably left over from an earlier version).
raise exceptions.FailedToLogin(url,params['penname'])
return False
else:
return True
def getChapterText(self, url):
    """Fetch *url* and return the chapter body as sanitized HTML.

    The chapter container is looked up first as ``<div class="listbox">``,
    then (older/alternate theme) as ``<div id="story">``.

    Raises:
        exceptions.FailedToDownload: if neither container is present.
    """
    logger.debug('Getting chapter text from: %s' % url)
    soup = self.make_soup(self._fetchUrl(url))
    # Primary container.
    div = soup.find('div', {'class': 'listbox'})
    if div is None:  # identity test is the idiomatic None check
        # Fallback container used by some site themes.
        div = soup.find('div', {'id': 'story'})
    if div is None:
        raise exceptions.FailedToDownload("Error downloading Chapter: %s! Missing required element!" % url)
    return self.utf8FromSoup(url, div)
def get_page(self, page):
    """Download *page* from the web and return the raw response data.

    Central fetch helper: several pages are requested by the methods
    below, so funneling them through here keeps the adapter short.

    Raises:
        exceptions.StoryDoesNotExist: on an HTTP 404 for *page*.
        HTTPError: re-raised unchanged for any other HTTP failure.
    """
    try:
        page_data = self._fetchUrl(page)
    except HTTPError as e:
        if e.code == 404:
            raise exceptions.StoryDoesNotExist('404 error: {}'.format(page))
        # Bare 'raise' preserves the original traceback; 'raise e'
        # would rebuild it from here.
        raise
    return page_data
# NOTE(review): fragment -- the matching 'if' branch and the enclosing login
# method are outside this chunk; indentation appears flattened by extraction.
# Code left byte-identical; comments only.
else:
# eFiction login form payload, built from the user's ini config.
params['penname'] = self.getConfig("username")
params['password'] = self.getConfig("password")
params['cookiecheck'] = '1'
params['submit'] = 'Submit'
loginUrl = 'http://' + self.getSiteDomain() + '/user.php?action=login'
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['penname']))
# POST credentials; the "Member Account" marker in the response page
# indicates a successful login.
d = self._fetchUrl(loginUrl, params)
if "Member Account" not in d : #Member Account
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['penname']))
# NOTE(review): raising here makes the following 'return False'
# unreachable dead code.
raise exceptions.FailedToLogin(url,params['penname'])
return False
else:
return True
def extractChapterUrlsAndMetadata(self):
# NOTE(review): truncated fragment -- the body continues past this chunk.
# Uses Python-2-only 'except X, e:' syntax; this snippet cannot run under
# Python 3 as written. Code left byte-identical; comments only.
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
# Python 2 exception syntax (would be 'except urllib2.HTTPError as e:'
# in modern code).
except urllib2.HTTPError, e:
if e.code == 404:
# 404 -> the story id does not exist on this archive.
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
# Site-specific error text embedded in an otherwise-200 page.
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
# use BeautifulSoup HTML parser to make everything easier to find.
soup = self.make_soup(data)
# print data
# Now go hunting for all the meta data and the chapter list.
## Title and author
a = soup.find('div', {'id' : 'pagetitle'})
# Author link is identified by its viewuser.php?uid=NNN href.
aut = a.find('a', href=re.compile(r"viewuser.php\?uid=\d+"))
def extractChapterUrlsAndMetadata(self):
# NOTE(review): truncated fragment -- the body continues past this chunk.
# Uses Python-2-only 'except X, e:' syntax; cannot run under Python 3 as
# written. Code left byte-identical; comments only.
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = self.url
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
# Python 2 exception syntax.
except urllib2.HTTPError, e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
raise e
# Two site-specific error messages that arrive as normal 200 pages.
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: Access denied. This story has not been validated by the adminstrators of this site.")
elif "That story either does not exist on this archive or has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied(self.getSiteDomain() +" says: That story either does not exist on this archive or has not been validated by the adminstrators of this site.")
# use BeautifulSoup HTML parser to make everything easier to find.
soup = self.make_soup(data)
# print data
# Now go hunting for all the meta data and the chapter list.
## Title
# NOTE(review): headerless mid-function fragment -- 'addURL' and 'self' are
# bound outside this chunk. Code left byte-identical; comments only.
# index=1 makes sure we see the story chapter index. Some
# sites skip that for one-chapter stories.
url = '{0}&index=1{1}'.format(self.url,addURL)
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url)
except HTTPError as e:
if e.code == 404:
raise exceptions.StoryDoesNotExist(self.url)
else:
# NOTE(review): bare 'raise' would preserve the traceback here.
raise e
# Adult-content gate and validation errors come back as 200 pages with
# marker text, so they are detected by substring search.
if "Content is only suitable for mature adults. May contain explicit language and adult themes. Equivalent of NC-17." in data:
raise exceptions.AdultCheckRequired(self.url)
if "Access denied. This story has not been validated by the adminstrators of this site." in data:
raise exceptions.AccessDenied("{0} says: Access denied. This story has not been validated by the adminstrators of this site.".format(self.getSiteDomain()))
# use BeautifulSoup HTML parser to make everything easier to find.
soup = self.make_soup(data)
# Now go hunting for all the meta data and the chapter list.
## Title
## Some stories have a banner that has it's own a tag before the actual text title...
## so I'm checking the pagetitle div for all a tags that match the criteria, then taking the last.
a = soup.find('div',{'id':'pagetitle'}).findAll('a', href=re.compile(r'viewstory.php\?sid='+self.story.getMetadata('storyId')+"$"))[-1]
self.story.setMetadata('title',stripHTML(a))
# Find authorid and URL from... author url.
def extractChapterUrlsAndMetadata(self):
# NOTE(review): truncated fragment -- the body continues past this chunk.
# Uses Python-2-only 'unicode' and u'' literals. Code left byte-identical;
# comments only.
url = self.url
logger.debug("URL: "+url)
try:
data = self._fetchUrl(url) # w/o trailing / gets 'chapter list' page even for one-shots.
except HTTPError as e:
if e.code == 404:
logger.error("404 on %s"%url)
raise exceptions.StoryDoesNotExist(self.url)
else:
# NOTE(review): bare 'raise' would preserve the traceback here.
raise e
# use BeautifulSoup HTML parser to make everything easier to find.
soup = self.make_soup(data)
## title:
## <h1 id="post-title">A, A' Fan Fiction ❯ Mmmmm</h1>
# Strip everything up to and including the '❯ ' separator so only the
# story title remains ('unicode' is Python-2-only).
titletext = unicode(stripHTML(soup.find("h1",{"id":"post-title"})))
titletext = titletext[titletext.index(u'❯')+2:]
# print("title:(%s)"%titletext)
self.story.setMetadata('title',titletext)
# [ A - All Readers ], strip '[ ' ' ]'
## Above title because we remove the smtxt font to get title.
smtxt = soup.find("div",{"id":"post-rating"})
def getConfigSectionsFor(url):
    """Return the config sections for the adapter that handles *url*.

    Raises:
        exceptions.UnknownSite: when no registered adapter claims *url*.
    """
    adapter_class, _fixed_url = _get_class_for(url)
    if not adapter_class:
        # No adapter matched -- report every known site domain to the caller.
        known_domains = [c.getSiteDomain() for c in __class_list]
        raise exceptions.UnknownSite(url, known_domains)
    return adapter_class.getConfigSections()
# NOTE(review): fragment -- the matching 'if' branch and the enclosing login
# method are outside this chunk; indentation appears flattened by extraction.
# Code left byte-identical; comments only.
else:
# Forum login: credentials come from the user's ini config.
params['user'] = self.getConfig("username")
params['passwrd'] = self.getConfig("password")
loginUrl = 'http://' + self.getSiteDomain() + '/board/index.php'
logger.debug("Will now login to URL (%s) as (%s)" % (loginUrl,
params['user']))
# SECURITY NOTE(review): username/password are sent in a plain-http GET
# query string and are not urlencoded -- special characters in either
# value will break the request, and the credentials can end up in
# server/proxy logs. A POST with urlencoded params would be safer.
d = self._fetchUrl(loginUrl+'?action=login2&user='+params['user']+'&passwrd='+params['passwrd'])
# Re-fetch the index page and look for logged-in-only text to confirm.
d = self._fetchUrl(loginUrl)
if "Show unread posts since last visit" not in d : #Member Account
logger.info("Failed to login to URL %s as %s" % (loginUrl,
params['user']))
# NOTE(review): raising here makes the following 'return False'
# unreachable dead code.
raise exceptions.FailedToLogin(url,params['user'])
return False
else:
return True