Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if idnum >= 1000:
    # This branch fills the story from INI configuration values (see the log
    # message).  logger.warn() is a deprecated alias, so warning() is used.
    logger.warning("storyId:%s - Custom INI data will be used.", idstr)
    # Every config lookup below is given both the per-story section and the
    # shared 'teststory:defaults' section.
    sections = ['teststory:%s'%idstr,'teststory:defaults']
    for key in self.get_config_list(sections,'valid_entries'):
        if key.endswith("_list"):
            # "<name>_list" entries carry several values; each one is appended
            # to the story list called <name>.
            nkey = key[:-len("_list")]
            for val in self.get_config_list(sections,key):
                self.story.addToList(nkey,ensure_text(val).replace('{{storyId}}',idstr))
        elif key in ['datePublished','dateUpdated']:
            # Special cases: these two are parsed with the "%Y-%m-%d" format
            # instead of being stored as text.
            self.story.setMetadata(key,makeDate(self.get_config(sections,key),"%Y-%m-%d"))
        else:
            # Plain text value with the {{storyId}} placeholder substituted.
            self.story.setMetadata(key,ensure_text(self.get_config(sections,key)).replace('{{storyId}}',idstr))
    if self.has_config(sections,'chapter_urls'):
        for line in self.get_config(sections,'chapter_urls').splitlines():
            if line:
                # Each non-empty line is "<url>,<title>".  Only the first
                # comma separates the two, so a title may itself contain
                # commas.  A line without any comma raises ValueError, as the
                # previous index()-based slicing did.
                chapter_url, chapter_title = line.split(',', 1)
                self.add_chapter(chapter_title, chapter_url)
    else:
        # No explicit URLs configured: number the configured chapter titles
        # from 1 and derive each chapter URL from the story URL.
        for (j,chap) in enumerate(self.get_config_list(sections,'chaptertitles'),start=1):
            self.add_chapter(chap,self.url+"&chapter=%d"%j)
return
if idnum >= 700 and idnum <= 710:
self._setURL('http://test1.com?sid=%s'%(idnum+100))
# story tags
# Every <a> under the parent of the 'Tags' label span contributes its text to
# the 'tags' list; nothing is added when the label is absent.
a = mainmeta.find('span',text='Tags')
if a:
tags = a.parent.findAll('a')
for tag in tags:
self.story.addToList('tags', tag.text)
# story characters
# The node immediately following the 'Characters' label span is added as-is
# (no stripping or splitting is done here).
a = mainmeta.find('span',text='Characters')
if a:
self.story.addToList('characters', a.nextSibling)
# published on
# The date is read from the 'datetime' attribute of the first <time> element
# under the parent of the 'Published' label.
# NOTE(review): unlike the lookups around it, this one has no `if a:` guard --
# presumably every story page carries a 'Published' label; confirm.
a = soup.find('span', text='Published')
a = a.parent.find('time')
self.story.setMetadata('datePublished', makeDate(a['datetime'], self.dateformat))
# updated on
# Same extraction as for 'Published', but skipped when there is no 'Updated'
# label.
a = soup.find('span', text='Updated')
if a:
a = a.parent.find('time')
self.story.setMetadata('dateUpdated', makeDate(a['datetime'], self.dateformat))
# word count
# Uses the first whitespace-separated token of the span that follows the
# 'Total Word Count' label, converted to int.
a = soup.find('span', text='Total Word Count')
if a:
a = a.find_next('span')
self.story.setMetadata('numWords', int(a.text.split()[0]))
# upvote, subs, and views
# Collects only the direct-child spans of the 'title-meta' div; `spans` is not
# consumed within the lines shown here.
a = soup.find('div',{'class':'title-meta'})
spans = a.findAll('span', recursive=False)
# Find authorid and URL from... author url.
# The first link inside the 'story-left' div is used as the author link; the
# author id is the third '/'-separated piece of its href.
head = soup.find('div', {'class' : 'story-left'})
a = head.find('a')
self.story.setMetadata('authorId',a['href'].split('/')[2])
self.story.setMetadata('authorUrl','https://'+self.host+'/'+a['href'])
self.story.setMetadata('author',stripHTML(a))

# Find the chapters: one <option> of the first <select> per chapter, with the
# option's 'value' as the last component of the chapter URL.
for chapter in soup.find('select').findAll('option'):
    self.add_chapter(chapter,'https://'+self.host+'/s/'+self.story.getMetadata('storyId')+'/'+chapter['value'])

## title="Wörter" failed with max_zalgo:1
self.story.setMetadata('numWords',stripHTML(soup.find("span",{'class':"fa-keyboard"}).parent))
self.story.setMetadata('language','German')

self.story.setMetadata('datePublished', makeDate(stripHTML(head.find('span',title='erstellt').parent), self.dateformat))
self.story.setMetadata('dateUpdated', makeDate(stripHTML(head.find('span',title='aktualisiert').parent), self.dateformat))

## Genre now shares a line with rating.
# The text has the form "<genre>, <genre> / <rating>"; it is split once on the
# first ' / '.  A missing separator still raises ValueError, as index() did.
genres=stripHTML(head.find('span',class_='fa-angle-right').next_sibling)
genre_text, rating_text = genres.split(' / ', 1)
self.story.extendList('genre',genre_text.split(', '))
self.story.setMetadata('rating', rating_text)

self.story.addToList('category',stripHTML(soup.find('span',id='ffcbox-story-topic-1')).split('/')[2].strip())

# native_status is best-effort: it is taken from the 'title' of the fourth
# 'titled-icon' span.  The handler used to read `except e:`, which evaluates
# the undefined name `e` and therefore turned every failure into a NameError
# instead of logging it.
try:
    self.story.setMetadata('native_status', head.find_all('span',{'class':'titled-icon'})[3]['title'])
except (IndexError, KeyError) as e:
    # IndexError: fewer than four such spans; KeyError: span has no title.
    logger.debug("Failed to find native status:%s", e)

if head.find('span',title='Fertiggestellt'):
    self.story.setMetadata('status', 'Completed')
'Secondary Relationship(s)':'ships',
'Genre(s)':'genre',
'Era':'era',
'Advisory':'warnings',
'Story Reviews':'reviews',
# 'Status':'', # Status is treated special
'First Published':'datePublished',
'Last Updated':'dateUpdated',
}
# Walk the key/value pairs of the info table.  Keys listed in meta_key_map
# are copied into story metadata; 'Status' gets its own handling.
status_names = {'WIP': 'In-Progress', 'COMPLETED': 'Completed'}
list_metas = ('characters','genre','ships')
for key_div in soup.find_all('div',{'class':'entry__key'}):
    value = stripHTML(key_div.find_next('div',{'class':'entry__value'}))
    label = stripHTML(key_div)
    meta = meta_key_map.get(label)
    if meta:
        # Metadata names starting with 'date' are parsed before being stored.
        if meta.startswith('date'):
            value = makeDate(value,self.dateformat)
        if meta in list_metas:
            # These entries are comma-separated lists.
            self.story.extendList(meta,value.split(','))
        else:
            self.story.setMetadata(meta,value)
    if label == 'Status':
        # 'Abandoned' and other possible values used as-is
        self.story.setMetadata('status',status_names.get(value,value))

# older stories don't present total words, use sum from chapters.
if not self.story.getMetadata('numWords'):
    self.story.setMetadata('numWords',chapter_words)
self.story.extendList('genre',genre_list)

## getting cover
img = soup.find('img', class_='wp-post-image')
if img:
    self.setCoverImage(url,img['src'])

## getting chapters
# The rows are reversed before the chapters are added.
# NOTE(review): presumably the page lists the newest chapter first -- confirm.
cdata = soup.select('.chapter-list .row')
cdata.reverse()

cdates = []
for row in cdata:
    # The last span of a row holds the chapter date, e.g. <span>May-08-18</span>
    dt = row.find_all('span')[-1].string
    cdates.append(makeDate(dt, '%b-%d-%y'))
    clink = row.find('a')
    self.add_chapter(clink.string, clink['href'])

cdates.sort()
# dateUpdated in upper part show only date of last chapter, but if
# chapter in middle will be updated - it will be ignored. So we select
# dates manually
# Guarded so that a page without chapter rows no longer fails with an
# IndexError on the empty date list; the dates are simply left unset.
if cdates:
    self.story.setMetadata('dateUpdated', cdates[-1])
    self.story.setMetadata('datePublished', cdates[0])

## getting description
cdata = soup.select_one('#noidungm')
# Drop the heading from the description block when there is one; calling
# extract() unconditionally raised AttributeError for a block without <h2>.
heading = cdata.find('h2')
if heading is not None:
    heading.extract()
self.setDescription(url, cdata)
def parseDateText(text):
    """Turn a date label taken from the page into a date value.

    The relative labels u'Сегодня' ("today") and u'Вчера' ("yesterday") are
    resolved against todayInMoscow(); any other text is handed to makeDate
    with the '%d.%m.%Y, %H:%M' format.
    """
    if text == u'Сегодня':
        return todayInMoscow()
    if text == u'Вчера':
        one_day = datetime.timedelta(days=1)
        return todayInMoscow() - one_day
    return makeDate(text, '%d.%m.%Y, %H:%M')
# The try/except is still needed, because some author pages are no longer on the site, but
# the story is, but the UnicodeDecodeError is no longer needed, so was removed
# NOTE(review): indentation was lost in this copy, so it cannot be read off
# here which of the statements after `except` belong to the handler --
# presumably everything down to `return` is the fallback path; confirm.
authorurl = self.story.getMetadata('authorUrl')
try:
adata = self._fetchUrl(authorurl)
except (HTTPError) as e:
## Can't get the author's page, so we use what is on the story page
# NOTE(review): `.find('a')` returns a single element, so the loop below walks
# that element's children rather than every link inside 'storytags' --
# confirm whether findAll('a') was intended.
tags = soup.find('div',{'id':'storytags'}).find('a')
if tags:
for tag in tags:
self.story.addToList('eroticatags',stripHTML(tag))
# Scan every <label>; the text node following 'Added:' is parsed as the
# publication date and the one following 'Words:' is stored as the word count.
labels = soup.findAll('label')
if labels:
for label in labels:
if label.string == 'Added:':
self.story.setMetadata('datePublished', makeDate(label.nextSibling.string.strip(
), self.dateformat))
elif label.string == 'Words:':
self.story.setMetadata('numWords',label.nextSibling.string.strip())
# The description comes from the 'oneliner' div; an empty result is replaced
# by a fixed placeholder.  `summary` has already been through stripHTML, so
# the else branch strips it a second time.
summary = stripHTML(soup.find('div',{'class':'oneliner'}))
if len(summary) == 0:
summary = '>>>>>>>>>> No Summary Found <<<<<<<<<<'
else:
summary = stripHTML(summary)
self.setDescription(url,summary)
# since the 'story' is one page, I am going to save the soup here, so we can use iter
# to get the story text in the getChapterText function, instead of having to retrieve
# it again.
self.html = soup
return
'Updated':'dateUpdated',
'Status':'status',
'Words:':'numWords',
'Size:':'size',
'Comments:':'comments',
'Views:':'views',
'Faves...:':'favs',
'Rating:':'vote_rating',
}
# Copy every labelled entry of the story-info box that meta_labels knows
# about into the story metadata.
for label in storyInfo.find_all('span',class_='label'):
    meta_name = meta_labels.get(stripHTML(label))
    if not meta_name:
        continue
    indent = label.find_next('span',class_='indent')
    if meta_name.startswith('date'):
        # Dates are parsed from the text of the following 'indent' span.
        val = makeDate(stripHTML(indent),self.dateformat)
    elif meta_name == 'status':
        val = 'Completed' if 'Complete' in indent else 'In-Progress'
    else:
        # Everything else stores the node right after the label unchanged.
        val = label.next_sibling
    self.story.setMetadata(meta_name,val)

# process and remove non-desc stuff from storyDescript
for css_class in ('storyRead', 'storyVote'):
    storyDescript.find('div', class_=css_class).extract()
# The rating spans supply the warnings (their 'title' attributes) before the
# whole ratings box is removed as well.
warnings = storyDescript.find('div', class_='ratings_box')
for warn in warnings.find_all('span',class_='rating'):
    self.story.addToList('warnings',warn['title'])
warnings.extract()
soup = self.make_soup(data)
info = soup.select_one('#info')
self.story.setMetadata('title', stripHTML(info.h1))
self.setCoverImage(self.url, soup.select_one('#fmimg > img')['src'])

info_paragraphs = info('p')
# Unicode patterns because the colon after the labels is the fullwidth colon
# U+FF1A (UTF-8 \xef\xbc\x9a), not ASCII ':'.  It is written as an escape so
# that it survives text normalization; the ASCII form is accepted too.  As
# with str.replace(..., 1), only the first occurrence is removed.
author = re.sub(u'Author[\uff1a:]', u'', stripHTML(info_paragraphs[0]), count=1)
# The site shows no separate author id here, so the name doubles as the id.
self.story.setMetadata('author', author)
self.story.setMetadata('authorId', author)
datestr = re.sub(u'UpdateTime[\uff1a:]', u'', stripHTML(info_paragraphs[2]), count=1)
date = None
try:
    ## Some older stories use a different date format.
    date = makeDate(datestr, self.NEW_DATE_FORMAT)
except ValueError:
    # Second attempt with the old format; a ValueError from this call
    # propagates to the caller.
    date = makeDate(datestr, self.OLD_DATE_FORMAT)
if date:
    self.story.setMetadata('dateUpdated', date)

intro = soup.select_one('#intro')
# Strip <strong>Description</strong>
if intro.strong:
    intro.strong.decompose()
self.setDescription(self.url, intro)
dl = soup.select_one('#list > dl')
for el in dl.contents:
if el.name == u'dt':
match = re.match(ensure_text(r'^《.+》\s+(.+)$'), stripHTML(el), re.UNICODE)
volume = ''