Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _search_scholar_soup(soup):
    """Generator that yields Publication objects from the search page.

    Every ``div`` with class ``gs_or`` on the current page is wrapped in
    ``Publication(row, 'scholar')`` and yielded. When the page contains an
    element with class ``gs_ico gs_ico_nav_next``, the ``href`` of that
    element's parent is appended to ``_HOST``, the resulting page is
    fetched with ``_get_soup``, and iteration continues there. Iteration
    stops at the first page without such an element.

    Because this is a generator, follow-up pages are only requested once
    the caller has consumed all results of the current page.

    :param soup: parsed search-results page exposing ``find_all`` and
        ``find`` (presumably a BeautifulSoup object -- confirm with caller)
    :returns: generator of ``Publication`` objects
    """
    while True:
        for row in soup.find_all('div', 'gs_or'):
            yield Publication(row, 'scholar')

        # Look the "next page" marker up once and reuse it for both the
        # existence check and the link extraction.
        next_icon = soup.find(class_='gs_ico gs_ico_nav_next')
        if not next_icon:
            break

        # The marker itself carries no link; the href is on its parent.
        soup = _get_soup(_HOST + next_icon.parent['href'])
self.hindex5y = int(index[3].text)
self.i10index = int(index[4].text)
self.i10index5y = int(index[5].text)
else:
self.hindex = self.hindex5y = self.i10index = self.i10index5y = 0
# number of citations per year
years = [int(y.text) for y in soup.find_all('span', class_='gsc_g_t')]
cites = [int(c.text) for c in soup.find_all('span', class_='gsc_g_al')]
self.cites_per_year = dict(zip(years, cites))
self.publications = list()
pubstart = 0
while True:
for row in soup.find_all('tr', class_='gsc_a_tr'):
new_pub = Publication(row, 'citations')
self.publications.append(new_pub)
if 'disabled' not in soup.find('button', id='gsc_bpf_more').attrs:
pubstart += _PAGESIZE
url = '{0}&cstart={1}&pagesize={2}'.format(url_citations, pubstart, _PAGESIZE)
soup = _get_soup(_HOST+url)
else:
break
self._filled = True
return self