Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
utils.remove_accents(tds[2].a.next).lower(),
tds[3].a.next,
utils.remove_accents(tds[3].a.next).lower(),
tds[4].a.next,
tds[5].next+tds[5].span.next,
tds[6].a.next]
if isinstance(tds[4].a.next, bs4.element.Tag):
song[7] =''
if not isinstance(tds[7].a.next, bs4.element.Tag):
song.append(tds[7].a.next)
else:
song.append('?')
for i, curVal in enumerate(song):
if isinstance(curVal, bs4.element.Tag):
song[i] = ''
#print(song)
songs.append(song)
curPage+=1
url = orchestra = re.sub(r'P=\d+','P='+str(curPage), url)
print(url)
fp = urllib.request.urlopen(url)
soup = BeautifulSoup(fp.read(), "lxml")
else:
print("Pas d'enregistrement pour "+url)
#exit(0);
return songs;
def appendChild(self, node):
# Normalises `node` into a raw `child` (a str, a Tag, or the object held
# in `node.element`), records this object as the parent of wrapper-type
# nodes, and detaches a child that already has a parent.
# NOTE(review): this excerpt has lost its indentation and is cut off
# right after the final `if` header, so the actual insertion step is not
# visible here -- confirm against the complete source.
#
# `string_child` is only set when the child is textual; the final
# condition uses it to detect text being added after existing text.
string_child = child = None
if isinstance(node, str):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
string_child = child = node
elif isinstance(node, Tag):
# Some other piece of code decided to pass in a Tag
# instead of creating an Element object to contain the
# Tag.
child = node
# `node` wraps a NavigableString: treat the wrapped string as text.
elif node.element.__class__ == NavigableString:
string_child = child = node.element
# Record this object as the wrapper's parent.
node.parent = self
else:
# Any other wrapper: the wrapped element is the child.
child = node.element
node.parent = self
# A non-str child that already has a parent is detached first.
# NOTE(review): the call goes through `node.element`, not `child`;
# verify this is intended for the branch where `node` is a bare Tag.
if not isinstance(child, str) and child.parent is not None:
node.element.extract()
# True when a textual child is being added and the last entry of this
# element's contents is already a NavigableString.
if (string_child and self.element.contents
and self.element.contents[-1].__class__ == NavigableString):
# NOTE(review): the body of this condition is missing from the excerpt.
def appendChild(self, node):
# Normalises `node` into a raw `child` (a str, a Tag, or the object held
# in `node.element`) and detaches a child that already has a parent.
# NOTE(review): this excerpt has lost its indentation and is cut off
# inside the final `if` body, so the actual insertion step is not
# visible here -- confirm against the complete source.
#
# `string_child` is only set when the child is textual; the final
# condition uses it to detect text being added after existing text.
string_child = child = None
if isinstance(node, str):
# Some other piece of code decided to pass in a string
# instead of creating a TextElement object to contain the
# string.
string_child = child = node
elif isinstance(node, Tag):
# Some other piece of code decided to pass in a Tag
# instead of creating an Element object to contain the
# Tag.
child = node
# `node` wraps a NavigableString: treat the wrapped string as text.
elif node.element.__class__ == NavigableString:
string_child = child = node.element
else:
# Any other wrapper: the wrapped element is the child.
child = node.element
# A non-str child that already has a parent is detached first.
# NOTE(review): the call goes through `node.element`, not `child`;
# verify this is intended for the branch where `node` is a bare Tag.
if not isinstance(child, str) and child.parent is not None:
node.element.extract()
# True when a textual child is being added and the last entry of this
# element's contents is already a NavigableString.
if (string_child and self.element.contents
and self.element.contents[-1].__class__ == NavigableString):
# We are appending a string onto another string.
# TODO This has O(n^2) performance, for input like
# NOTE(review): the TODO above and the rest of this branch are cut off
# in this excerpt.
if type(content) is bs4.element.NavigableString:
current_node['text'] += "\n" + content
# Grab content from inline elements
elif (
type(content) is bs4.element.Tag and
content.name in text_elements
):
current_node['text'] += "\n" + content.text
# Ignore script tags
elif (
type(content) is bs4.element.Tag and
content.name in ignore_elements
):
pass
# Continue traversing the html tree
elif type(content) is bs4.element.Tag:
child_nodes.append(_most_words(content))
current_node['number_of_words'] = len(re.findall(word_regex,
current_node['text']))
current_node['path'] = [] #empty path
# Select the node containing most words
# and add the current location to the path
node_with_most_words = max(child_nodes + [current_node],
key=lambda x:x['number_of_words'])
node_with_most_words['path'].insert(0, (soup_xml_tag.name, str(soup_xml_tag.attrs)))
return node_with_most_words
if len(temp_results[0].find_all('div')) and temp_results[0].div.get('style'):
self.word.props[''] = temp_results[0].div.text
return True
# with open("test", 'w') as fout:
# fout.write(base.prettify())
# TODO: the page structure for pronunciations has changed
temp_results = base.find_all("div", class_="base-speak")
if temp_results:
temp = temp_results[0]
for node in temp:
temp1 = ''
temp2 = ''
if isinstance(node, bs4.element.Tag):
for node1 in node:
if not isinstance(node1, bs4.element.Tag):
continue
if node1.name == 'span':
temp1 = node1.text
elif node1.name == 'i':
temp3 = voice_url_reg.findall(node1['ms-on-mouseover'])
if temp3:
temp2 = temp3[0]
self.word.voices.append((temp1, temp2))
# Get the basic word meanings
print(self.word.voices)
temp_results = base.find_all('ul', class_='Mean_part__1RA2V')
# print(temp_results)
if temp_results:
print(temp_results)
meaning_text = ''
soup = bs4.BeautifulSoup(t.text)
table_body = soup.find("tbody")
# Check if the page retrieved has no submissions
if len(table_body) == 1:
return submissions
row = 0
submissions[handle][page] = {}
for i in table_body:
submissions[handle][page][it] = []
submission = submissions[handle][page][it]
append = submission.append
if isinstance(i, bs4.element.Tag):
if row == 0:
currid = i.contents[1].contents[0]
if currid == previd:
flag = 1
break
row += 1
previd = currid
# Time of submission
tos = i.contents[3].contents[1].contents[0]
curr = time.strptime(str(tos), "%Y-%m-%d %H:%M:%S")
if curr <= last_retrieved:
return submissions
append(str(tos))
# Problem Name/URL