Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Garbage may be sucked into the verse text, so if we do not get a clean int() then ignore the verse
# completely.
try:
clean_verse_num = int(str(raw_verse_num))
except ValueError:
verse_parts = str(raw_verse_num).split('-')
if len(verse_parts) > 1:
clean_verse_num = int(verse_parts[0])
except TypeError:
log.warning('Illegal verse number: %s', str(raw_verse_num))
if clean_verse_num:
verse_text = raw_verse_num.next_element
part = raw_verse_num.next_element.next_element
while not (isinstance(part, Tag) and part.get('class')[0] == 'versenum'):
# While we are still in the same verse grab all the text.
if isinstance(part, NavigableString):
verse_text += part
if isinstance(part.next_element, Tag) and part.next_element.name == 'div':
# Run out of verses so stop.
break
part = part.next_element
verse_list[clean_verse_num] = str(verse_text)
return verse_list
def parse_content(parent: Union[bs4.NavigableString, bs4.Tag]) -> bs4.NavigableString:
    """Flatten a Beautiful Soup node into one string, turning ``<br>`` into a newline.

    Args:
        parent: A text node or a tag.

    Returns:
        ``parent`` itself when it is already a ``NavigableString``; otherwise a
        new ``NavigableString`` holding the concatenated content of all
        descendants in document order. A childless ``<br>`` tag contributes
        ``'\\n'`` and any other childless tag contributes ``''``.
    """
    if isinstance(parent, bs4.NavigableString):
        return parent

    children = parent.contents
    if not children:
        # Compare the tag name instead of searching the rendered markup: a
        # substring test for 'br' also matches e.g. <abbr></abbr> or an empty
        # tag whose attributes merely contain the letters "br".
        return bs4.NavigableString('\n' if parent.name == 'br' else '')

    # NavigableString subclasses str, so the recursive results join directly.
    return bs4.NavigableString(''.join(parse_content(child) for child in children))
{{text}}
""")
text = ''
for img in soup.find_all('img'):
img['src'] = os.path.join(
'figs', os.path.basename(img['src']))
for node in soup.body.contents:
if isinstance(node, Tag) and node.name == 'pre':
codefile = tempfile.NamedTemporaryFile(delete=False)
codepath = codefile.name
with codefile:
for code in node.contents:
if isinstance(code, Tag) and code.name == 'br':
codefile.write('\n')
elif not isinstance(code, Comment):
if isinstance(code, NavigableString):
codefile.write(code.string)
else:
codefile.write(code.get_text().encode('utf-8'))
codetext = subprocess.check_output(
["source-highlight", "-s", "scala", "-i", codepath])
os.remove(codepath)
text = text + codetext.decode('utf-8')
elif not isinstance(node, Comment):
text = text + str(node).decode('utf-8')
htmlfile.write("""
{% endblock %}
""")
# Dress up with base layout
env = Environment(loader=ChoiceLoader([
FileSystemLoader('.'),
def calc_effective_text_len(self, node):
    """
    Return the weighted text length of ``node``, caching it on ``node.text_len``.

    The stripped length of every direct plain-text child is summed, plus one
    extra point per ASCII comma and per full-width (Chinese) comma. Child tags
    are measured recursively, except ``<a>`` tags, which contribute nothing.
    When ``self.has_negative_effect(node)`` is true the total is scaled by 0.2.
    """
    if node.text_len is not None:
        # Already computed for this node; reuse the stored value.
        return node.text_len

    text_len = 0
    for child in node.children:
        if isinstance(child, Tag):
            if child.name == 'a':
                continue
            text_len += self.calc_effective_text_len(child)
        # Comment is also an instance of NavigableString,
        # so we should not use isinstance(child, NavigableString)
        elif type(child) is NavigableString:
            content = child.string
            # The full-width comma is written as an escape so it cannot be
            # mistaken for the ASCII comma counted right beside it.
            text_len += (
                len(content.strip())
                + content.count(',')
                + content.count(u'\uff0c')  # Chinese comma
            )

    node.text_len = text_len * .2 if self.has_negative_effect(node) else text_len
    return node.text_len
def is_simpletable(table):
    """Return True when every non-empty ``td`` cell of ``table`` holds exactly one plain string."""
    for cell in table('td'):
        items = cell.contents
        if items == []:
            # An empty cell does not disqualify the table.
            continue
        if len(items) != 1:
            return False
        if not isinstance(items[0], NavigableString):
            return False
    return True
def traverse(self, elem):
for part in elem.contents:
if isinstance(part, NavigableString):
self.run.add(text_type(part), italic=self.italic, bold=self.bold, underline=self.underline, strike=self.strike, size=self.size)
self.still_new = False
elif isinstance(part, Tag):
# logmessage("Part name is " + text_type(part.name))
if part.name == 'p':
self.new_paragraph()
self.traverse(part)
elif part.name == 'li':
self.new_paragraph()
self.traverse(part)
elif part.name == 'ul':
# logmessage("Entering a UL")
oldstyle = self.style
self.style = 'ul'
self.indentation += 10
self.traverse(part)
def createTextNode(self, data):
    """Wrap ``data`` in a ``bs4.NavigableString`` and return a ``Text`` built from ``self`` and that string."""
    # Imported at call time, as in the original.
    from .Text import Text

    text_value = bs4.NavigableString(data)
    return Text(self, text_value)
def parse_sections(article):
"""
Parse list of sections from a given BeautifulSoup of an article
"""
article_text = article.find('text')
divs = article_text.find_all('div', attrs={'xmlns': 'http://www.tei-c.org/ns/1.0'})
sections = []
for div in divs:
div_list = list(div.children)
if len(div_list) == 0:
heading = ''
text = ''
elif len(div_list) == 1:
if isinstance(div_list[0], NavigableString):
heading = str(div_list[0])
text = ''
else:
heading = ''
text = div_list[0].text
else:
text = []
heading = div_list[0]
if isinstance(heading, NavigableString):
heading = str(heading)
p_all = list(div.children)[1:]
else:
heading = ''
p_all = list(div.children)
for p in p_all:
if p is not None:
def _recurseUntilString(self, node):
    """
    Digs through HTML that Word made worse.
    Written to deal with http://www2.usfirst.org/2011comp/Events/cmp/matchresults.html

    Returns the text found under ``node``:
    - when ``node.string`` is set, that string with non-breaking spaces
      turned into spaces, whitespace runs collapsed to one space, and the
      ends stripped;
    - otherwise, when ``node`` has ``contents``, the non-empty results of
      recursing into each child, joined with single spaces;
    - ``None`` when no text is found.
    """
    from bs4 import NavigableString

    if node.string is not None:
        # Raw string: '\s' in a normal literal is an invalid escape sequence.
        return re.sub(r'\s+', ' ', node.string.replace(u'\xa0', ' ')).strip()  # remove multiple whitespaces
    if isinstance(node, NavigableString):
        return node
    if hasattr(node, 'contents'):
        pieces = []
        for content in node.contents:
            piece = self._recurseUntilString(content)
            if piece is None:
                continue
            # Drop line breaks, then collapse double spaces; replacing a
            # single space with a single space would do nothing.
            piece = piece.strip().replace('\r', '').replace('\n', '').replace('  ', ' ')
            if piece != "":
                pieces.append(piece)
        if pieces:
            return ' '.join(pieces)
    return None
def _add_navigable_string_to_empty_tag(soup):
for el in list(soup.descendants):
if isinstance(el, Tag) and not list(el.children) and el.name not in ('br',):
el.append(NavigableString(' '))