How to use the bs4.element.NavigableString class in bs4

To help you get started, we’ve selected a few bs4 examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dlon / html2markdown / html2markdown.py View on Github external
def _breakRemNewlines(tag):
	"""Collapse space runs and drop newlines in the tag's direct text children.

	Only the immediate entries of ``tag.contents`` are visited (no recursion),
	and only those whose exact type is ``NavigableString``; subclasses and
	nested tags are left untouched.
	"""
	for child in tag.contents:
		if type(child) == bs4.element.NavigableString:
			# Squeeze every run of two or more spaces into a single space,
			# then strip newline characters entirely.
			squeezed = re.sub(r' {2,}', ' ', child)
			child.replace_with(squeezed.replace('\n', ''))
github qkaren / converse_reading_cmr / data / src / create_official_data.py View on Github external
def insert_escaped_tags(tags, label=None):
    """For each tag in "tags", insert contextual tags (e.g., <p> </p>) as escaped text
       so that these tags are still there when html markup is stripped out.

    An opening marker is inserted before the first string found under the
    tag and a closing marker is appended after the last one. Tags that
    contain no strings are skipped.

    :param tags: iterable of tag objects exposing ``strings`` and ``name``.
    :param label: name to use inside the markers; when ``None`` each tag's
        own ``name`` is used instead.
    :return: ``True`` if markers were inserted for at least one tag,
        ``False`` otherwise.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            continue
        name = tag.name if label is None else label
        # The markers are plain text nodes, not real elements, so they are
        # kept as text when the surrounding markup is removed.
        strs[0].parent.insert(0, NavigableString("<" + name + ">"))
        strs[-1].parent.append(NavigableString("</" + name + ">"))
        found = True
    return found
github Azure / azure-devops-cli-extension / scripts / fixCodeCoverageStyle.py View on Github external
def embed_css_in_html_file(html_file, css_dir):
    """Inline every linked stylesheet of an HTML file as a <style> element.

    The file is parsed with the "html.parser" builder, each
    ``<link rel="stylesheet">`` is replaced by a ``<style type="text/css">``
    element holding the contents of the referenced CSS file, and the
    result is written back over ``html_file``.

    :param html_file: path of the HTML file to rewrite in place.
    :param css_dir: directory against which each link's ``href`` is joined
        to locate the CSS file.
    """
    # NOTE(review): files are opened with the platform default encoding,
    # as before -- confirm whether an explicit encoding is wanted.
    with open(html_file, 'r') as f:
        soup = bs4.BeautifulSoup(f.read(), "html.parser")

    # find_all / replace_with are the current names of the deprecated
    # findAll / replaceWith aliases.
    for link in soup.find_all("link", {"rel": "stylesheet"}):
        css_file = link["href"]
        print(f"found link to {css_file}")
        with open(os.path.join(css_dir, css_file), 'r') as f:
            css_text = bs4.element.NavigableString(f.read())

        style = soup.new_tag('style')
        style.insert(0, css_text)
        style['type'] = 'text/css'
        link.replace_with(style)

    with open(html_file, 'w') as f:
        f.write(str(soup))
github nnsun / CourseGrab / CourseGrab / WebJob / bs4 / builder / _html5lib.py View on Github external
string_child = child = node
        elif isinstance(node, Tag):
            # Some other piece of code decided to pass in a Tag
            # instead of creating an Element object to contain the
            # Tag.
            child = node
        elif node.element.__class__ == NavigableString:
            string_child = child = node.element
        else:
            child = node.element

        if not isinstance(child, basestring) and child.parent is not None:
            node.element.extract()

        if (string_child and self.element.contents
            and self.element.contents[-1].__class__ == NavigableString):
            # We are appending a string onto another string.
            # TODO This has O(n^2) performance, for input like
            # "aaa..."
            old_element = self.element.contents[-1]
            new_element = self.soup.new_string(old_element + string_child)
            old_element.replace_with(new_element)
            self.soup._most_recent_element = new_element
        else:
            if isinstance(node, basestring):
                # Create a brand new NavigableString from this string.
                child = self.soup.new_string(node)

            # Tell Beautiful Soup to act as if it parsed this element
            # immediately after the parent's last descendant. (Or
            # immediately after the parent, if it has no children.)
            if self.element.contents:
github rembo10 / headphones / bs4 / __init__.py View on Github external
def new_string(self, s):
        """Build a NavigableString from ``s``, call its setup(), and return it."""
        result = NavigableString(s)
        result.setup()
        return result
github qkaren / converse_reading_cmr / data / src / create_official_data.py View on Github external
def insert_escaped_tags(tags, label=None):
    """For each tag in "tags", insert contextual tags (e.g., <p> </p>) as escaped text
       so that these tags are still there when html markup is stripped out.

    An opening marker is inserted before the first string found under the
    tag and a closing marker is appended after the last one. Tags that
    contain no strings are skipped.

    :param tags: iterable of tag objects exposing ``strings`` and ``name``.
    :param label: name to use inside the markers; when ``None`` each tag's
        own ``name`` is used instead.
    :return: ``True`` if markers were inserted for at least one tag,
        ``False`` otherwise.
    """
    found = False
    for tag in tags:
        strs = list(tag.strings)
        if not strs:
            continue
        name = tag.name if label is None else label
        # The markers are plain text nodes, not real elements, so they are
        # kept as text when the surrounding markup is removed.
        strs[0].parent.insert(0, NavigableString("<" + name + ">"))
        strs[-1].parent.append(NavigableString("</" + name + ">"))
        found = True
    return found
github thoppe / miniprez / miniprez / tagline.py View on Github external
block = blocks[0]

        # If there aren't any inner sections, we are done
        if block.find() is None:
            return block

        # Othwerwise, fix punctuation errors
        punctuation = ".,!/;:%'\""

        for x in block.find():
            if not isinstance(x, bs4.element.NavigableString):
                continue
            if len(x) &lt;= 1:
                continue
            if x[0] == ' ' and x[1] in punctuation:
                xs = bs4.element.NavigableString(x.string[1:])
                x.replace_with(xs)

        return block
github naokazuterada / MarkdownTOC / bs4 / __init__.py View on Github external
    def new_string(self, s, subclass=NavigableString):
        """Wrap ``s`` in a string node and return it.

        ``subclass`` is the class that gets instantiated with ``s``; it
        defaults to NavigableString.
        """
        node = subclass(s)
        return node
github JimmXinu / FanFicFare / included_dependencies / bs4 / builder / _html5lib.py View on Github external
if isinstance(element, Doctype):
                m = doctype_re.match(element)
                if m:
                    name = m.group(1)
                    if m.lastindex &gt; 1:
                        publicId = m.group(2) or ""
                        systemId = m.group(3) or m.group(4) or ""
                        rv.append("""|%s""" %
                                  (' ' * indent, name, publicId, systemId))
                    else:
                        rv.append("|%s" % (' ' * indent, name))
                else:
                    rv.append("|%s" % (' ' * indent,))
            elif isinstance(element, Comment):
                rv.append("|%s" % (' ' * indent, element))
            elif isinstance(element, NavigableString):
                rv.append("|%s\"%s\"" % (' ' * indent, element))
            else:
                if element.namespace:
                    name = "%s %s" % (prefixes[element.namespace],
                                      element.name)
                else:
                    name = element.name
                rv.append("|%s&lt;%s&gt;" % (' ' * indent, name))
                if element.attrs:
                    attributes = []
                    for name, value in element.attrs.items():
                        if isinstance(name, NamespacedAttribute):
                            name = "%s %s" % (prefixes[name.namespace], name.name)
                        if isinstance(value, list):
                            value = " ".join(value)
                        attributes.append((name, value))
github rembo10 / headphones / bs4 / __init__.py View on Github external
    def endData(self, containerClass=NavigableString):
        """Flush the buffered character data into a single string object.

        The pieces accumulated in ``self.currentData`` are joined, the
        buffer is reset, and the joined text is wrapped in
        ``containerClass`` and passed to ``self.object_was_parsed``.
        Nothing happens when the buffer is empty.

        :param containerClass: class used to wrap the joined text
            (``NavigableString`` by default).
        """
        if not self.currentData:
            return

        currentData = u''.join(self.currentData)

        # Text that is empty after translate(STRIP_ASCII_SPACES) --
        # presumably whitespace-only text -- is collapsed to one character
        # unless an open tag is listed in the builder's
        # preserve_whitespace_tags: a newline if it contained one,
        # otherwise a space.
        if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
            not set([tag.name for tag in self.tagStack]).intersection(
                self.builder.preserve_whitespace_tags)):
            currentData = '\n' if '\n' in currentData else ' '

        self.currentData = []

        # With a parse_only filter set and at most one entry on the tag
        # stack, the text is dropped unless the filter has a text criterion
        # and its search() accepts this text.
        if self.parse_only and len(self.tagStack) <= 1 and \
               (not self.parse_only.text or \
                not self.parse_only.search(currentData)):
            return

        o = containerClass(currentData)
        self.object_was_parsed(o)