How to use the extruct.rdflibxml.host.HostLanguage.xhtml5 function in extruct

To help you get started, we’ve selected a few extruct examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapinghub / extruct / extruct / rdflibxml / transform / OpenID.py View on Github external
def OpenID_transform(html, options, state) :
    """
    Replace C{openid.XXX} type C{@rel} attribute values in C{<link>} elements by C{openid:XXX}. The openid URI is also
    added to the top level namespaces with the C{openid:} local name.

    The function returns immediately, without touching the DOM, when the host language is not one of
    C{HostLanguage.xhtml}, C{HostLanguage.html5} or C{HostLanguage.xhtml5}, or when no C{head}
    element can be retrieved from C{html}.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options}
    @param state: top level execution state
    @type state: L{State}
    """
    from ..host import HostLanguage
    # Only the HTML family of host languages is concerned by this transformation
    if not( options.host_language in [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ] ) :
        return

    # the head element is necessary; to be sure, the namespaces are set
    # on that level only
    head = None
    try :
        head = html.getElementsByTagName("head")[0]
    except :
        # no head....
        # NOTE(review): the bare except also swallows any error other than the
        # IndexError raised by indexing an empty result; consider narrowing it
        return

    # presumably records whether an openid value has been seen -- TODO confirm against its later use
    foundOpenId = False
    # Inspect every link element that carries a @rel attribute
    for link in html.getElementsByTagName("link") :
        if link.hasAttribute("rel") :
            rel = link.getAttribute("rel")
            # presumably accumulates the rewritten @rel value -- TODO confirm
            newProp = ""
github scrapinghub / extruct / extruct / rdflibxml / transform / DublinCore.py View on Github external
def DC_transform(html, options, state) :
    """
    Transformation working on the DC (presumably Dublin Core -- TODO confirm) namespace declarations
    carried by C{<link>} elements that have a C{@rel} attribute.

    The function returns immediately, without touching the DOM, when the host language is not one of
    C{HostLanguage.xhtml}, C{HostLanguage.html5} or C{HostLanguage.xhtml5}, or when no C{head}
    element can be retrieved from C{html}.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options}
    @param state: top level execution state
    @type state: L{State}
    """
    from ..host import HostLanguage
    # Only the HTML family of host languages is concerned by this transformation
    if not( options.host_language in [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ] ) :
        return
    
    # the head element is necessary; to be sure, the namespaces are set
    # on that level only
    head = None
    try :
        head = html.getElementsByTagName("head")[0]
    except :
        # no head....
        # NOTE(review): the bare except also swallows any error other than the
        # IndexError raised by indexing an empty result; consider narrowing it
        return

    # At first, the DC namespaces must be found
    # (collected in a dictionary; presumably keyed by prefix -- TODO confirm)
    dcprefixes = {}
    # Inspect every link element that carries a @rel attribute
    for link in html.getElementsByTagName("link") :
        if link.hasAttribute("rel") :
            rel = link.getAttribute("rel")
github scrapinghub / extruct / extruct / rdflibxml / transform / metaname.py View on Github external
def meta_transform(html, options, state) :
    """
    Give every C{<meta>} element that has a C{@name} but no C{@property} a C{@property} attribute
    holding the same value as its C{@name}. Elements that already have a C{@property} are left alone.

    Nothing is done unless the host language is one of C{HostLanguage.xhtml}, C{HostLanguage.html5}
    or C{HostLanguage.xhtml5}.

    @param html: a DOM node for the top level html element
    @param options: invocation options
    @type options: L{Options}
    @param state: top level execution state (not used by this transformation)
    @type state: L{State}
    """
    from ..host import HostLanguage

    html_hosts = [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ]
    if options.host_language not in html_hosts :
        return

    for element in html.getElementsByTagName("meta") :
        # skip elements without a @name, and those whose @property is already set
        if not element.hasAttribute("name") or element.hasAttribute("property") :
            continue
        name_value = element.getAttribute("name")
        element.setAttribute("property", name_value)
github scrapinghub / extruct / extruct / rdflibxml / state.py View on Github external
top_version = node.getAttribute("version")
                if top_version.find("RDFa 1.0") != -1 or top_version.find("RDFa1.0") != -1 :
                    self.rdfa_version = "1.0"
                elif top_version.find("RDFa 1.1") != -1 or top_version.find("RDFa1.1") != -1 :
                    self.rdfa_version = "1.1"

            # this is just to play safe. I believe this should actually not happen...
            if options == None :
                from . import Options
                self.options = Options()
            else :
                self.options = options

            self.base = ""
            # handle the base element case for HTML
            if self.options.host_language in [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5  ] :
                for bases in node.getElementsByTagName("base") :
                    if bases.hasAttribute("href") :
                        self.base = remove_frag_id(bases.getAttribute("href"))
                        continue
            elif self.options.host_language in accept_xml_base and node.hasAttribute("xml:base") :
                self.base = remove_frag_id(node.getAttribute("xml:base"))

            # If no local setting for base occurs, the input argument has it
            if self.base == "" :
                self.base = base

            # Perform an extra beautification in RDFLib
            if self.options.host_language in beautifying_prefixes :
                dict = beautifying_prefixes[self.options.host_language]
                for key in dict :
                    graph.bind(key,dict[key])
github scrapinghub / extruct / extruct / rdflibxml / utils.py View on Github external
    @return: string
    """
    node = inode.cloneNode(True)
    # Decorate the element with namespaces.lang values and, optionally, base
    if base :
        node.setAttribute("xml:base",state.base)
    if xmlns :
        for prefix in state.term_or_curie.xmlns :
            if not node.hasAttribute("xmlns:%s" % prefix) :
                node.setAttribute("xmlns:%s" % prefix,"%s" % state.term_or_curie.xmlns[prefix])
        # Set the default namespace, if not done (and is available)
        if not node.getAttribute("xmlns") and state.defaultNS != None :
            node.setAttribute("xmlns", state.defaultNS)
    # Get the lang, if necessary
    if state.lang :
        if state.options.host_language in [ HostLanguage.xhtml, HostLanguage.xhtml5, HostLanguage.html5 ] :
            if not node.getAttribute("lang") :
                node.setAttribute("lang", state.lang)
        else :
            if not node.getAttribute("xml:lang") :
                node.setAttribute("xml:lang", state.lang)
    if sys.version_info[0] >= 3 :
        return node.toxml()
    else :
        q = node.toxml(encoding='utf-8')
        return unicode(q, encoding='utf-8')
github scrapinghub / extruct / extruct / rdflibxml / transform / __init__.py View on Github external
if not has_one_of_attributes(node, "href", "resource", "about", "src") :
                node.setAttribute("about","")

    from ..host import HostLanguage
    from ..utils import has_one_of_attributes

    if not has_one_of_attributes(root, "about") :
        # The situation is a bit complicated: if a @resource is present without anything else, then it sets
        # the subject, ie, should be accepted...
        if has_one_of_attributes(root, "resource", "href", "src") :
            if has_one_of_attributes(root, "rel", "rev","property") :
                root.setAttribute("about","")
        else :
            root.setAttribute("about","")

    if options.host_language in [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ] :
        if state.rdfa_version >= "1.1" :
            pass
        else :
            for top in root.getElementsByTagName("head") :
                if not has_one_of_attributes(top, "href", "resource", "about", "src") :
                    set_about(top)
            for top in root.getElementsByTagName("body") :
                if not has_one_of_attributes(top, "href", "resource", "about", "src") :
                    set_about(top)
github scrapinghub / extruct / extruct / rdflibxml / parse.py View on Github external
def header_check(p_obj) :
        """
        Special disposition for the HTML C{<head>} and C{<body>} elements.

        @param p_obj: the value to hand back when the special disposition applies
        @return: C{p_obj} if the host language is one of C{HostLanguage.xhtml}, C{HostLanguage.html5}
        or C{HostLanguage.xhtml5}, the current node is a C{head} or C{body} element, and that node has
        none of the C{@about}, C{@resource}, C{@src} or C{@href} attributes; C{None} in all other cases
        """
        html_hosts = [ HostLanguage.xhtml, HostLanguage.html5, HostLanguage.xhtml5 ]
        if state.options.host_language not in html_hosts :
            return None
        # only the two top level structural elements get the special treatment
        if node.nodeName != "head" and node.nodeName != "body" :
            return None
        # an explicit subject/object setting attribute on the element takes precedence
        if has_one_of_attributes(node, "about", "resource", "src", "href") :
            return None
        return p_obj