How to use the extruct.rdflibxml.host.__init__.MediaTypes class in extruct

To help you get started, we’ve selected a few extruct examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
MediaTypes.smil        : HostLanguage.rdfa_core,
    MediaTypes.svg        : HostLanguage.svg,
    MediaTypes.svgi        : HostLanguage.svg,
    MediaTypes.atom        : HostLanguage.atom,
}

# Lookup table: file-name suffix (leading dot included) -> media type constant.
# Several suffixes share a media type (".ttl"/".n3", ".rdf"/".owl",
# ".html"/".shtml").
preferred_suffixes = {
    ".rdf": MediaTypes.rdfxml,
    ".ttl": MediaTypes.turtle,
    ".n3": MediaTypes.turtle,
    ".owl": MediaTypes.rdfxml,
    ".html": MediaTypes.html,
    ".shtml": MediaTypes.html,
    ".xhtml": MediaTypes.xhtml,
    ".svg": MediaTypes.svg,
    ".smil": MediaTypes.smil,
    ".xml": MediaTypes.xml,
    ".nt": MediaTypes.nt,
    ".atom": MediaTypes.atom,
}

# Known (DTD public identifier, DTD URL) pairs; a match may determine both
# the host language and the RDFa version.
_XHTML_1_0 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.0//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd",
    ),
]

_XHTML_1_1 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd",
    ),
    (
        "-//W3C//DTD HTML 4.01+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd",
    ),
]
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
# Table from (a subset of) media types to the RDFa host language used for
# them. The host language may steer the exact processing, or at least the
# initial context. Both XML media types and SMIL map to rdfa_core; both SVG
# media types map to svg.
content_to_host_language = {
    MediaTypes.html: HostLanguage.html5,
    MediaTypes.xhtml: HostLanguage.xhtml,
    MediaTypes.xml: HostLanguage.rdfa_core,
    MediaTypes.xmlt: HostLanguage.rdfa_core,
    MediaTypes.smil: HostLanguage.rdfa_core,
    MediaTypes.svg: HostLanguage.svg,
    MediaTypes.svgi: HostLanguage.svg,
    MediaTypes.atom: HostLanguage.atom,
}

# Lookup table: file-name suffix (leading dot included) -> media type constant.
# Several suffixes share a media type (".ttl"/".n3", ".rdf"/".owl",
# ".html"/".shtml").
preferred_suffixes = {
    ".rdf": MediaTypes.rdfxml,
    ".ttl": MediaTypes.turtle,
    ".n3": MediaTypes.turtle,
    ".owl": MediaTypes.rdfxml,
    ".html": MediaTypes.html,
    ".shtml": MediaTypes.html,
    ".xhtml": MediaTypes.xhtml,
    ".svg": MediaTypes.svg,
    ".smil": MediaTypes.smil,
    ".xml": MediaTypes.xml,
    ".nt": MediaTypes.nt,
    ".atom": MediaTypes.atom,
}

# DTD combinations that may determine the host language and the rdfa version
_XHTML_1_0 = [
    ("-//W3C//DTD XHTML+RDFa 1.0//EN", "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd")
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
MediaTypes.svg        : HostLanguage.svg,
    MediaTypes.svgi        : HostLanguage.svg,
    MediaTypes.atom        : HostLanguage.atom,
}

# Lookup table: file-name suffix (leading dot included) -> media type constant.
# Several suffixes share a media type (".ttl"/".n3", ".rdf"/".owl",
# ".html"/".shtml").
preferred_suffixes = {
    ".rdf": MediaTypes.rdfxml,
    ".ttl": MediaTypes.turtle,
    ".n3": MediaTypes.turtle,
    ".owl": MediaTypes.rdfxml,
    ".html": MediaTypes.html,
    ".shtml": MediaTypes.html,
    ".xhtml": MediaTypes.xhtml,
    ".svg": MediaTypes.svg,
    ".smil": MediaTypes.smil,
    ".xml": MediaTypes.xml,
    ".nt": MediaTypes.nt,
    ".atom": MediaTypes.atom,
}

# Known (DTD public identifier, DTD URL) pairs; a match may determine both
# the host language and the RDFa version.
_XHTML_1_0 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.0//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd",
    ),
]

_XHTML_1_1 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd",
    ),
    (
        "-//W3C//DTD HTML 4.01+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd",
    ),
]

_XHTML = [
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
turtle     = 'text/turtle'
    html    = 'text/html'
    xhtml    = 'application/xhtml+xml'
    svg        = 'application/svg+xml'
    svgi    = 'image/svg+xml'
    smil    = 'application/smil+xml'
    atom    = 'application/atom+xml'
    xml        = 'application/xml'
    xmlt    = 'text/xml'
    nt        = 'text/plain'

# Table from (a subset of) media types to the RDFa host language used for
# them. The host language may steer the exact processing, or at least the
# initial context. Both XML media types and SMIL map to rdfa_core; both SVG
# media types map to svg.
content_to_host_language = {
    MediaTypes.html: HostLanguage.html5,
    MediaTypes.xhtml: HostLanguage.xhtml,
    MediaTypes.xml: HostLanguage.rdfa_core,
    MediaTypes.xmlt: HostLanguage.rdfa_core,
    MediaTypes.smil: HostLanguage.rdfa_core,
    MediaTypes.svg: HostLanguage.svg,
    MediaTypes.svgi: HostLanguage.svg,
    MediaTypes.atom: HostLanguage.atom,
}

# mapping preferred suffixes to media types...
preferred_suffixes = {
    ".rdf"        : MediaTypes.rdfxml,
    ".ttl"        : MediaTypes.turtle,
    ".n3"        : MediaTypes.turtle,
    ".owl"        : MediaTypes.rdfxml,
    ".html"        : MediaTypes.html,
    ".shtml"    : MediaTypes.html,
    ".xhtml"    : MediaTypes.xhtml,
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
MediaTypes.xmlt        : HostLanguage.rdfa_core,
    MediaTypes.smil        : HostLanguage.rdfa_core,
    MediaTypes.svg        : HostLanguage.svg,
    MediaTypes.svgi        : HostLanguage.svg,
    MediaTypes.atom        : HostLanguage.atom,
}

# Lookup table: file-name suffix (leading dot included) -> media type constant.
# Several suffixes share a media type (".ttl"/".n3", ".rdf"/".owl",
# ".html"/".shtml").
preferred_suffixes = {
    ".rdf": MediaTypes.rdfxml,
    ".ttl": MediaTypes.turtle,
    ".n3": MediaTypes.turtle,
    ".owl": MediaTypes.rdfxml,
    ".html": MediaTypes.html,
    ".shtml": MediaTypes.html,
    ".xhtml": MediaTypes.xhtml,
    ".svg": MediaTypes.svg,
    ".smil": MediaTypes.smil,
    ".xml": MediaTypes.xml,
    ".nt": MediaTypes.nt,
    ".atom": MediaTypes.atom,
}

# Known (DTD public identifier, DTD URL) pairs; a match may determine both
# the host language and the RDFa version.
_XHTML_1_0 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.0//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd",
    ),
]

_XHTML_1_1 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd",
    ),
    (
        "-//W3C//DTD HTML 4.01+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd",
    ),
]
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
html    = 'text/html'
    xhtml    = 'application/xhtml+xml'
    svg        = 'application/svg+xml'
    svgi    = 'image/svg+xml'
    smil    = 'application/smil+xml'
    atom    = 'application/atom+xml'
    xml        = 'application/xml'
    xmlt    = 'text/xml'
    nt        = 'text/plain'

# Table from (a subset of) media types to the RDFa host language used for
# them. The host language may steer the exact processing, or at least the
# initial context. Both XML media types and SMIL map to rdfa_core; both SVG
# media types map to svg.
content_to_host_language = {
    MediaTypes.html: HostLanguage.html5,
    MediaTypes.xhtml: HostLanguage.xhtml,
    MediaTypes.xml: HostLanguage.rdfa_core,
    MediaTypes.xmlt: HostLanguage.rdfa_core,
    MediaTypes.smil: HostLanguage.rdfa_core,
    MediaTypes.svg: HostLanguage.svg,
    MediaTypes.svgi: HostLanguage.svg,
    MediaTypes.atom: HostLanguage.atom,
}

# mapping preferred suffixes to media types...
preferred_suffixes = {
    ".rdf"        : MediaTypes.rdfxml,
    ".ttl"        : MediaTypes.turtle,
    ".n3"        : MediaTypes.turtle,
    ".owl"        : MediaTypes.rdfxml,
    ".html"        : MediaTypes.html,
    ".shtml"    : MediaTypes.html,
    ".xhtml"    : MediaTypes.xhtml,
    ".svg"        : MediaTypes.svg,
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
svgi    = 'image/svg+xml'
    smil    = 'application/smil+xml'
    atom    = 'application/atom+xml'
    xml        = 'application/xml'
    xmlt    = 'text/xml'
    nt        = 'text/plain'

# Table from (a subset of) media types to the RDFa host language used for
# them. The host language may steer the exact processing, or at least the
# initial context. Both XML media types and SMIL map to rdfa_core; both SVG
# media types map to svg.
content_to_host_language = {
    MediaTypes.html: HostLanguage.html5,
    MediaTypes.xhtml: HostLanguage.xhtml,
    MediaTypes.xml: HostLanguage.rdfa_core,
    MediaTypes.xmlt: HostLanguage.rdfa_core,
    MediaTypes.smil: HostLanguage.rdfa_core,
    MediaTypes.svg: HostLanguage.svg,
    MediaTypes.svgi: HostLanguage.svg,
    MediaTypes.atom: HostLanguage.atom,
}

# mapping preferred suffixes to media types...
preferred_suffixes = {
    ".rdf"        : MediaTypes.rdfxml,
    ".ttl"        : MediaTypes.turtle,
    ".n3"        : MediaTypes.turtle,
    ".owl"        : MediaTypes.rdfxml,
    ".html"        : MediaTypes.html,
    ".shtml"    : MediaTypes.html,
    ".xhtml"    : MediaTypes.xhtml,
    ".svg"        : MediaTypes.svg,
    ".smil"        : MediaTypes.smil,
    ".xml"        : MediaTypes.xml,
    ".nt"        : MediaTypes.nt,
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
}

# Lookup table: file-name suffix (leading dot included) -> media type constant.
# Several suffixes share a media type (".ttl"/".n3", ".rdf"/".owl",
# ".html"/".shtml").
preferred_suffixes = {
    ".rdf": MediaTypes.rdfxml,
    ".ttl": MediaTypes.turtle,
    ".n3": MediaTypes.turtle,
    ".owl": MediaTypes.rdfxml,
    ".html": MediaTypes.html,
    ".shtml": MediaTypes.html,
    ".xhtml": MediaTypes.xhtml,
    ".svg": MediaTypes.svg,
    ".smil": MediaTypes.smil,
    ".xml": MediaTypes.xml,
    ".nt": MediaTypes.nt,
    ".atom": MediaTypes.atom,
}

# Known (DTD public identifier, DTD URL) pairs; a match may determine both
# the host language and the RDFa version.
_XHTML_1_0 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.0//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd",
    ),
]

_XHTML_1_1 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd",
    ),
    (
        "-//W3C//DTD HTML 4.01+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd",
    ),
]

_XHTML = [
    ("-//W3C//DTD XHTML 1.0 Strict//EN",       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
    ("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"),
    ("-//W3C//DTD XHTML 1.1//EN",              "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd")
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
# Table from (a subset of) media types to the RDFa host language used for
# them. Both XML media types and SMIL map to rdfa_core; both SVG media types
# map to svg.
content_to_host_language = {
    MediaTypes.html: HostLanguage.html5,
    MediaTypes.xhtml: HostLanguage.xhtml,
    MediaTypes.xml: HostLanguage.rdfa_core,
    MediaTypes.xmlt: HostLanguage.rdfa_core,
    MediaTypes.smil: HostLanguage.rdfa_core,
    MediaTypes.svg: HostLanguage.svg,
    MediaTypes.svgi: HostLanguage.svg,
    MediaTypes.atom: HostLanguage.atom,
}

# Lookup table: file-name suffix (leading dot included) -> media type constant.
# Several suffixes share a media type (".ttl"/".n3", ".rdf"/".owl",
# ".html"/".shtml").
preferred_suffixes = {
    ".rdf": MediaTypes.rdfxml,
    ".ttl": MediaTypes.turtle,
    ".n3": MediaTypes.turtle,
    ".owl": MediaTypes.rdfxml,
    ".html": MediaTypes.html,
    ".shtml": MediaTypes.html,
    ".xhtml": MediaTypes.xhtml,
    ".svg": MediaTypes.svg,
    ".smil": MediaTypes.smil,
    ".xml": MediaTypes.xml,
    ".nt": MediaTypes.nt,
    ".atom": MediaTypes.atom,
}

# Known (DTD public identifier, DTD URL) pairs; a match may determine both
# the host language and the RDFa version.
_XHTML_1_0 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.0//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd",
    ),
]
github scrapinghub / extruct / extruct / rdflibxml / host / __init__.py View on Github external
MediaTypes.atom        : HostLanguage.atom,
}

# Lookup table: file-name suffix (leading dot included) -> media type constant.
# Several suffixes share a media type (".ttl"/".n3", ".rdf"/".owl",
# ".html"/".shtml").
preferred_suffixes = {
    ".rdf": MediaTypes.rdfxml,
    ".ttl": MediaTypes.turtle,
    ".n3": MediaTypes.turtle,
    ".owl": MediaTypes.rdfxml,
    ".html": MediaTypes.html,
    ".shtml": MediaTypes.html,
    ".xhtml": MediaTypes.xhtml,
    ".svg": MediaTypes.svg,
    ".smil": MediaTypes.smil,
    ".xml": MediaTypes.xml,
    ".nt": MediaTypes.nt,
    ".atom": MediaTypes.atom,
}

# Known (DTD public identifier, DTD URL) pairs; a match may determine both
# the host language and the RDFa version.
_XHTML_1_0 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.0//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd",
    ),
]

_XHTML_1_1 = [
    (
        "-//W3C//DTD XHTML+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-2.dtd",
    ),
    (
        "-//W3C//DTD HTML 4.01+RDFa 1.1//EN",
        "http://www.w3.org/MarkUp/DTD/html401-rdfa11-1.dtd",
    ),
]

_XHTML = [
    ("-//W3C//DTD XHTML 1.0 Strict//EN",       "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"),
    ("-//W3C//DTD XHTML 1.0 Transitional//EN", "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"),