Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment of a larger routine whose header is outside this extract.
# NOTE(review): indentation was lost in this extract; the nesting described in
# these comments is inferred from statement order -- confirm against the full file.
# Looks up the didl:Item children of root_item and, per item, gathers the
# rdf:type texts and the dcterms:modified text found in its
# Descriptor/Statement children.
items = root_item.findall(xmletree.prefixtag("didl", "Item"))
if items:
for item in items:
# item types
item_types = []
item_date_modified = None
descriptors = item.findall(xmletree.prefixtag("didl", "Descriptor"))
if descriptors:
for descriptor in descriptors:
statements = descriptor.findall(xmletree.prefixtag("didl", "Statement"))
if statements:
for statement in statements:
rdf_type = statement.find(xmletree.prefixtag("rdf", "type"))
if rdf_type is not None:
item_types.append(rdf_type.text)
dcterms_modified = statement.find(xmletree.prefixtag("dcterms", "modified"))
if dcterms_modified is not None:
# Plain assignment: a later dcterms:modified text replaces an earlier one.
item_date_modified = dcterms_modified.text
#logging.debug("item_types: {}".format(item_types))
if 'info:eu-repo/semantics/descriptiveMetadata' in item_types:
# metadata
#logging.debug("metadata")
# Drill down Component -> Resource -> mods:mods, stopping at the first missing level.
component = item.find(xmletree.prefixtag("didl", "Component"))
if component is not None:
resource = component.find(xmletree.prefixtag("didl", "Resource"))
if resource is not None:
mods = resource.find(xmletree.prefixtag("mods", "mods"))
if mods is not None:
#logging.debug("mods")
# Hand the mods:mods element to the MODS crosswalk along with source and rec_id_prefix.
document = mods_crosswalk.mods_xmletree_to_metajson(mods, source, rec_id_prefix)
def get_rml_element_text_as_boolean(rml, element):
    """Look up the ``rml``-prefixed child *element* of *rml* and convert it.

    The child is located with ``rml.find`` using the tag built by
    ``xmletree.prefixtag("rml", element)``; whatever ``find`` returns is handed
    to ``xmletree.get_element_text_as_boolean`` and that helper's result is
    returned unchanged.
    """
    qualified_tag = xmletree.prefixtag("rml", element)
    return xmletree.get_element_text_as_boolean(rml.find(qualified_tag))
# Builds one Warpper per mets:dmdSec child of `mets`, recording its ID, GROUPID,
# the mdWrap MDTYPE, and the converted children of mets:xmlData.
# NOTE(review): indentation was lost and the function is cut off in this extract
# (no append to `warppers` and no return are visible) -- confirm against the full file.
def extract_dmdsecs(mets):
#logging.debug("dmdsecs")
dmdsecs = mets.findall(xmletree.prefixtag("mets", "dmdSec"))
if dmdsecs:
warppers = []
for dmdsec in dmdsecs:
warpper = Warpper()
warpper['rec_id'] = dmdsec.get("ID")
warpper['rec_id_group'] = dmdsec.get("GROUPID")
mdwrap = dmdsec.find(xmletree.prefixtag("mets", "mdWrap"))
# NOTE(review): mdwrap is used without a None check; a dmdSec lacking
# mets:mdWrap would presumably raise AttributeError here -- confirm.
warpper['meta_type'] = mdwrap.get("MDTYPE")
# The "xmlData/*" path selects every direct child of mets:xmlData.
xmldatas = mdwrap.findall(xmletree.prefixtag("mets", "xmlData/*"))
# NOTE(review): if findall returns a list (as in ElementTree), this test is
# always true and 'records' is set even when nothing matched -- confirm.
if xmldatas is not None:
warpper['records'] = []
for xmldata in xmldatas:
document = convert_xmldata(xmldata, warpper['meta_type'])
warpper['records'].append(document)
# NOTE(review): indentation was lost and the function is cut off in this extract;
# only the element look-ups at its start are visible. `laboratories`, `projects`
# and `source` are not used in the visible part.
def biblfull_xmletree_to_metajson(biblfull, laboratories, projects, source):
""" biblFull xmletree -> MetaJSON Document """
# A missing biblFull element yields None rather than an empty Document.
if biblfull is None:
return None
document = Document()
# titleStmt
tei_titlestmt = biblfull.find(xmletree.prefixtag("tei", "titleStmt"))
# editionStmt
tei_editionstmt = biblfull.find(xmletree.prefixtag("tei", "editionStmt"))
# extent
tei_extent = biblfull.find(xmletree.prefixtag("tei", "extent"))
# publicationStmt
tei_publicationstmt = biblfull.find(xmletree.prefixtag("tei", "publicationStmt"))
# seriesStmt
tei_seriesstmt = biblfull.find(xmletree.prefixtag("tei", "seriesStmt"))
# notesStmt
tei_notesstmt = biblfull.find(xmletree.prefixtag("tei", "notesStmt"))
# sourceDesc (findall: every tei:sourceDesc is collected, not just the first)
tei_sourcedescs = biblfull.findall(xmletree.prefixtag("tei", "sourceDesc"))
# profileDesc
tei_profiledesc = biblfull.find(xmletree.prefixtag("tei", "profileDesc"))
# NOTE(review): tei_profiledesc, tei_langusage and tei_textclass are dereferenced
# without None checks; a record lacking any of them would presumably raise
# AttributeError -- confirm whether the input schema guarantees their presence.
tei_langusage = tei_profiledesc.find(xmletree.prefixtag("tei", "langUsage"))
tei_languages = tei_langusage.findall(xmletree.prefixtag("tei", "language"))
tei_textclass = tei_profiledesc.find(xmletree.prefixtag("tei", "textClass"))
tei_keywords = tei_textclass.findall(xmletree.prefixtag("tei", "keywords"))
# Fragment starting inside a loop whose header is outside this extract
# (`statement`, `rdf_type`, `item`, `item_types` are bound before this point).
# NOTE(review): indentation was lost in this extract; the nesting described in
# these comments is inferred from statement order -- confirm against the full file.
if rdf_type is not None:
item_types.append(rdf_type.text)
dcterms_modified = statement.find(xmletree.prefixtag("dcterms", "modified"))
if dcterms_modified is not None:
item_date_modified = dcterms_modified.text
#logging.debug("item_types: {}".format(item_types))
# Branch on the collected rdf:type texts: descriptive metadata vs. object file.
if 'info:eu-repo/semantics/descriptiveMetadata' in item_types:
# metadata
#logging.debug("metadata")
component = item.find(xmletree.prefixtag("didl", "Component"))
if component is not None:
resource = component.find(xmletree.prefixtag("didl", "Resource"))
if resource is not None:
mods = resource.find(xmletree.prefixtag("mods", "mods"))
if mods is not None:
#logging.debug("mods")
document = mods_crosswalk.mods_xmletree_to_metajson(mods, source, rec_id_prefix)
# Only a truthy modification date is copied onto the converted document.
if item_date_modified:
document["rec_modified_date"] = item_date_modified
elif 'info:eu-repo/semantics/objectFile' in item_types:
# resource
#logging.debug("resource")
# Initial values for the resource description; the code that may override
# them lies beyond this extract.
url = None
date_last_accessed = None
relation_type = "publication"
relation_version = None
access_rights = "openAccess"
rec_state = "published"
format_mimetype = None
def parse_xmletree_str(input_string):
    """Parse *input_string* with ``ET.fromstring`` and return the result.

    ``xmletree.register_namespaces()`` is invoked first, before any parsing
    takes place.
    """
    xmletree.register_namespaces()
    parsed_root = ET.fromstring(input_string)
    return parsed_root
# Fragment of a larger routine whose header is outside this extract.
# Each line merges the result of one helper call on rml_person into `person`
# via update(); the "source -> target" comments name the RML field read and the
# key written.
# biography -> biographies
person.update(get_rml_textlangs_and_set_key(rml_person, "biography", "biographies"))
# dateOfBirth -> date_birth
person.update(get_rml_element_text_and_set_key(rml_person, "dateOfBirth", "date_birth"))
# dateOfDeath -> date_death
person.update(get_rml_element_text_and_set_key(rml_person, "dateOfDeath", "date_death"))
# degree -> degrees
person.update(get_rml_degrees(rml_person))
# email -> emails
person.update(get_rml_emails(rml_person))
# @fictitious -> fictitious
person.update(xmletree.get_element_attribute_as_boolean_and_set_key(rml_person, "fictitious", "fictitious"))
# firstname -> name_given
person.update(get_rml_element_text_and_set_key(rml_person, "firstname", "name_given"))
# identifier -> identifiers & rec_id
person.update(get_rml_identifiers(rml_person))
# image -> resources[i]
person.update(get_rml_images(rml_person, "picture"))
# instantMessage -> instant_messages
person.update(get_rml_instant_messages(rml_person))
# languageCapability -> language_capabilities
person.update(get_rml_language_capabilities(rml_person))
# NOTE(review): indentation was lost and the function is cut off in this extract;
# only the root element and the proper titleInfo are visible.
def metajson_to_mods_xmletree(document, with_schema_location=False):
""" MetaJSON Document -> MODS xmletree """
# Subscript access: a document without "rec_id" raises KeyError here.
# rec_id is not used in the visible part of the function.
rec_id = document["rec_id"]
xmletree.register_namespaces()
# mods root
mods_root = ET.Element(xmletree.prefixtag("mods", "mods"), version="3.5")
if with_schema_location:
# xsi:schemaLocation value is "<namespace> <schema>" joined by a single space.
mods_root.set(xmletree.prefixtag("xsi", "schemaLocation"), constants.xmlns_map["mods"] + " " + constants.xmlns_schema_map["mods"])
# titleInfoProper
# NOTE(review): the root tag goes through prefixtag("mods", ...) but titleInfo and
# its children use bare tag names -- confirm this is intended.
titleInfoProper = ET.SubElement(mods_root, "titleInfo")
# Each title part is emitted only when its key is present in the document.
if "title" in document:
title = ET.SubElement(titleInfoProper, "title")
title.text = document["title"]
if "title_non_sort" in document:
nonSort = ET.SubElement(titleInfoProper, "nonSort")
nonSort.text = document["title_non_sort"]
if "title_sub" in document:
subTitle = ET.SubElement(titleInfoProper, "subTitle")
subTitle.text = document["title_sub"]
def response_to_xmletree(response):
    """Parse ``response.content`` with ``ET.fromstring`` and return the result.

    ``xmletree.register_namespaces()`` is invoked first, before the payload is
    read from *response*.
    """
    xmletree.register_namespaces()
    payload = response.content
    return ET.fromstring(payload)
# Fragment of a larger routine whose header is outside this extract
# (`openurl_response` and `source` are bound before this point).
# NOTE(review): indentation was lost in this extract; the nesting described in
# these comments is inferred from statement order -- confirm against the full file.
# Creates one Document per ssopenurl:result and attaches ISSN / eISSN identifiers
# taken from the result's ssopenurl:citation.
documents = []
if openurl_response is not None:
#logging.debug(type(openurl_response))
#logging.debug(openurl_response)
# results
openurl_results = openurl_response.find(xmletree.prefixtag("ssopenurl", "results"))
if openurl_results is not None:
# result
openurl_result_list = openurl_results.findall(xmletree.prefixtag("ssopenurl", "result"))
if openurl_result_list:
for openurl_result in openurl_result_list:
document = Document()
# "source" is recorded only when a truthy source was supplied.
if source:
document["source"] = source
# citation
openurl_citation = openurl_result.find(xmletree.prefixtag("ssopenurl", "citation"))
if openurl_citation is not None:
# issn
openurl_issn = openurl_citation.find(xmletree.prefixtag("ssopenurl", "issn"))
if openurl_issn is not None:
identifier_issn = Identifier()
identifier_issn["id_type"] = "issn"
identifier_issn["value"] = openurl_issn.text
document.add_item_to_key(identifier_issn, "identifiers")
# eissn
openurl_eissn = openurl_citation.find(xmletree.prefixtag("ssopenurl", "eissn"))
if openurl_eissn is not None:
identifier_eissn = Identifier()
identifier_eissn["id_type"] = "eissn"
identifier_eissn["value"] = openurl_eissn.text
document.add_item_to_key(identifier_eissn, "identifiers")
# linkGroups