Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def parse_metajson_file(file_path):
    """Yield the loaded records stored in a MetaJSON file.

    The file at ``file_path`` is opened and passed to
    ``jsonbson.load_json_file``. When the parsed content has a "records"
    key, every truthy entry under it is run through
    ``metajson_service.load_dict`` and yielded; falsy entries are skipped.
    Nothing is yielded when the key is absent.
    """
    with open(file_path) as source:
        parsed = jsonbson.load_json_file(source)
        if "records" not in parsed:
            return
        for entry in parsed["records"]:
            if not entry:
                # empty / null slots in the records list are ignored
                continue
            yield metajson_service.load_dict(entry)
has_isbn = False
has_eissn = False
identifiers_item = []
is_part_of_identifiers = []
for sum_key in summon_identifier_type_to_metajson_identifier_type:
if sum_key in sum_doc:
for id_value in sum_doc[sum_key]:
id_type = summon_identifier_type_to_metajson_identifier_type[sum_key]
if id_type == "issn":
is_part_of_identifiers.append(metajson_service.create_identifier(id_type, id_value))
elif id_type == "eissn":
has_eissn = True
is_part_of_identifiers.append(metajson_service.create_identifier(id_type, id_value))
elif id_type == "isbn":
has_isbn = True
is_part_of_identifiers.append(metajson_service.create_identifier(id_type, id_value))
else:
identifiers_item.append(metajson_service.create_identifier(id_type, id_value))
# is_part_of_type determination
is_part_of_type = None
if sum_type in summon_document_type_to_metajson_document_is_part_of_type:
is_part_of_type = summon_document_type_to_metajson_document_is_part_of_type[sum_type]
elif is_part_of_title and is_part_of_title != title and rec_type not in ["Book", "Journal", "Magazine", "Newspaper", "Periodical"]:
if has_isbn:
is_part_of_type = "Book"
elif has_eissn:
is_part_of_type = "Journal"
elif is_part_of_title.lower().find("conference") != -1:
is_part_of_type = "Book"
elif is_part_of_title.lower().find("review") or is_part_of_title.lower().find("journal"):
def get_document_by_rec_id(corpus, rec_id):
    """Return the document whose "rec_id" field equals ``rec_id``.

    ``default_corpus`` is used when ``corpus`` is falsy. The record returned
    by ``find_one`` is passed through ``metajson_service.load_dict``.

    Raises ``exceptions.metajsonprc_error(1)`` when ``find_one`` returns
    nothing truthy.
    """
    corpus_name = corpus or default_corpus
    documents = mongodb[database_name(corpus_name)][DOCUMENTS]
    found = documents.find_one({"rec_id": rec_id})
    if not found:
        raise exceptions.metajsonprc_error(1)
    return metajson_service.load_dict(found)
def write_metajson_collection(col_id, col_title, items, output_file_path):
    """Wrap ``items`` in a collection and write it to ``output_file_path``.

    The collection is built with ``metajson_service.create_collection`` and
    handed to ``write_json``. Nothing is written when ``items`` is falsy.
    """
    if not items:
        return
    write_json(
        metajson_service.create_collection(col_id, col_title, items),
        output_file_path,
    )
# NOTE(review): excerpt from the middle of a conversion routine. The opening
# `if` of this input_type dispatch and the enclosing `def` are not visible.
# Each branch hands the same five arguments to the parser for that file type
# and stores the outcome in metajson_list.
elif input_type == constants.FILE_TYPE_TXT:
# text input -> parse_and_convert_txt_lines
metajson_list = parse_and_convert_txt_lines(input_file_path, input_format, source, rec_id_prefix, only_first_record)
elif input_type == constants.FILE_TYPE_MARC:
# MARC input -> parse_and_convert_marc
metajson_list = parse_and_convert_marc(input_file_path, input_format, source, rec_id_prefix, only_first_record)
elif input_type == constants.FILE_TYPE_CSV:
# CSV input -> parse_and_convert_csv
metajson_list = parse_and_convert_csv(input_file_path, input_format, source, rec_id_prefix, only_first_record)
# The enhancement step below is applied only to a truthy parse result.
if metajson_list:
# enhance metajson list
metajson_list = metajson_service.enhance_metajson_list(metajson_list)
# NOTE(review): indentation was lost, so it is unclear whether this return
# sits inside the `if metajson_list:` guard — confirm against the full file.
return convert_metajson_list(metajson_list, output_format, all_in_one_file)
# NOTE(review): excerpt from the end of a larger function; `rml_phones` and
# `result` are defined earlier, outside this view.
# Build one phone entry per non-None RML phone element and store the list
# under result["phones"].
phones = []
for rml_phone in rml_phones:
# skip missing (None) elements
if rml_phone is not None:
# @preferred -> preferred
preferred = xmletree.get_element_attribute_as_boolean(rml_phone, "preferred")
# @relationType -> relation_type
relation_type = rml_phone.get("relationType")
# @type -> phone_type
phone_type = rml_phone.get("type")
# @visible -> visible
visible = xmletree.get_element_attribute_as_boolean(rml_phone, "visible")
# formatted -> formatted
rml_formatted = rml_phone.find(xmletree.prefixtag("rml", "formatted"))
formatted = xmletree.get_element_text(rml_formatted)
phone = metajson_service.create_phone(formatted, phone_type, preferred, relation_type, visible)
# keep only the entries for which create_phone returned something truthy
if phone:
phones.append(phone)
# the "phones" key is set only when at least one phone was collected
if phones:
result["phones"] = phones
return result
def export_metajson_collection(col_id, col_title, metajson_list, output_file_path):
    """Serialize a collection to ``output_file_path`` and return the dump.

    ``metajson_list`` is wrapped with ``metajson_service.create_collection``,
    serialized with ``jsonbson.dumps_bson(collection, True)`` and written to
    the output file, which is opened in "w" mode. The serialized text is
    returned. When ``metajson_list`` is falsy, no file is opened and None is
    returned.
    """
    if not metajson_list:
        return None
    with open(output_file_path, "w") as sink:
        # the file is opened before serializing, as in the original ordering
        wrapped = metajson_service.create_collection(col_id, col_title, metajson_list)
        serialized = jsonbson.dumps_bson(wrapped, True)
        sink.write(serialized)
    return serialized
# NOTE(review): excerpt from the middle of a larger function; `seriess`,
# `resources`, `related`, `document`, `field` and `relateditems_dict` are
# defined outside this view.
# Attach the collected series and resources to the related item, only when
# they are non-empty.
if seriess:
related["seriess"] = seriess
if resources:
related["resources"] = resources
# The rec_type / title completion below runs only for a non-empty related item.
if related:
# debug
#logging.debug("document.rec_type: {}".format(document["rec_type"]))
#logging.debug("document.title: {}".format(document["title"]))
#logging.debug("field: {}".format(field))
# rec_type: chosen from element [1] of the relateditems_dict entry for this
# field tag
if relateditems_dict[field.tag][1] == "same":
# same type as the root document
related["rec_type"] = document["rec_type"]
elif relateditems_dict[field.tag][1] == constants.DOC_TYPE_SERIES:
related["rec_type"] = constants.DOC_TYPE_SERIES
elif relateditems_dict[field.tag][1] == "is_part_ofs":
# type derived from the root document's rec_type by the service helper
related["rec_type"] = metajson_service.get_is_part_of_rec_type_from_root_rec_type(document["rec_type"])
elif relateditems_dict[field.tag][1] == "has_parts":
# type derived from the root document's rec_type by the service helper
related["rec_type"] = metajson_service.get_has_part_rec_type_from_root_rec_type(document["rec_type"])
elif relateditems_dict[field.tag][1] == "is_review_ofs":
related["rec_type"] = constants.DOC_TYPE_BOOK
else:
# any other value falls back to the generic document type
related["rec_type"] = constants.DOC_TYPE_DOCUMENT
# A generic document is re-typed as a video recording when its
# rec_type_description is exactly "Images animées".
if related["rec_type"] == constants.DOC_TYPE_DOCUMENT and "rec_type_description" in related:
if related["rec_type_description"] == "Images animées":
related["rec_type"] = constants.DOC_TYPE_VIDEORECORDING
#logging.debug("related.rec_type: {}".format(related["rec_type"]))
# title: default to an empty string when the related item has none
if "title" not in related:
related["title"] = ""
#logging.debug("related.title: {}".format(related["title"]))
#logging.debug("related property: {}".format(relateditems_dict[field.tag][0]))
# add to document properties
# NOTE(review): the code that performs this step lies outside the visible excerpt.
# NOTE(review): excerpt from the middle of a larger function;
# `mods_related_item`, `root_rec_type` and `result` are defined outside this view.
# extract the relatedItem type attribute
mods_related_item_type = mods_related_item.get("type")
# mods_related_item_type in : preceding, succeeding, original, host, constituent, series, otherVersion, otherFormat, isReferencedBy, references, reviewOf)
# convert the related item the same way as a MODS root record
related_item = mods_root_or_related_item_to_metajson(mods_related_item, root_rec_type)
if related_item is not None:
# extract related_item rec_type
# NOTE(review): related_item_rec_type is only referenced by the commented-out
# debug line within this excerpt; it may be used further down.
related_item_rec_type = related_item["rec_type"]
#logging.debug("root_rec_type: {} related_item_rec_type: {} mods_related_item_type: {} ".format(root_rec_type, related_item_rec_type, mods_related_item_type))
if mods_related_item_type == "host":
# move the part fields from the related item to the root document
metajson_service.move_keys_between_dicts(MODS_PART_FIELDS, related_item, result)
# copy the date fields from the related item to the root document
# (only when the root record type is one of MODS_ARTICLE_TYPES)
if root_rec_type in MODS_ARTICLE_TYPES:
metajson_service.copy_keys_between_dicts(MODS_DATE_FIELDS, related_item, result)
# host -> is_part_ofs
result.add_item_to_key(related_item, "is_part_ofs")
elif mods_related_item_type == "original":
# for a review record the "original" item is the work being reviewed
if root_rec_type in ["BookReview", "ArticleReview"]:
# original -> is_review_ofs
result.add_item_to_key(related_item, "is_review_ofs")
else:
# original -> originals
result.add_item_to_key(related_item, "originals")
# NOTE(review): handling of the remaining relatedItem types is outside the
# visible excerpt.