Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Load the single-record and batch fixtures used by the tests.

    Sets ``reader_dat`` (a MARCReader over test/one.dat), ``parse_json``
    (records parsed from test/one.json), ``batch_xml`` (records parsed
    from test/batch.xml) and ``batch_json`` (records parsed from
    test/batch.json).
    """
    # The reader is handed an open binary handle and is consumed later by
    # the tests, so this handle must outlive setUp.
    # NOTE(review): nothing visible here closes it; confirm a tearDown does.
    self.reader_dat = pymarc.MARCReader(open("test/one.dat", "rb"))

    # The parse_*_to_array helpers return complete arrays of records, so
    # their input files can be closed as soon as parsing has finished.
    with open("test/one.json") as json_file:
        self.parse_json = pymarc.parse_json_to_array(json_file)
    with open("test/batch.xml") as xml_file:
        self.batch_xml = pymarc.parse_xml_to_array(xml_file)
    with open("test/batch.json") as json_file:
        self.batch_json = pymarc.parse_json_to_array(json_file)
def test_parse_to_array(self):
    """Parse test/batch.xml by path and check the shape of the result."""
    records = pymarc.parse_xml_to_array("test/batch.xml")

    # should've got two records, each of them a pymarc.Record
    self.assertEqual(len(records), 2)
    for parsed in records:
        self.assertIsInstance(parsed, pymarc.Record)

    # first record should have 18 fields
    record = records[0]
    self.assertEqual(len(record.get_fields()), 18)

    # check the content of a control field
    # MARC 21 field 008 is a fixed 40-character field, so the blank runs are
    # spelled out with explicit counts: date 2 (4 blanks), the unused
    # material-specific positions (14 blanks) and the two trailing positions.
    # NOTE(review): padding reconstructed from the 008 layout; confirm
    # against the fixture.
    expected_008 = (
        u"910926s1957" + u" " * 4 + u"nyuuun" + u" " * 14 + u"eng" + u" " * 2
    )
    self.assertEqual(record["008"].data, expected_008)

    # TODO: check a data field with subfields (no such assertion is present)
def test_bad_tag(self):
    """Parsing the test/bad_tag.xml fixture should yield exactly one record."""
    # Close the fixture deterministically instead of leaking the handle.
    with open("test/bad_tag.xml") as xml_file:
        records = pymarc.parse_xml_to_array(xml_file)
    self.assertEqual(len(records), 1)
def test_xml(self):
# read in xml to a record
record1 = pymarc.parse_xml_to_array("test/batch.xml")[0]
# generate xml
xml = pymarc.record_to_xml(record1)
# parse generated xml
record2 = pymarc.parse_xml_to_array(BytesIO(xml))[0]
# compare original and resulting record
self.assertEqual(record1.leader, record2.leader)
field1 = record1.get_fields()
field2 = record2.get_fields()
self.assertEqual(len(field1), len(field2))
pos = 0
while pos < len(field1):
self.assertEqual(field1[pos].tag, field2[pos].tag)
if field1[pos].is_control_field():
def handle(self, **options):
    """Purge titles whose MARC 245$h marks them as electronic resources.

    Only titles with a URL containing 'chroniclingamerica' are examined.
    With ``options['pretend']`` set, matching titles are only listed;
    otherwise each match is removed from the index and the database, and
    the index is committed once after the loop.
    """
    for title in Title.objects.filter(urls__value__icontains='chroniclingamerica'):
        record = pymarc.parse_xml_to_array(StringIO(title.marc.xml))[0]
        # NOTE(review): a record without a 245 field is not handled here;
        # confirm whether such titles can occur.
        if record['245']['h'] == '[electronic resource].':
            if options['pretend']:
                # stdout.write needs a string, not the model instance.
                self.stdout.write("%s" % title)
            else:
                # The format arguments were missing, so the placeholders
                # were printed literally.
                self.stdout.write(
                    "deleting %s [%s] from solr index" % (title, title.lccn)
                )
                index.delete_title(title)
                self.stdout.write("purging %s [%s]" % (title, title.lccn))
                title.delete()
    if not options['pretend']:
        index.commit()
continue
cleaned_subfields.append(code)
cleaned_subfields.append(value)
return cleaned_subfields
# Default paths; both can be overridden from the command line:
#   <script> INPUT.xml OUTPUT.mrc
inputfilename = "30_input.xml"
outputfilename = "30_output.mrc"

if len(sys.argv) >= 3:
    inputfilename, outputfilename = sys.argv[1:3]

inputfile = io.open(inputfilename, "rb")
outputfile = io.open(outputfilename, "wb")

reader = pymarc.parse_xml_to_array(inputfile)

for oldrecord in reader:

    newrecord = marcx.Record()
    newrecord.strict = False

    # check whether a title is present; records without field 245 are skipped
    if not oldrecord["245"]:
        continue

    # leader
    # Leader positions 00-04 hold the record length, so exactly five blanks
    # replace them. A shorter prefix would shift every later leader position.
    newrecord.leader = " " * 5 + oldrecord.leader[5:]
    if len(newrecord.leader) < 9:
        logging.debug("too short %s: %s", len(newrecord.leader), newrecord.leader)
        continue
Fetch additional information about a volume from the HathiTrust Bibliographic API.
See: https://www.hathitrust.org/bib_api
return: A `pymarc` record. See pymarc's documentation for details on using it.
"""
if not self._extra_metadata:
logging.debug("Looking up full metadata for {0}".format(self.id))
data = requests.get(self.ht_bib_url).json()
record_id = data['items'][0]['fromRecord']
marc = data['records'][record_id]['marc-xml']
# Pymarc only reads a file, so stream the text as if it was one
xml_stream = StringIO(marc)
xml_record = pymarc.parse_xml_to_array(xml_stream)[0]
xml_stream.close()
self._extra_metadata = xml_record
return self._extra_metadata
# Tags listed for copying; the code that consumes this tuple is not in view.
copytags = ("003", "005", "006", "007", "008", "020", "022", "024", "035", "040", "084", "100", "110", "245", "246", "260", "300", "310", "362", "490", "520",
            "650", "651", "700", "710", "760", "762", "773", "775", "780", "785", "830")

# Default paths; both can be overridden from the command line:
#   <script> INPUT.xml OUTPUT.mrc
inputfilename = "52_input.xml"
outputfilename = "52_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

# NOTE(review): inputfile is opened but not read in the visible code; the
# XML is parsed from a second handle below. Confirm whether it is still needed.
inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")

# Open the XML as bytes so the parser takes the encoding from the document
# itself rather than from the platform's default text encoding.
with open(inputfilename, "rb") as handle:
    records = pymarc.parse_xml_to_array(handle)

for oldrecord in records:

    newrecord = marcx.Record(force_utf8=True)

    # leader
    # Leader positions 00-04 hold the record length, so exactly five blanks
    # replace them. A shorter prefix would shift every later leader position.
    leader = " " * 5 + oldrecord.leader[5:]
    newrecord.leader = leader

    # 001: strip "-" and "_" from the source identifier and prefix it
    f001 = oldrecord["001"].data
    f001 = f001.replace("-", "")
    f001 = f001.replace("_", "")
    newrecord.add("001", data="finc-52-%s" % f001)

    # ISBN
import sys
import marcx
import pymarc
from siskin.mappings import formats
from siskin.utils import marc_clean_record
# Default paths, used unless exactly two command line arguments are given.
inputfilename = "159_input.xml"
outputfilename = "159_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1], sys.argv[2]

inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")

reader = pymarc.parse_xml_to_array(inputfile)

for record in reader:

    # Re-wrap the parsed record as a marcx record and relax its settings.
    record = marcx.Record.from_record(record)
    record.force_utf8 = True
    record.strict = False

    # format selection
    format = "Manuscript"

    # leader, looked up from the format mapping
    record.leader = leader = formats[format]["Leader"]

    # identifier
    f001 = record["001"].data
import pymarc
from siskin.mappings import formats
from siskin.utils import check_isbn, check_issn, marc_clean_record
# Tags listed for copying; the code that consumes this tuple is not in view.
copytags = ("100", "105", "120", "130", "150", "174", "200", "245", "246", "250", "260", "300", "335", "351", "361", "400", "500", "520", "650", "689", "700",
            "710", "800")

# Default paths, used unless exactly two command line arguments are given.
inputfilename = "156_input.xml"
outputfilename = "156_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")

oldrecords = pymarc.parse_xml_to_array(inputfile)

for i, oldrecord in enumerate(oldrecords, start=1):

    # Skip records whose 245$a cannot be looked up. Depending on the pymarc
    # version, a missing field or subfield raises KeyError or yields None
    # (TypeError on subscripting). A bare except would also have swallowed
    # KeyboardInterrupt and SystemExit.
    try:
        f245a = oldrecord["245"]["a"]
    except (KeyError, TypeError):
        continue

    newrecord = marcx.Record(force_utf8=True)
    newrecord.strict = False

    # blanket format assignment
    format = "Book"

    # leader
    leader = formats[format]["Leader"]