How to use the pymarc.parse_xml_to_array function in pymarc

To help you get started, we’ve selected a few pymarc examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github edsu / pymarc / test / test_json.py View on Github external
def setUp(self):
        """Load the binary MARC, JSON and XML fixtures used by the tests.

        The JSON and XML fixtures are parsed into arrays straight away, so
        their files are closed as soon as parsing returns. The binary reader
        is stored on the instance for later use, so its file handle is kept
        open and closed during test cleanup instead.
        """
        # NOTE(review): addCleanup assumes the enclosing class derives from
        # unittest.TestCase -- confirm against the class header.
        dat_handle = open("test/one.dat", "rb")
        self.addCleanup(dat_handle.close)
        self.reader_dat = pymarc.MARCReader(dat_handle)

        with open("test/one.json") as handle:
            self.parse_json = pymarc.parse_json_to_array(handle)

        with open("test/batch.xml") as handle:
            self.batch_xml = pymarc.parse_xml_to_array(handle)
        with open("test/batch.json") as handle:
            self.batch_json = pymarc.parse_json_to_array(handle)
github edsu / pymarc / test / test_xml.py View on Github external
def test_parse_to_array(self):
        """Parse test/batch.xml by path and check the resulting records.

        Verifies the number of records, their type, the field count of the
        first record and the content of its 008 control field.
        """
        records = pymarc.parse_xml_to_array("test/batch.xml")
        self.assertEqual(len(records), 2)

        # should've got two records
        self.assertEqual(type(records[0]), pymarc.Record)
        self.assertEqual(type(records[1]), pymarc.Record)

        # first record should have 18 fields
        record = records[0]
        self.assertEqual(len(record.get_fields()), 18)

        # check the content of a control field
        self.assertEqual(
            record["008"].data, u"910926s1957    nyuuun              eng  "
        )

        # check a data field with subfields
github edsu / pymarc / test / test_xml.py View on Github external
def test_bad_tag(self):
        """Check that test/bad_tag.xml still parses into exactly one record."""
        # Use a context manager so the fixture file is closed even if
        # parsing raises.
        with open("test/bad_tag.xml") as handle:
            a = pymarc.parse_xml_to_array(handle)
        self.assertEqual(len(a), 1)
github edsu / pymarc / test / test_xml.py View on Github external
def test_xml(self):
        # read in xml to a record
        record1 = pymarc.parse_xml_to_array("test/batch.xml")[0]
        # generate xml
        xml = pymarc.record_to_xml(record1)
        # parse generated xml
        record2 = pymarc.parse_xml_to_array(BytesIO(xml))[0]

        # compare original and resulting record
        self.assertEqual(record1.leader, record2.leader)

        field1 = record1.get_fields()
        field2 = record2.get_fields()
        self.assertEqual(len(field1), len(field2))

        pos = 0
        while pos < len(field1):
            self.assertEqual(field1[pos].tag, field2[pos].tag)
            if field1[pos].is_control_field():
github LibraryOfCongress / chronam / core / management / commands / purge_etitles.py View on Github external
def handle(self, **options):
        """Purge titles whose MARC 245$h marks them as electronic resources.

        Walks every Title with a URL containing 'chroniclingamerica', parses
        its MARC XML and inspects subfield h of field 245. Matching titles
        are only written to stdout when ``options['pretend']`` is truthy;
        otherwise they are removed from the index and deleted. The index is
        committed once at the end unless pretending.
        """
        for title in Title.objects.filter(urls__value__icontains='chroniclingamerica'):
            record = pymarc.parse_xml_to_array(StringIO(title.marc.xml))[0]
            if record['245']['h'] == '[electronic resource].':
                if options['pretend']:
                    # NOTE(review): this passes the Title object itself to
                    # stdout.write rather than a string -- confirm the
                    # output wrapper accepts that.
                    self.stdout.write(title)
                else:
                    # Interpolate the title and LCCN; previously the raw
                    # format string was written with its %s placeholders.
                    self.stdout.write("deleting %s [%s] from solr index" % (title, title.lccn))
                    index.delete_title(title)
                    self.stdout.write("purging %s [%s]" % (title, title.lccn))
                    title.delete()
        if not options['pretend']:
            index.commit()
github miku / siskin / siskin / assets / 30 / 30_marcbinary.py View on Github external
continue
        cleaned_subfields.append(code)
        cleaned_subfields.append(value)
    return cleaned_subfields


# Default input/output paths; overridden by the first two command-line
# arguments when at least two are supplied.
inputfilename = "30_input.xml"
outputfilename = "30_output.mrc"

if len(sys.argv) >= 3:
    inputfilename, outputfilename = sys.argv[1:3]

inputfile = io.open(inputfilename, "rb")
outputfile = io.open(outputfilename, "wb")

# Parse the input XML into pymarc records.
reader = pymarc.parse_xml_to_array(inputfile)

for oldrecord in reader:

    newrecord = marcx.Record()
    newrecord.strict = False

    # check whether a title is present; skip the record otherwise
    if not oldrecord["245"]:
        continue

    # leader: replace the first five characters with blanks, keep the rest
    newrecord.leader = "     " + oldrecord.leader[5:]
    if len(newrecord.leader) < 9:
        logging.debug("too short %s: %s", len(newrecord.leader), newrecord.leader)
        continue
github htrc / htrc-feature-reader / htrc_features / feature_reader.py View on Github external
Fetch additional information about a volume from the HathITrust Bibliographic API.

        See: https://www.hathitrust.org/bib_api

        return: A `pymarc` record. See pymarc's documentation for details on using it.
        """
        if not self._extra_metadata:
            logging.debug("Looking up full metadata for {0}".format(self.id))
            data = requests.get(self.ht_bib_url).json()

            record_id = data['items'][0]['fromRecord']
            marc = data['records'][record_id]['marc-xml']

            # Pymarc only reads a file, so stream the text as if it was one
            xml_stream = StringIO(marc)
            xml_record = pymarc.parse_xml_to_array(xml_stream)[0]
            xml_stream.close()

            self._extra_metadata = xml_record
        return self._extra_metadata
github miku / siskin / siskin / assets / 52 / 52_marcbinary.py View on Github external
# MARC tags listed for copying (the use of this tuple is not visible in this excerpt).
copytags = ("003", "005", "006", "007", "008", "020", "022", "024", "035", "040", "084", "100", "110", "245", "246", "260", "300", "310", "362", "490", "520",
            "650", "651", "700", "710", "760", "762", "773", "775", "780", "785", "830")

# Default input/output paths; overridden when exactly two command-line
# arguments are supplied.
inputfilename = "52_input.xml"
outputfilename = "52_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

# NOTE(review): inputfile is opened here, but the parse below re-opens
# inputfilename itself; no use of inputfile is visible in this excerpt --
# confirm it is still needed and closed later.
inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")

# reader = pymarc.MARCReader(inputfile, force_utf8=True)
with open(inputfilename) as handle:
    records = pymarc.parse_xml_to_array(handle)

for oldrecord in records:

    newrecord = marcx.Record(force_utf8=True)

    # leader: replace the first five characters with blanks, keep the rest
    leader = "     " + oldrecord.leader[5:]
    newrecord.leader = leader

    # 001: strip "-" and "_" from the old identifier and prefix it
    f001 = oldrecord["001"].data
    f001 = f001.replace("-", "")
    f001 = f001.replace("_", "")
    newrecord.add("001", data="finc-52-%s" % f001)

    # ISBN
    # ISBN
github miku / siskin / siskin / assets / 159 / 159_marcbinary.py View on Github external
import sys

import marcx
import pymarc
from siskin.mappings import formats
from siskin.utils import marc_clean_record

# Default input/output paths; overridden when exactly two command-line
# arguments are supplied.
inputfilename = "159_input.xml"
outputfilename = "159_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")
# Parse the input XML into pymarc records.
reader = pymarc.parse_xml_to_array(inputfile)

for record in reader:

    # Convert the parsed record into a marcx.Record before modifying it.
    record = marcx.Record.from_record(record)
    record.force_utf8 = True
    record.strict = False

    # Format determination
    format = "Manuscript"

    # Leader: taken from the formats mapping for the chosen format
    leader = formats[format]["Leader"]
    record.leader = leader

    # Identifier
    f001 = record["001"].data
github miku / siskin / siskin / assets / 156 / 156_marcbinary.py View on Github external
import pymarc
from siskin.mappings import formats
from siskin.utils import check_isbn, check_issn, marc_clean_record

# MARC tags listed for copying (the use of this tuple is not visible in this excerpt).
copytags = ("100", "105", "120", "130", "150", "174", "200", "245", "246", "250", "260", "300", "335", "351", "361", "400", "500", "520", "650", "689", "700",
            "710", "800")

# Default input/output paths; overridden when exactly two command-line
# arguments are supplied.
inputfilename = "156_input.xml"
outputfilename = "156_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")
# Parse the input XML into pymarc records.
oldrecords = pymarc.parse_xml_to_array(inputfile)

for i, oldrecord in enumerate(oldrecords, start=1):

    # Skip records for which the 245$a lookup fails. Only lookup failures
    # are caught (a missing key, or subscripting None when field 245 is
    # absent), so KeyboardInterrupt and SystemExit are no longer swallowed
    # as they were by the previous bare except.
    try:
        f245a = oldrecord["245"]["a"]
    except (KeyError, TypeError):
        continue

    newrecord = marcx.Record(force_utf8=True)
    newrecord.strict = False

    # blanket assignment: every record is treated as a book
    format = "Book"

    # leader: taken from the formats mapping for the chosen format
    leader = formats[format]["Leader"]