How to use the pymarc.MARCReader class in pymarc

To help you get started, we’ve selected a few pymarc examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github edsu / pymarc / test / test_marc8.py View on Github external
self.assertEqual(type(utitle), bytes)
            self.assertEqual(utitle, b"De la solitude a\xcc\x80 la communaute\xcc\x81.")

        with open("test/utf8_without_leader_flag.dat", "rb") as fh:
            reader = MARCReader(fh, to_unicode=True, hide_utf8_warnings=True)
            record = next(reader)
            self.assertEqual(type(record), Record)
            utitle = record["240"]["a"]
            self.assertEqual(type(utitle), str)
            # Unless you force UTF-8, characters will get lost and
            # warnings will appear in the terminal.
            self.assertEqual(utitle, "De la solitude a   la communaute .")

        # force reading as utf-8
        with open("test/utf8_without_leader_flag.dat", "rb") as fh:
            reader = MARCReader(
                fh, to_unicode=True, force_utf8=True, hide_utf8_warnings=True
            )
            record = next(reader)
            self.assertEqual(type(record), Record)
            utitle = record["240"]["a"]
            self.assertEqual(type(utitle), str)
            self.assertEqual(
                utitle,
                u"De la solitude a"
                + chr(0x0300)
                + " la communaute"
                + chr(0x0301)
                + ".",
            )
github slub / efre-lod-elasticsearch-tools / helperscripts / fincsolr2marc.py View on Github external
parser.add_argument('-replaceMethod',type=str,default="decimal",help="Which kind of replace method to use. available: ")
    parser.add_argument('-valid',action="store_true",help="validate MARC Records")
    parser.add_argument('-toJson',action="store_true",default=False,help="Transpose to MarcXchange JSON on the fly")
    args=parser.parse_args()
    if args.help:
        parser.print_help(sys.stderr)
        exit()        

    for line in sys.stdin:
        record=json.loads(line)
        if record and record.get("recordtype") and args.format=="marc" and "marc" in record.get("recordtype") and not "xml" in record.get("recordtype"):
            marcFullRecordFixed=fixRecord(record=record.get(args.frfield),record_id=record.get("record_id"),validation=args.valid,replaceMethod=args.replaceMethod)
            if not args.toJson:
                sys.stdout.write(marcFullRecordFixed)
            else:
                for record in MARCReader(marcFullRecordFixed.encode('utf-8'), to_unicode=True):
                    sys.stdout.write(json.dumps(transpose_to_ldj(record)))
        elif record and record.get("recordtype") and "marcxml" in record.get("recordtype") and args.format=="marcxml":
                pymarc.marcxml.parse_xml_to_array(StringIO(record.get(args.frfield))) #need wrapper in StringIO for read()-need in marcxml lib
github miku / siskin / siskin / assets / 161 / 161_marcbinary.py View on Github external
import marcx
import pymarc

copytags = ("003", "005", "006", "007", "008", "020", "040", "043", "050", "082", "100", "245", "246", "250", "264", "300", "336", "337", "338", "490", "500",
            "505", "520", "600", "610", "650", "651", "700", "710", "776", "830", "856")

inputfilename = "161_input.mrc"
outputfilename = "161_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")
reader = pymarc.MARCReader(inputfile)

for oldrecord in reader:

    try:
        f245a = oldrecord["245"]["a"]
    except:
        continue

    newrecord = marcx.Record(force_utf8=True)

    # Leader
    leader = "     " + oldrecord.leader[5:]
    newrecord.leader = leader

    # Identifikator
    f001 = oldrecord["001"].data
github slub / efre-lod-elasticsearch-tools / processing / finc2rdf.py View on Github external
def get_contributon(record, prop):
    fullrecord_fixed = fixRecord(record=getProperty(record, prop), record_id=record.get(
        "record_id"), validation=False, replaceMethod='decimal')
    reader = pymarc.MARCReader(fullrecord_fixed.encode('utf-8'))
    data = []
    fields = ["100", "110", "111", "700", "710", "711"]
    for record in reader:
        for field in fields:
            for f in record.get_fields(field):
                contributor = {
                    "@type": ["bf:Contribution"],
                    "bf:agent": {
                        "@id": "http://d-nb.info/gnd/"
                    },
                    "bf:role": {
                        "@id": "http://id.loc.gov/vocabulary/relators/",
                    }
                }
                if f['a']:
                    contributor["bf:agent"]["rdfs:ch_label"] = f['a']
github unt-libraries / catalog-api / django / sierra / export / sierra2marc.py View on Github external
filename = filename or '{}.mrc'.format(timestamp())
        filepath = filepath or '{}'.format(settings.MEDIA_ROOT)
        self.success_count = 0
        # If the file exists and append is True, we want to open the
        # file up, read in the MARC records, then append our
        # marc_records to that.
        existing_records = []
        if filepath[-1] != '/':
            filepath = '{}/'.format(filepath)
        try:
            marcfile = file('{}{}'.format(filepath, filename), 'r')
        except IOError:
            pass
        else:
            if append:
                reader = pymarc.MARCReader(marcfile)
                existing_records.extend(reader)
            else:
                # If we're not appending but we found an existing file,
                # let's find a new filename that doesn't exist.
                file_exists = True
                while file_exists:
                    filename = '{}.mrc'.format(timestamp())
                    try:
                        file('{}{}'.format(filepath, filename), 'r')
                    except IOError:
                        file_exists = False

        try:
            marcfile = file('{}{}'.format(filepath, filename), 'w')
        except IOError:
            raise
github slub / efre-lod-elasticsearch-tools / helperscripts / fincsolr2marc.py View on Github external
def fixRecord(record="",record_id=0,validation=False,replaceMethod='decimal'):
    """Swap escaped separator placeholders in a record string for control characters.

    Args:
        record: Record text containing the placeholders to substitute.
        record_id: Identifier used only in the error message printed when
            validation fails.
        validation: When true, the substituted text is additionally parsed
            with ``pymarc.MARCReader`` and rejected if parsing raises one of
            the handled errors.
        replaceMethod: Which placeholder set to substitute: ``'decimal'``,
            ``'unicode'`` or ``'hex'``.

    Returns:
        The substituted record text, or ``None`` when validation was requested
        and reading the first record failed. In that case the text is also
        appended to ``invalid_records.txt``.
    """
    # Every placeholder set maps onto the same three control characters.
    control_chars = ("\x1D", "\x1E", "\x1F")
    replaceMethods = {
        'decimal': (('#29;', '#30;', '#31;'), control_chars),
        'unicode': (('\u001d', '\u001e', '\u001f'), control_chars),
        'hex': (('\x1D', '\x1E', '\x1F'), control_chars),
    }

    fixed = record
    for slot in (0, 1, 2):
        swap = fixed.replace
        escaped, control = replaceMethods.get(replaceMethod)
        fixed = swap(escaped[slot], control[slot])

    if not validation:
        return fixed

    try:
        reader = pymarc.MARCReader(fixed.encode('utf8'), utf8_handling='replace')
        # Only the first record is read; the parsed result itself is not needed.
        next(reader)
    except (RecordLengthInvalid, RecordLeaderInvalid, BaseAddressNotFound, BaseAddressInvalid, RecordDirectoryInvalid, NoFieldsFound, UnicodeDecodeError) as e:
        eprint("record id {0}:".format(record_id) + str(e))
        # Keep the rejected text so it can be inspected later.
        with open('invalid_records.txt', 'a') as error:
            eprint(fixed, file=error)
        return None
    return fixed
github thisismattmiller / catalog-network / marc2gexf / generate_gexf.py View on Github external
def processFile(self, file):

		print ""

		print "Total Subjects:", self.globalSubjectsCount
		print "Total Records:", self.globalTotalRecords

		print "Processing next file:"
		print file

		reader = MARCReader(open(file))

		count = 0

		

		for record in reader:

			count+=1
			self.globalTotalRecords+=1

			sys.stdout.write("\rRecord# %d" %count)


			#print record

			subjects = []
github jermnelson / aristotle-library-apps / aristotle / management / commands / load_authorities.py View on Github external
def __index_titles__(**kwargs):
    """Index every record of a MARC file via ``index_marc``.

    Keyword Args:
        redis_datastore: Datastore handed to ``index_marc`` for each record;
            defaults to the module-level ``REDIS_DATASTORE``.
        filename: Path of the MARC file to read. When missing or ``None``
            the function returns immediately without doing anything.

    Progress is reported while running: a ``.`` is written to stderr for
    every 100th record and the record index is printed for every 1000th.
    Start and end timestamps are printed to stdout.
    """
    redis_ds = kwargs.get('redis_datastore',
                          REDIS_DATASTORE)
    filename = kwargs.get('filename', None)
    if filename is None:
        return
    # The context manager guarantees the file handle is closed once indexing
    # finishes or fails, instead of leaking it.
    with open(filename, 'rb') as marc_file:
        title_authorities = pymarc.MARCReader(marc_file, to_unicode=True)
        start_time = datetime.datetime.utcnow()
        print("Started title indexing at {0}".format(start_time.isoformat()))
        for i, rec in enumerate(title_authorities):
            index_marc(record=rec, redis_datastore=redis_ds)
            if not i % 100:
                sys.stderr.write(".")
            if not i % 1000:
                print(i)
    end_time = datetime.datetime.utcnow()
    print("End title indexing at {0}, total-time={1}".format(
        end_time.isoformat(),
        end_time - start_time))