Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): `utitle` is assigned above this excerpt. These two checks expect
# it to be raw bytes in which the accents are still UTF-8 encoded combining
# marks (b"\xcc\x80" is U+0300, b"\xcc\x81" is U+0301).
self.assertEqual(type(utitle), bytes)
self.assertEqual(utitle, b"De la solitude a\xcc\x80 la communaute\xcc\x81.")
# Read the same file again, now decoding to str but without forcing UTF-8.
with open("test/utf8_without_leader_flag.dat", "rb") as fh:
reader = MARCReader(fh, to_unicode=True, hide_utf8_warnings=True)
record = next(reader)
self.assertEqual(type(record), Record)
utitle = record["240"]["a"]
self.assertEqual(type(utitle), str)
# Unless UTF-8 is forced, the accented characters get lost and warnings
# about them would appear in the terminal (they are hidden here through
# hide_utf8_warnings=True).
self.assertEqual(utitle, "De la solitude a la communaute .")
# Read the file a third time, forcing it to be decoded as UTF-8.
with open("test/utf8_without_leader_flag.dat", "rb") as fh:
reader = MARCReader(
fh, to_unicode=True, force_utf8=True, hide_utf8_warnings=True
)
record = next(reader)
self.assertEqual(type(record), Record)
utitle = record["240"]["a"]
self.assertEqual(type(utitle), str)
# With UTF-8 forced, the combining grave (U+0300) and combining acute (U+0301)
# accents are expected to be present in the decoded title.
self.assertEqual(
utitle,
u"De la solitude a"
+ chr(0x0300)
+ " la communaute"
+ chr(0x0301)
+ ".",
)
# Tail of the command-line setup; `parser` is created above this excerpt.
# NOTE(review): the help text of -replaceMethod ends with "available: " but
# lists no values.
parser.add_argument('-replaceMethod',type=str,default="decimal",help="Which kind of replace method to use. available: ")
parser.add_argument('-valid',action="store_true",help="validate MARC Records")
parser.add_argument('-toJson',action="store_true",default=False,help="Transpose to MarcXchange JSON on the fly")
args=parser.parse_args()
# NOTE(review): args.help, args.format and args.frfield must come from options
# declared above this excerpt - TODO confirm.
if args.help:
parser.print_help(sys.stderr)
exit()
# Every line read from stdin is parsed as one JSON document.
for line in sys.stdin:
record=json.loads(line)
# Binary MARC branch: the requested format is "marc" and the record's
# "recordtype" contains "marc" but not "xml".
if record and record.get("recordtype") and args.format=="marc" and "marc" in record.get("recordtype") and not "xml" in record.get("recordtype"):
marcFullRecordFixed=fixRecord(record=record.get(args.frfield),record_id=record.get("record_id"),validation=args.valid,replaceMethod=args.replaceMethod)
# Without -toJson the string returned by fixRecord is written out as is.
if not args.toJson:
sys.stdout.write(marcFullRecordFixed)
else:
# With -toJson the string is re-parsed and every record is dumped as JSON.
# NOTE(review): the loop variable rebinds `record`, and no newline is
# written between the JSON documents.
for record in MARCReader(marcFullRecordFixed.encode('utf-8'), to_unicode=True):
sys.stdout.write(json.dumps(transpose_to_ldj(record)))
# MARCXML branch: the requested format is "marcxml" and the record's
# "recordtype" contains "marcxml".
# NOTE(review): the parse result is not assigned in the visible part.
elif record and record.get("recordtype") and "marcxml" in record.get("recordtype") and args.format=="marcxml":
pymarc.marcxml.parse_xml_to_array(StringIO(record.get(args.frfield))) # wrapped in StringIO because the marcxml parser needs an object with read()
import marcx
import pymarc
# MARC tags named for copying; the code that consumes this tuple lies past
# the end of this excerpt.
copytags = ("003", "005", "006", "007", "008", "020", "040", "043", "050", "082", "100", "245", "246", "250", "264", "300", "336", "337", "338", "490", "500",
"505", "520", "600", "610", "650", "651", "700", "710", "776", "830", "856")
# Default file names, replaced when exactly two command-line arguments are given.
inputfilename = "161_input.mrc"
outputfilename = "161_output.mrc"
if len(sys.argv) == 3:
inputfilename, outputfilename = sys.argv[1:]
# Both files are opened in binary mode.
# NOTE(review): neither handle is closed in the visible part.
inputfile = open(inputfilename, "rb")
outputfile = open(outputfilename, "wb")
reader = pymarc.MARCReader(inputfile)
for oldrecord in reader:
# Records for which the 245 $a lookup raises are skipped.
# NOTE(review): the bare `except` also swallows unrelated errors, and
# f245a is not used in the visible part.
try:
f245a = oldrecord["245"]["a"]
except:
continue
newrecord = marcx.Record(force_utf8=True)
# Leader: a literal prefix followed by the old leader from offset 5 onwards.
# NOTE(review): the prefix is a single space here, which makes the new leader
# shorter than the old one - confirm that five characters were not intended.
leader = " " + oldrecord.leader[5:]
newrecord.leader = leader
# Identifier
f001 = oldrecord["001"].data
# Builds bf:Contribution dictionaries from the name fields of a MARC record.
# The definition continues past the end of this excerpt: `data` is created
# here but neither filled nor returned in the visible part.
def get_contributon(record, prop):
# Repair the serialized record found under `prop` (no validation, 'decimal'
# placeholders) and parse the result from its UTF-8 encoded form.
fullrecord_fixed = fixRecord(record=getProperty(record, prop), record_id=record.get(
"record_id"), validation=False, replaceMethod='decimal')
reader = pymarc.MARCReader(fullrecord_fixed.encode('utf-8'))
data = []
# Tags whose fields are turned into contributions.
fields = ["100", "110", "111", "700", "710", "711"]
# NOTE(review): the loop variable rebinds the `record` parameter.
for record in reader:
for field in fields:
for f in record.get_fields(field):
# Template for one contribution; the two "@id" values are only URL prefixes
# at this point.
contributor = {
"@type": ["bf:Contribution"],
"bf:agent": {
"@id": "http://d-nb.info/gnd/"
},
"bf:role": {
"@id": "http://id.loc.gov/vocabulary/relators/",
}
}
# A truthy subfield $a is stored as the agent's label.
if f['a']:
contributor["bf:agent"]["rdfs:ch_label"] = f['a']
# Fall back to a timestamp-based file name and to settings.MEDIA_ROOT when the
# caller supplied no file name or path.
filename = filename or '{}.mrc'.format(timestamp())
filepath = filepath or '{}'.format(settings.MEDIA_ROOT)
self.success_count = 0
# If the file exists and append is True, we want to open the
# file up, read in the MARC records, then append our
# marc_records to that.
existing_records = []
# Make sure the directory part ends with a slash before it is joined.
# NOTE(review): an empty filepath would raise IndexError on this check.
if filepath[-1] != '/':
filepath = '{}/'.format(filepath)
# Probe for an existing file by trying to open it for reading.
# NOTE(review): `file()` is the Python 2 builtin and does not exist in
# Python 3; the handles opened for reading are not closed in the visible part.
try:
marcfile = file('{}{}'.format(filepath, filename), 'r')
except IOError:
# The file could not be opened: nothing to append to and no name clash.
pass
else:
if append:
# Collect the records that are already stored in the file.
reader = pymarc.MARCReader(marcfile)
existing_records.extend(reader)
else:
# If we're not appending but we found an existing file,
# let's find a new filename that doesn't exist.
file_exists = True
while file_exists:
filename = '{}.mrc'.format(timestamp())
try:
file('{}{}'.format(filepath, filename), 'r')
except IOError:
file_exists = False
# Open the target for writing; an IOError is re-raised unchanged.
try:
marcfile = file('{}{}'.format(filepath, filename), 'w')
except IOError:
raise
def fixRecord(record="", record_id=0, validation=False, replaceMethod='decimal'):
    """Put the MARC separator characters back into a serialized record.

    The placeholders selected by ``replaceMethod`` are replaced with the
    ASCII control characters 0x1D, 0x1E and 0x1F, which binary MARC uses
    as record, field and subfield separators.

    Args:
        record: the serialized record as a string.
        record_id: identifier that is only used in the error message.
        validation: when true, the repaired record is parsed once with
            pymarc to check that it is readable.
        replaceMethod: name of the placeholder set: ``'decimal'``
            (``#29;``, ``#30;``, ``#31;``), ``'unicode'`` or ``'hex'``.

    Returns:
        The repaired string, or ``None`` when ``validation`` is true and
        parsing raised one of the errors handled below. In that case a
        message is reported through ``eprint`` and the record is appended
        to ``invalid_records.txt``.

    Raises:
        ValueError: if ``replaceMethod`` is not one of the known names.
    """
    replaceMethods = {
        'decimal': (('#29;', '#30;', '#31;'), ("\x1D", "\x1E", "\x1F")),
        # NOTE(review): under Python 3 the 'unicode' and 'hex' literals are
        # already the control characters themselves, so both methods leave
        # the text unchanged - confirm whether escaped text was intended.
        'unicode': (('\u001d', '\u001e', '\u001f'), ("\x1D", "\x1E", "\x1F")),
        'hex': (('\x1D', '\x1E', '\x1F'), ("\x1D", "\x1E", "\x1F")),
    }
    # Fail with a clear message instead of subscripting None further down.
    if replaceMethod not in replaceMethods:
        raise ValueError(
            "unknown replaceMethod {0!r}; expected one of: {1}".format(
                replaceMethod, ", ".join(sorted(replaceMethods))))
    placeholders, separators = replaceMethods[replaceMethod]

    marcFullRecordFixed = record
    for placeholder, separator in zip(placeholders, separators):
        marcFullRecordFixed = marcFullRecordFixed.replace(placeholder, separator)

    if validation:
        try:
            # Only the attempt to read the first record matters here; the
            # parsed record itself is not needed.
            # NOTE(review): verify against the installed pymarc version that
            # malformed records still raise instead of being reported
            # through the reader.
            reader = pymarc.MARCReader(marcFullRecordFixed.encode('utf8'), utf8_handling='replace')
            next(reader)
        except (RecordLengthInvalid, RecordLeaderInvalid, BaseAddressNotFound, BaseAddressInvalid, RecordDirectoryInvalid, NoFieldsFound, UnicodeDecodeError) as e:
            eprint("record id {0}:".format(record_id)+str(e))
            with open('invalid_records.txt', 'a') as error:
                #file_out.pluserror()
                eprint(marcFullRecordFixed, file=error)
            return None
    return marcFullRecordFixed
# Prints the running totals, then walks through the MARC records of `file`,
# counting them and showing a progress counter. The definition continues past
# the end of this excerpt.
# NOTE(review): this is Python 2 code (print statements). The parameter `file`
# shadows the builtin, and the input is opened in the default mode and is not
# closed in the visible part.
def processFile(self, file):
print ""
print "Total Subjects:", self.globalSubjectsCount
print "Total Records:", self.globalTotalRecords
print "Processing next file:"
print file
reader = MARCReader(open(file))
# Number of records read from this file so far.
count = 0
for record in reader:
count+=1
self.globalTotalRecords+=1
# "\r" returns to the start of the line so the counter overwrites itself.
sys.stdout.write("\rRecord# %d" %count)
#print record
subjects = []
def __index_titles__(**kwargs):
    """Pass every record of a binary MARC file to ``index_marc``.

    Keyword Args:
        redis_datastore: datastore handed on to ``index_marc``; defaults
            to ``REDIS_DATASTORE``.
        filename: path of the MARC file to read. When it is missing or
            ``None`` the function returns without doing anything.

    Start and end times are printed to stdout. While indexing, a dot is
    written to stderr for every 100th record and the record index is
    printed for every 1000th record (both including index 0).
    """
    redis_ds = kwargs.get('redis_datastore',
                          REDIS_DATASTORE)
    filename = kwargs.get('filename', None)
    if filename is None:
        return
    # The context manager closes the input file even if indexing fails;
    # previously the handle was never closed.
    with open(filename, 'rb') as marc_file:
        title_authorities = pymarc.MARCReader(marc_file,
                                              to_unicode=True)
        start_time = datetime.datetime.utcnow()
        print("Started title indexing at {0}".format(start_time.isoformat()))
        for i, rec in enumerate(title_authorities):
            index_marc(record=rec, redis_datastore=redis_ds)
            if not i%100:
                sys.stderr.write(".")
            if not i%1000:
                print(i)
        end_time = datetime.datetime.utcnow()
    print("End title indexing at {0}, total-time={1}".format(
        end_time.isoformat(),
        end_time-start_time))