Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Fragment of a test body: the enclosing `def` and the original indentation are
# not visible here. It calls check_ocrmypdf with input_file, outpdf and the
# requested --output-type, then compares the date entries of the /Info
# dictionary in the input PDF against those in the output PDF.
check_ocrmypdf(
input_file, outpdf, '--output-type', output_type, env=spoof_tesseract_noop
)
pdf_before = pikepdf.open(input_file)
pdf_after = pikepdf.open(outpdf)
# A missing /Info entry falls back to {} so the emptiness test below works.
before = pdf_before.trailer.get('/Info', {})
after = pdf_after.trailer.get('/Info', {})
if not before:
# The input has no /Info entries: the output must still carry a non-empty
# /CreationDate.
assert after.get('/CreationDate', '') != ''
else:
# We expect that the creation date stayed the same (tolerance: 1000 seconds)
date_before = decode_pdf_date(str(before['/CreationDate']))
date_after = decode_pdf_date(str(after['/CreationDate']))
assert seconds_between_dates(date_before, date_after) < 1000
# We expect that the modified date is quite recent (less than 1000 seconds
# from the current UTC time).
# NOTE(review): indentation was lost in this copy, so it is unclear whether the
# /ModDate check belongs to the else branch or runs unconditionally - confirm
# against the original file.
date_after = decode_pdf_date(str(after['/ModDate']))
assert seconds_between_dates(date_after, datetime.datetime.now(timezone.utc)) < 1000
def test_build_metadata(trivial, graph, outdir):
    """Check that XMP metadata built from a docinfo dictionary survives a save.

    Loads ``graph.docinfo`` into the XMP metadata of ``trivial``, saves the
    result as ``outdir / 'tmp.pdf'``, reopens that file and verifies that:

    * the metadata stream has type ``Metadata`` and subtype ``XML``;
    * no ``pdf:Producer`` entry is present in the XMP;
    * ``xmp:CreateDate`` equals the ISO 8601 form of the creation date found
      in ``trivial.docinfo``.

    Args:
        trivial: PDF object that receives the metadata and is saved.
        graph: PDF object whose ``docinfo`` is used as the metadata source.
        outdir: Directory (supporting the ``/`` operator) for the temporary
            output file.
    """
    with trivial.open_metadata(set_pikepdf_as_editor=False) as xmp:
        xmp.load_from_docinfo(graph.docinfo)
    # Save only after the metadata context has exited.
    # NOTE(review): presumably the XMP is written back to the PDF on exit.
    trivial.save(outdir / 'tmp.pdf')

    # Open the saved file in a context manager so the handle is released even
    # when an assertion fails; the original left the reopened file unclosed.
    with pikepdf.open(outdir / 'tmp.pdf') as pdf:
        assert pdf.Root.Metadata.Type == Name.Metadata
        assert pdf.Root.Metadata.Subtype == Name.XML
        with pdf.open_metadata(set_pikepdf_as_editor=False) as xmp:
            assert 'pdf:Producer' not in xmp
            xmp_date = xmp['xmp:CreateDate']
            docinfo_date = decode_pdf_date(trivial.docinfo[Name.CreationDate])
            assert xmp_date == docinfo_date.isoformat()
# Fragment of a test body: the enclosing `def`, the definitions of input_file
# and outpdf, and the original indentation are not visible here. It compares
# the date entries of the /Info dictionary in the input PDF against those in
# the output PDF.
pdf_before = pikepdf.open(input_file)
pdf_after = pikepdf.open(outpdf)
# A missing /Info entry falls back to {} so the emptiness test below works.
before = pdf_before.trailer.get('/Info', {})
after = pdf_after.trailer.get('/Info', {})
if not before:
# The input has no /Info entries: the output must still carry a non-empty
# /CreationDate.
assert after.get('/CreationDate', '') != ''
else:
# We expect that the creation date stayed the same (tolerance: 1000 seconds)
date_before = decode_pdf_date(str(before['/CreationDate']))
date_after = decode_pdf_date(str(after['/CreationDate']))
assert seconds_between_dates(date_before, date_after) < 1000
# We expect that the modified date is quite recent (less than 1000 seconds
# from the current UTC time).
# NOTE(review): indentation was lost in this copy, so it is unclear whether the
# /ModDate check belongs to the else branch or runs unconditionally - confirm
# against the original file.
date_after = decode_pdf_date(str(after['/ModDate']))
assert seconds_between_dates(date_after, datetime.datetime.now(timezone.utc)) < 1000
# Fragment of a test body: the enclosing `def` and the original indentation are
# not visible here. It calls check_ocrmypdf with input_file, outpdf and the
# requested --output-type, then compares the date entries of the /Info
# dictionary in the input PDF against those in the output PDF.
check_ocrmypdf(
input_file, outpdf, '--output-type', output_type, env=spoof_tesseract_noop
)
pdf_before = pikepdf.open(input_file)
pdf_after = pikepdf.open(outpdf)
# A missing /Info entry falls back to {} so the emptiness test below works.
before = pdf_before.trailer.get('/Info', {})
after = pdf_after.trailer.get('/Info', {})
if not before:
# The input has no /Info entries: the output must still carry a non-empty
# /CreationDate.
assert after.get('/CreationDate', '') != ''
else:
# We expect that the creation date stayed the same (tolerance: 1000 seconds)
date_before = decode_pdf_date(str(before['/CreationDate']))
date_after = decode_pdf_date(str(after['/CreationDate']))
assert seconds_between_dates(date_before, date_after) < 1000
# We expect that the modified date is quite recent (less than 1000 seconds
# from the current UTC time).
# NOTE(review): indentation was lost in this copy, so it is unclear whether the
# /ModDate check belongs to the else branch or runs unconditionally - confirm
# against the original file.
date_after = decode_pdf_date(str(after['/ModDate']))
assert seconds_between_dates(date_after, datetime.datetime.now(timezone.utc)) < 1000
def test_decode_pdf_date():
    """Check decode_pdf_date against a table of known PDF date strings.

    The table covers a string with no timezone suffix (expected to decode to
    a naive datetime), three spellings of UTC (``Z00'00'``, ``Z`` and
    ``+0000``) and a ``+0100`` offset.
    """
    # All three UTC spellings must decode to this same aware datetime.
    new_year_utc = datetime(2018, 1, 1, 1, 1, 1, tzinfo=timezone.utc)
    plus_one_hour = timezone(timedelta(hours=1))

    # Maps each PDF date string to the datetime it must decode to; insertion
    # order is the order in which the cases are checked.
    expected_by_input = {
        '20160220040559': datetime(2016, 2, 20, 4, 5, 59),
        "20180101010101Z00'00'": new_year_utc,
        "20180101010101Z": new_year_utc,
        "20180101010101+0000": new_year_utc,
        "20180101010101+0100": datetime(2018, 1, 1, 1, 1, 1, tzinfo=plus_one_hour),
    }

    for pdf_date, expected in expected_by_input.items():
        assert decode_pdf_date(pdf_date) == expected
'--output-type',
output_type,
env=spoof_tesseract_noop,
)
assert p.returncode == ExitCode.ok, err
before = pikepdf.open(input_file)
after = pikepdf.open(outpdf)
assert after.docinfo.Title == german, after.docinfo
assert after.docinfo.Author == chinese, after.docinfo
assert after.docinfo.get('/Keywords', '') == ''
before_date = decode_pdf_date(str(before.docinfo.CreationDate))
after_date = decode_pdf_date(str(after.docinfo.CreationDate))
assert before_date == after_date
pdfa_info = file_claims_pdfa(outpdf)
assert pdfa_info['output'] == output_type
chinese,
'--output-type',
output_type,
env=spoof_tesseract_noop,
)
assert p.returncode == ExitCode.ok, err
before = pikepdf.open(input_file)
after = pikepdf.open(outpdf)
assert after.docinfo.Title == german, after.docinfo
assert after.docinfo.Author == chinese, after.docinfo
assert after.docinfo.get('/Keywords', '') == ''
before_date = decode_pdf_date(str(before.docinfo.CreationDate))
after_date = decode_pdf_date(str(after.docinfo.CreationDate))
assert before_date == after_date
pdfa_info = file_claims_pdfa(outpdf)
assert pdfa_info['output'] == output_type