Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def sandwich(resources):
    """Open the ``sandwich.pdf`` test resource and return the ``Pdf``.

    The file is noted as having XMP, docinfo and shorthand attribute XMP.
    """
    pdf_path = resources / 'sandwich.pdf'
    return Pdf.open(pdf_path)
def first_image_in(filename):
    """Return ``(image, pdf)`` for the first image on page one of *filename*.

    The opened ``Pdf`` is returned together with the image object taken from
    the first value of the first page's ``images`` mapping.
    """
    document = Pdf.open(filename)
    page_images = document.pages[0].images
    first_image = next(iter(page_images.values()))
    return first_image, document
def congress(resources):
    """Open ``congress.pdf`` and return ``(image, pdf)``.

    The image is the ``/Im0`` entry of the first page's
    ``Resources.XObject``.
    """
    document = Pdf.open(resources / 'congress.pdf')
    xobjects = document.pages[0].Resources.XObject
    return xobjects['/Im0'], document
def test_pdfa_modify(resources, outdir):
    """Check that ``sandwich.pdf`` keeps validating after metadata edits.

    The source file is validated first.  Then, for each scenario, a fresh
    copy is opened, its metadata is opened with the scenario's flags, any
    listed entries are assigned, and the saved result is validated with
    ``verapdf_validate``.
    """
    source = resources / 'sandwich.pdf'
    assert verapdf_validate(source)

    # (output name, open_metadata keyword arguments, metadata entries to set)
    scenarios = [
        (
            '1.pdf',
            {'update_docinfo': False, 'set_pikepdf_as_editor': False},
            {},
        ),
        (
            '2.pdf',
            {'update_docinfo': False, 'set_pikepdf_as_editor': True},
            {},
        ),
        (
            '3.pdf',
            {'update_docinfo': True, 'set_pikepdf_as_editor': True},
            {'dc:source': 'Test', 'dc:title': 'Title Test'},
        ),
    ]

    for out_name, metadata_kwargs, entries in scenarios:
        # Each scenario starts from an untouched copy of the source file.
        document = Pdf.open(source)
        with document.open_metadata(**metadata_kwargs) as meta:
            for key, value in entries.items():
                meta[key] = value
        target = outdir / out_name
        document.save(target)
        assert verapdf_validate(target)
def test_pdfa_sanity(resources, outdir):
    """Check that a plain open/save round trip keeps a PDF/A file valid.

    The source file must validate, the re-saved copy must validate, and the
    document's metadata must report a ``pdfa_status`` of ``'1B'``.
    """
    source = resources / 'veraPDF test suite 6-2-10-t02-pass-a.pdf'
    assert verapdf_validate(source)

    document = Pdf.open(source)
    resaved = outdir / 'pdfa.pdf'
    document.save(resaved)
    assert verapdf_validate(resaved)

    metadata = document.open_metadata()
    assert metadata.pdfa_status == '1B'
def inline(resources):
    """Return ``(inline_image, pdf)`` from ``image-mono-inline.pdf``.

    The first page's content stream is scanned; the first instruction whose
    leading operand is a ``PdfInlineImage`` supplies the image.  Returns
    ``None`` when no such instruction is found.
    """
    document = Pdf.open(resources / 'image-mono-inline.pdf')
    first_page = document.pages[0]
    for args, _operator in parse_content_stream(first_page):
        if not args:
            continue
        candidate = args[0]
        if isinstance(candidate, PdfInlineImage):
            return candidate, document
    return None
def enron1(resources):
    """Open the ``enron1_gs.pdf`` test resource and return the ``Pdf``.

    The file is noted as an old PDF with NULs in its docinfo.
    """
    pdf_path = resources / 'enron1_gs.pdf'
    return Pdf.open(pdf_path)
def optimize(input_file, output_file, context, save_settings):
    """Run the image optimization steps on *input_file* and save the result.

    When ``context.options.optimize`` is 0, *input_file* and *output_file*
    are handed to ``safe_symlink`` and the function returns.  Otherwise any
    quality/group-size option that is still 0 is filled in (in place, on
    ``context.options``), the images are extracted into an ``images``
    directory beside *output_file*, transcoded, and the PDF is saved with
    *output_file*'s suffix replaced by ``.opt.pdf``.

    ``save_settings`` is forwarded as keyword arguments to ``pike.save``.

    NOTE(review): in the code visible here nothing writes *output_file*
    itself on the non-zero path - confirm against the caller / full source.
    """
    logger = context.log
    options = context.options

    if options.optimize == 0:
        safe_symlink(input_file, output_file)
        return

    # A value of 0 means "not set": choose a default based on the level.
    if options.jpeg_quality == 0:
        if options.optimize < 3:
            options.jpeg_quality = DEFAULT_JPEG_QUALITY
        else:
            options.jpeg_quality = 40
    if options.png_quality == 0:
        if options.optimize < 3:
            options.png_quality = DEFAULT_PNG_QUALITY
        else:
            options.png_quality = 30
    if options.jbig2_page_group_size == 0:
        if options.jbig2_lossy:
            options.jbig2_page_group_size = 10
        else:
            options.jbig2_page_group_size = 1

    with pikepdf.Pdf.open(input_file) as pike:
        out_path = Path(output_file)
        image_dir = out_path.parent / 'images'
        image_dir.mkdir(exist_ok=True)

        jpegs, pngs = extract_images_generic(pike, image_dir, logger, options)
        transcode_jpegs(pike, jpegs, image_dir, logger, options)
        # Disabled in the original: at optimize >= 2, try pngifying the jpegs
        # with transcode_pngs(pike, jpegs, jpg_name, root, log, options).
        transcode_pngs(pike, pngs, png_name, image_dir, logger, options)

        jbig2_groups = extract_images_jbig2(pike, image_dir, logger, options)
        convert_to_jbig2(pike, jbig2_groups, image_dir, logger, options)

        optimized_path = out_path.with_suffix('.opt.pdf')
        pike.remove_unreferenced_resources()
        pike.save(optimized_path, **save_settings)