Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_no_glyphless_graft(resources, outdir):
    """Run OCR on a three-document merge with a lowered page-replace limit.

    The pages of ``aspect.pdf`` and ``cmyk.pdf`` are appended to
    ``francais.pdf`` and the result is saved as ``test.pdf`` in ``outdir``.
    ``ocrmypdf.ocr`` is then called on that file with ``deskew=True`` and
    ``tesseract_timeout=0`` while ``ocrmypdf._graft.MAX_REPLACE_PAGES`` is
    patched to 2. The test has no explicit assertions; it passes when the
    call completes without raising.
    """
    merged_path = outdir / 'test.pdf'

    # Open all three sources as context managers so their file handles are
    # released as soon as the merged copy is on disk, even if saving fails.
    with pikepdf.open(resources / 'francais.pdf') as pdf, \
            pikepdf.open(resources / 'aspect.pdf') as pdf_aspect, \
            pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk:
        pdf.pages.extend(pdf_aspect.pages)
        pdf.pages.extend(pdf_cmyk.pages)
        pdf.save(merged_path)

    with patch('ocrmypdf._graft.MAX_REPLACE_PAGES', 2):
        ocrmypdf.ocr(
            merged_path, outdir / 'out.pdf', deskew=True, tesseract_timeout=0
        )
def test_encrypt_info(trivial, outpdf):
    """Check the encryption details reported for a file saved with R=4.

    ``trivial`` is saved to ``outpdf`` with owner password ``'foo'`` and user
    password ``'bar'``, then reopened with the owner password. The reopened
    file must report the user password as ``b'bar'`` and a key length of
    128 bits.
    """
    trivial.save(outpdf, encryption=dict(R=4, owner='foo', user='bar'))

    # Use a context manager so the reopened file is closed when the test
    # ends, including when an assertion fails.
    with pikepdf.open(outpdf, password='foo') as pdf:
        assert pdf.encryption.user_password == b'bar'
        assert pdf.encryption.bits == 128
def linn(resources):
    """Return the path of ``linn.pdf`` together with the opened PDF.

    The result is a ``(path, pdf)`` tuple. This function does not close the
    PDF it opens.
    """
    linn_path = resources / 'linn.pdf'
    opened_pdf = pikepdf.open(linn_path)
    return linn_path, opened_pdf
def test_preserve_metadata(spoof_tesseract_noop, output_type, resources, outpdf):
    """Check that ``/Title`` and ``/Author`` survive processing of ``graph.pdf``.

    ``check_ocrmypdf`` is run on ``graph.pdf`` with ``--output-type`` set to
    ``output_type`` and ``env=spoof_tesseract_noop``. The two docinfo entries
    must be equal in the input and the output, and ``file_claims_pdfa`` must
    report ``output_type`` under the ``'output'`` key for the produced file.
    """
    source = resources / 'graph.pdf'
    output = check_ocrmypdf(
        source,
        outpdf,
        '--output-type',
        output_type,
        env=spoof_tesseract_noop,
    )

    # Both files are only read here, so open them together and let the
    # context managers close them even when an assertion fails.
    with pikepdf.open(source) as pdf_before, pikepdf.open(output) as pdf_after:
        for key in ('/Title', '/Author'):
            assert pdf_before.docinfo[key] == pdf_after.docinfo[key]

    pdfa_info = file_claims_pdfa(str(output))
    assert pdfa_info['output'] == output_type
def test_with_block_abuse(resources):
    """Expect ``PdfError`` when an image object is read after its ``with`` block.

    The ``/Im0`` XObject of the first page of ``pal-1bit-trivial.pdf`` is
    fetched inside a ``with pikepdf.open(...)`` block; ``read_bytes`` is then
    called on it once the block has exited.
    """
    pdf_path = resources / 'pal-1bit-trivial.pdf'
    with pikepdf.open(pdf_path) as document:
        xobjects = document.pages[0].Resources.XObject
        image = xobjects['/Im0']

    # The read happens deliberately outside the with block above.
    with pytest.raises(PdfError):
        image.read_bytes()
def test_no_glyphless_graft(resources, outdir):
    """Run OCR on a three-document merge with a lowered page-replace limit.

    The pages of ``aspect.pdf`` and ``cmyk.pdf`` are appended to
    ``francais.pdf`` and the result is saved as ``test.pdf`` in ``outdir``.
    ``ocrmypdf.ocr`` is then called on that file with ``deskew=True`` and
    ``tesseract_timeout=0`` while ``ocrmypdf._graft.MAX_REPLACE_PAGES`` is
    patched to 2. The test has no explicit assertions; it passes when the
    call completes without raising.
    """
    merged_path = outdir / 'test.pdf'

    # Open all three sources as context managers so their file handles are
    # released as soon as the merged copy is on disk, even if saving fails.
    with pikepdf.open(resources / 'francais.pdf') as pdf, \
            pikepdf.open(resources / 'aspect.pdf') as pdf_aspect, \
            pikepdf.open(resources / 'cmyk.pdf') as pdf_cmyk:
        pdf.pages.extend(pdf_aspect.pages)
        pdf.pages.extend(pdf_cmyk.pages)
        pdf.save(merged_path)

    with patch('ocrmypdf._graft.MAX_REPLACE_PAGES', 2):
        ocrmypdf.ocr(
            merged_path, outdir / 'out.pdf', deskew=True, tesseract_timeout=0
        )
def test_encrypt_basic(trivial, outpdf, R, owner, user):
    """Check that an encrypted save can be reopened with either password.

    ``trivial`` is saved to ``outpdf`` with the given ``R``, ``owner`` and
    ``user`` encryption settings. The file is then opened once with the owner
    password and once with the user password, and must report
    ``is_encrypted`` as true both times.
    """
    trivial.save(outpdf, encryption=dict(R=R, owner=owner, user=user))

    # Each reopened handle is closed by its context manager, so no file
    # stays open after the test, even when an assertion fails.
    with pikepdf.open(outpdf, password=owner) as pdf_owner:
        assert pdf_owner.is_encrypted

    with pikepdf.open(outpdf, password=user) as pdf_user:
        assert pdf_user.is_encrypted
# Load document info taken from context.origin into the XMP metadata of
# working_file, and warn when some of the original metadata keys are absent
# from the result for a 'pdfa*' output type.
#
# NOTE(review): the visible body never uses output_file, never saves `pdf`
# and returns nothing, and its nesting is not visible -- the definition
# looks truncated here; confirm against the complete source.
def metadata_fixup(working_file, context):
# Path obtained from the context for 'metafix.pdf'; unused in the visible lines.
output_file = context.get_path('metafix.pdf')
options = context.options
# context.origin is presumably the original input file -- TODO confirm.
# NOTE(review): neither `original` nor `pdf` is closed in the visible lines.
original = pikepdf.open(context.origin)
docinfo = get_docinfo(original, options)
pdf = pikepdf.open(working_file)
with pdf.open_metadata() as meta:
# delete_missing=False presumably keeps XMP entries that have no docinfo
# counterpart -- verify against the pikepdf documentation.
meta.load_from_docinfo(docinfo, delete_missing=False)
# If xmp:CreateDate is missing, set it to the modify date to
# match Ghostscript, for consistency
if 'xmp:CreateDate' not in meta:
# Falls back to an empty string when xmp:ModifyDate is absent as well.
meta['xmp:CreateDate'] = meta.get('xmp:ModifyDate', '')
# Keys present in the original file's metadata but absent from the updated one.
meta_original = original.open_metadata()
not_copied = set(meta_original.keys()) - set(meta.keys())
if not_copied:
# In the visible lines a warning is logged only when the output type
# starts with 'pdfa'.
if options.output_type.startswith('pdfa'):
context.log.warning(
"Some input metadata could not be copied because it is not "
"permitted in PDF/A. You may wish to examine the output "
"PDF's XMP metadata."
)
# NOTE(review): this fragment has no enclosing `def` in view; input_file,
# target_file, output_file, save_settings and log are all defined elsewhere.
# Compare the on-disk sizes of input_file and target_file.
input_size = Path(input_file).stat().st_size
output_size = Path(target_file).stat().st_size
# A zero-byte target is reported as an output failure.
if output_size == 0:
raise OutputFileAccessError(
f"Output file not created after optimizing. We probably ran "
f"out of disk space in the temporary folder: {tempfile.gettempdir()}."
)
# output_size is non-zero past this point; input_size is not checked for
# zero before it is used as a divisor below.
ratio = input_size / output_size
savings = 1 - output_size / input_size
log.info(f"Optimize ratio: {ratio:.2f} savings: {(100 * savings):.1f}%")
# Negative savings means target_file is larger than input_file.
if savings < 0:
log.info("Image optimization did not improve the file - discarded")
# We still need to save the file
with pikepdf.open(input_file) as pike:
pike.remove_unreferenced_resources()
pike.save(output_file, **save_settings)
else:
# Otherwise target_file and output_file are handed to safe_symlink.
safe_symlink(target_file, output_file)