Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment of a test body; the enclosing def line is not visible
# here, so input_file, outpdf, output_type and spoof_tesseract_noop come from
# outside this excerpt.
# Non-ASCII metadata values: a German sentence for the title and Chinese
# characters for the author.
german = 'Du siehst den Wald vor lauter Bäumen nicht.'
chinese = '孔子'
# Invoke run_ocrmypdf with the title/author overrides and the requested
# output type.
p, out, err = run_ocrmypdf(
input_file,
outpdf,
'--title',
german,
'--author',
chinese,
'--output-type',
output_type,
env=spoof_tesseract_noop,
)
# err is attached as the assertion message so a failure shows it.
assert p.returncode == ExitCode.ok, err
before = pikepdf.open(input_file)
after = pikepdf.open(outpdf)
# The output's document info must carry the title and author exactly as given;
# the whole docinfo is shown on failure.
assert after.docinfo.Title == german, after.docinfo
assert after.docinfo.Author == chinese, after.docinfo
# No keywords option was passed above; the output's /Keywords entry must be
# missing or empty.
assert after.docinfo.get('/Keywords', '') == ''
# The decoded creation date must be identical in the input and the output.
before_date = decode_pdf_date(str(before.docinfo.CreationDate))
after_date = decode_pdf_date(str(after.docinfo.CreationDate))
assert before_date == after_date
# The 'output' entry reported by file_claims_pdfa must equal the requested
# output type.
pdfa_info = file_claims_pdfa(outpdf)
assert pdfa_info['output'] == output_type
# NOTE(review): these two os.close calls look like the body of the
# `evil_closer` callback passed as preexec_fn below; its def line is not
# visible in this excerpt — confirm against the full file.
# Close file descriptors 0 and 1 (conventionally stdin and stdout).
os.close(0)
os.close(1)
# Command line: the ocrmypdf executable prefix plus input and output paths.
p_args = ocrmypdf_exec + [input_file, output_file]
# Only stderr is captured; stdin and stdout are left as None. preexec_fn is
# called in the child process just before the program is executed.
p = Popen( # pylint: disable=subprocess-popen-preexec-fn
p_args,
close_fds=True,
stdout=None,
stderr=PIPE,
stdin=None,
env=spoof_tesseract_noop,
preexec_fn=evil_closer,
)
out, err = p.communicate()
# Echo the captured stderr bytes (decoded) so they appear in the test output.
print(err.decode())
assert p.returncode == ExitCode.ok
def test_oem_on_tess3(resources, no_outpdf):
    """Check that ``--tesseract-oem`` is accepted but reported as ignored.

    Runs ocrmypdf on ``aspect.pdf`` with ``--tesseract-oem 1`` and expects a
    successful exit code together with an 'argument ignored' notice in the
    captured error output.

    Args:
        resources: Directory-like object joined with ``/`` to locate
            ``aspect.pdf`` (presumably a ``pathlib.Path`` fixture — confirm).
        no_outpdf: Output argument handed to ocrmypdf (presumably a fixture
            for a throwaway output target — confirm).

    NOTE(review): the name suggests this targets Tesseract 3, but no version
    guard is visible in this block — confirm one exists elsewhere.
    """
    p, _, err = pytest.helpers.run_ocrmypdf(
        resources / 'aspect.pdf', no_outpdf, '--tesseract-oem', '1'
    )
    # Attach the captured error output so a failing run explains itself,
    # matching the other return-code assertions in this file.
    assert p.returncode == ExitCode.ok, err
    assert 'argument ignored' in err, err
'pdf',
'-',
output_file,
]
# NOTE(review): fragment of a test body; the enclosing def, the start of the
# p_args list, and the definitions of im, input_file and input_stream are not
# visible here, and the original indentation has been lost.
# Run the command with input_stream as stdin, capturing stdout and stderr as
# text (universal_newlines=True).
p = run(
p_args,
stdout=PIPE,
stderr=PIPE,
stdin=input_stream,
universal_newlines=True,
env=spoof_tesseract_noop,
)
if im.mode in ('RGBA', 'LA'):
# If alpha image is input, expect an error
assert p.returncode != ExitCode.ok and 'alpha' in p.stderr
return
# Otherwise the run must succeed; stderr is shown if it does not.
assert p.returncode == ExitCode.ok, p.stderr
# Inspect the first image of the first entry (presumably the first page —
# confirm) of the output's PdfInfo.
pdfinfo = PdfInfo(output_file)
pdfimage = pdfinfo[0].images[0]
# A .png input must not come out JPEG-encoded; a .jpg input must stay
# JPEG-encoded.
if input_file.endswith('.png'):
assert pdfimage.enc != Encoding.jpeg, "Lossless compression changed to lossy!"
elif input_file.endswith('.jpg'):
assert pdfimage.enc == Encoding.jpeg, "Lossy compression changed to lossless!"
# The output colorspace must follow the input image mode: modes starting with
# RGB or BGR map to rgb, modes starting with L map to gray.
if im.mode.startswith('RGB') or im.mode.startswith('BGR'):
assert pdfimage.color == Colorspace.rgb, "Colorspace changed"
elif im.mode.startswith('L'):
assert pdfimage.color == Colorspace.gray, "Colorspace changed"