Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
def test_unpaper_args_invalid_filename(spoof_tesseract_noop, resources, outpdf):
    """Check that a path-like ``--unpaper-args`` value is rejected.

    Invokes ``run_ocrmypdf`` on ``skew.pdf`` with
    ``-c --unpaper-args /etc/passwd`` and expects the captured error text to
    contain "No filenames allowed" and the exit status to be
    ``ExitCode.bad_args``.
    """
    unpaper_cli = ("-c", "--unpaper-args", "/etc/passwd")
    proc, _captured_out, captured_err = run_ocrmypdf(
        resources / "skew.pdf",
        outpdf,
        *unpaper_cli,
        env=spoof_tesseract_noop,
    )

    # The message is checked before the exit status.
    assert "No filenames allowed" in captured_err
    assert proc.returncode == ExitCode.bad_args
def test_textonly_pdf_on_newer_tess3(resources, no_outpdf):
    """Expect the ``sandwich`` PDF renderer to succeed on ``linn.pdf``.

    NOTE(review): the name implies this only applies to newer Tesseract 3
    releases; any version gating (e.g. a skip marker) is not visible in this
    block -- confirm against the full file.
    """
    renderer_args = ('--pdf-renderer', 'sandwich')
    completed, _, _ = pytest.helpers.run_ocrmypdf(
        resources / 'linn.pdf', no_outpdf, *renderer_args
    )
    assert completed.returncode == ExitCode.ok
def test_textonly_pdf_on_older_tess3(resources, no_outpdf):
    """Expect the ``sandwich`` PDF renderer to report a missing dependency.

    Runs on ``linn.pdf`` and asserts ``ExitCode.missing_dependency``.

    NOTE(review): the name implies this only applies to older Tesseract 3
    releases; any version gating (e.g. a skip marker) is not visible in this
    block -- confirm against the full file.
    """
    renderer_args = ('--pdf-renderer', 'sandwich')
    completed, _, _ = pytest.helpers.run_ocrmypdf(
        resources / 'linn.pdf', no_outpdf, *renderer_args
    )
    assert completed.returncode == ExitCode.missing_dependency
def test_gs_raster_failure(spoof_no_tess_gs_raster_fail, resources, outpdf):
    """Expect a child-process error when run with the raster-fail environment.

    Processes ``ccitt.pdf`` with ``env=spoof_no_tess_gs_raster_fail`` and
    asserts that the exit status is ``ExitCode.child_process_error``.
    """
    proc, _captured_out, captured_err = run_ocrmypdf(
        resources / 'ccitt.pdf',
        outpdf,
        env=spoof_no_tess_gs_raster_fail,
    )
    # Emit the captured error text so it is available when the test fails.
    print(captured_err)
    assert proc.returncode == ExitCode.child_process_error
def test_gs_render_failure(spoof_no_tess_gs_render_fail, resources, outpdf):
    """Expect a child-process error when run with the render-fail environment.

    Processes ``blank.pdf`` with ``env=spoof_no_tess_gs_render_fail`` and
    asserts that the exit status is ``ExitCode.child_process_error``.
    """
    proc, _captured_out, captured_err = run_ocrmypdf(
        resources / 'blank.pdf',
        outpdf,
        env=spoof_no_tess_gs_render_fail,
    )
    # Emit the captured error text so it is available when the test fails.
    print(captured_err)
    assert proc.returncode == ExitCode.child_process_error
def test_userunit_ghostscript_fails(poster, no_outpdf, caplog):
    """Expect ``--output-type=pdfa`` on the ``poster`` input to be rejected.

    Calls ``run_ocrmypdf_api`` and asserts that it returns
    ``ExitCode.input_file`` and that the captured log text mentions
    "not supported by Ghostscript".
    """
    exit_status = run_ocrmypdf_api(
        poster,
        no_outpdf,
        '--output-type=pdfa',
    )

    assert exit_status == ExitCode.input_file
    assert 'not supported by Ghostscript' in caplog.text
def test_stdin(spoof_tesseract_noop, ocrmypdf_exec, resources, outpdf):
    """Feed the input PDF to the ocrmypdf executable through stdin.

    Equivalent shell invocation: ``ocrmypdf - output.pdf < testfile.pdf``.
    The command line is ``ocrmypdf_exec`` followed by ``-`` and the output
    path; the process must exit with ``ExitCode.ok``.
    """
    source_path = str(resources / 'francais.pdf')
    target_path = str(outpdf)

    with open(source_path, 'rb') as pdf_stream:
        # '-' in the input position, with the opened PDF attached as stdin.
        completed = run(
            ocrmypdf_exec + ['-', target_path],
            stdout=PIPE,
            stderr=PIPE,
            stdin=pdf_stream,
            env=spoof_tesseract_noop,
        )
        assert completed.returncode == ExitCode.ok
# NOTE(review): the statements below look like the body of a separate test
# whose `def` line is missing from this view; `resources`, `no_outpdf` and
# `spoof_tesseract_noop` are presumably its fixtures -- confirm against the
# full file before relying on this fragment.
#
# Ghostscript doesn't support high Unicode, so neither do we, to be
# safe
input_file = resources / 'c02-22.pdf'
# Subject string containing U+1030C, a code point outside the Basic
# Multilingual Plane.
high_unicode = 'U+1030C is: 𐌌'
p, out, err = run_ocrmypdf(
input_file,
no_outpdf,
'--subject',
high_unicode,
'--output-type',
'pdfa',
env=spoof_tesseract_noop,
)
# The run is expected to be rejected as bad arguments; `err` is attached as
# the assertion message so it is shown on failure.
assert p.returncode == ExitCode.bad_args, err