Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_old_ghostscript(caplog):
    """Exercise the validation checks against three patched Ghostscript versions.

    In every scenario ``ocrmypdf.exec.tesseract.has_textonly_pdf`` is patched
    to return ``True`` and ``ocrmypdf.exec.ghostscript.version`` is patched to
    report a fixed version string:

    * ``9.19`` with ``language='chi_sim'`` and ``output_type='pdfa'``: the
      captured log must contain 'Ghostscript does not work correctly'.
    * ``9.18`` with ``output_type='pdfa-3'``: ``check_options_output`` must
      raise ``MissingDependencyError``.
    * ``9.24`` with default options: ``check_dependency_versions`` must raise
      ``MissingDependencyError``.
    """
    gs_version_target = 'ocrmypdf.exec.ghostscript.version'
    textonly_target = 'ocrmypdf.exec.tesseract.has_textonly_pdf'

    # Scenario 1: expected to log a message rather than raise.
    with patch(gs_version_target, return_value='9.19'):
        with patch(textonly_target, return_value=True):
            opts = make_opts(language='chi_sim', output_type='pdfa')
            vd.check_options_output(opts)
            assert 'Ghostscript does not work correctly' in caplog.text

    # Scenario 2: PDF/A-3 output requested with this version must be refused.
    with patch(gs_version_target, return_value='9.18'):
        with patch(textonly_target, return_value=True):
            with pytest.raises(MissingDependencyError):
                vd.check_options_output(make_opts(output_type='pdfa-3'))

    # Scenario 3: this version must be rejected by the dependency check.
    with patch(gs_version_target, return_value='9.24'):
        with patch(textonly_target, return_value=True):
            with pytest.raises(MissingDependencyError):
                vd.check_dependency_versions(make_opts())
def test_no_unpaper(resources, no_outpdf):
    """Check that ``--clean`` is rejected when unpaper cannot be located.

    ``ocrmypdf.exec.unpaper.version`` is patched so that calling it raises
    ``FileNotFoundError``; ``check_options`` is then expected to raise
    ``MissingDependencyError`` for options parsed from ``--clean``.

    Args:
        resources: Fixture; joined with "c02-22.pdf" to form the input path.
        no_outpdf: Fixture; converted with ``fspath`` to form the output path.
    """
    input_ = fspath(resources / "c02-22.pdf")
    output = fspath(no_outpdf)
    options = parser.parse_args(args=["--clean", input_, output])

    with patch("ocrmypdf.exec.unpaper.version") as mock_unpaper_version:
        # Make the version probe fail as if the executable did not exist.
        mock_unpaper_version.side_effect = FileNotFoundError("unpaper")
        with pytest.raises(MissingDependencyError):
            check_options(options)

    # NOTE: the Ghostscript version checks that used to trail this test were a
    # verbatim copy of test_old_ghostscript and read the ``caplog`` fixture,
    # which this test does not request. They are covered by
    # test_old_ghostscript and do not belong here.
def test_version_check():
    """Verify that ``get_version`` raises ``MissingDependencyError`` for bad probes.

    Three invocations are checked in order: a program name that is not expected
    to be on the PATH, ``sh`` with ``-c`` as the version argument, and ``echo``
    with the default version argument.
    """
    from ocrmypdf.exec import get_version

    # Each entry is (positional arguments, keyword arguments) for get_version.
    failing_calls = (
        (('NOT_FOUND_UNLIKELY_ON_PATH',), {}),
        (('sh',), {'version_arg': '-c'}),
        (('echo',), {}),
    )
    for positional, keywords in failing_calls:
        with pytest.raises(MissingDependencyError):
            get_version(*positional, **keywords)
def test_no_languages(tmp_path):
    """Expect ``MissingDependencyError`` when the tessdata directory is empty.

    A fresh, empty ``tessdata`` directory is created under ``tmp_path`` and
    ``TESSDATA_PREFIX`` is pointed at ``tmp_path`` in a copy of the current
    environment, which is then passed to ``tesseract.languages``.
    """
    empty_tessdata = tmp_path / 'tessdata'
    empty_tessdata.mkdir()

    # Copy of the process environment with only TESSDATA_PREFIX overridden.
    tesseract_env = dict(os.environ, TESSDATA_PREFIX=fspath(tmp_path))

    with pytest.raises(MissingDependencyError):
        tesseract.languages(tesseract_env=tesseract_env)
def test_german(spoof_tesseract_cache, resources, outdir):
    """Run OCR with ``-l deu`` on ``francais.pdf`` and request a sidecar file.

    If ``check_ocrmypdf`` raises ``MissingDependencyError`` and 'deu' is not
    among ``tesseract.languages()``, the test is marked as an expected failure;
    otherwise the exception is re-raised.
    """
    # Producing a sidecar as well is an implicit check that the system locale
    # is configured properly. Using -l deu on a French document is fine: the
    # point is to exercise the functionality, not to get accurate text.
    source_pdf = resources / 'francais.pdf'
    result_pdf = outdir / 'francais.pdf'
    sidecar_txt = outdir / 'francais.txt'

    # 'deu' is used because it is more commonly installed.
    cli_args = ('-l', 'deu', '--sidecar', sidecar_txt)

    try:
        check_ocrmypdf(source_pdf, result_pdf, *cli_args, env=spoof_tesseract_cache)
    except MissingDependencyError:
        german_missing = 'deu' not in tesseract.languages()
        if german_missing:
            pytest.xfail(reason="tesseract-deu language pack not installed")
        raise