Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_image_to_data_common_output(test_file, output):
    """Check the result type of ``image_to_data`` for one output kind.

    ``output`` selects which branch is verified:

    * ``Output.BYTES``  -- the result must be ``bytes``.
    * ``Output.DICT``   -- the result must be a ``dict`` sharing at least one
      key with the expected column names.
    * ``Output.STRING`` -- the result must be text and contain every expected
      column name.

    Any other ``output`` value only exercises the call itself.
    """
    column_names = (
        'level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num',
        'left', 'top', 'width', 'height', 'conf', 'text',
    )
    result = image_to_data(test_file, output_type=output)

    if output is Output.BYTES:
        assert isinstance(result, bytes)
        return

    if output is Output.DICT:
        assert isinstance(result, dict)
        # At least one expected column must be present among the keys.
        assert not set(result.keys()).isdisjoint(column_names)
        return

    if output is Output.STRING:
        # ``unicode`` is only looked up on Python 2, where it exists.
        text_type = unicode if IS_PYTHON_2 else str
        assert isinstance(result, text_type)
        for column in column_names:
            assert column in result
def test_wrong_tesseract_cmd(test_file, test_path):
    """Test wrong or missing tesseract command.

    Points ``pytesseract.pytesseract.tesseract_cmd`` at ``test_path`` and
    expects ``image_to_string`` to raise ``TesseractNotFoundError``.

    The command is reset in a ``finally`` clause so that a failed expectation
    cannot leave the bogus command in place for the tests that run afterwards.
    """
    import pytesseract

    pytesseract.pytesseract.tesseract_cmd = test_path
    try:
        with pytest.raises(TesseractNotFoundError):
            pytesseract.pytesseract.image_to_string(test_file)
    finally:
        # restore the default value, even when the assertion above fails
        pytesseract.pytesseract.tesseract_cmd = 'tesseract'
def test_wrong_tesseract_cmd(test_file, test_path):
    """Test wrong or missing tesseract command.

    Points ``pytesseract.pytesseract.tesseract_cmd`` at ``test_path`` and
    expects ``image_to_string`` to raise ``TesseractNotFoundError``.

    The command is reset in a ``finally`` clause so that a failed expectation
    cannot leave the bogus command in place for the tests that run afterwards.
    """
    import pytesseract

    pytesseract.pytesseract.tesseract_cmd = test_path
    try:
        with pytest.raises(TesseractNotFoundError):
            pytesseract.pytesseract.image_to_string(test_file)
    finally:
        # restore the default value, even when the assertion above fails
        pytesseract.pytesseract.tesseract_cmd = 'tesseract'
def test_wrong_tesseract_cmd(test_file, test_path):
    """Test wrong or missing tesseract command.

    Points ``pytesseract.pytesseract.tesseract_cmd`` at ``test_path`` and
    expects ``image_to_string`` to raise ``TesseractNotFoundError``.

    The command is reset in a ``finally`` clause so that a failed expectation
    cannot leave the bogus command in place for the tests that run afterwards.
    """
    import pytesseract

    pytesseract.pytesseract.tesseract_cmd = test_path
    try:
        with pytest.raises(TesseractNotFoundError):
            pytesseract.pytesseract.image_to_string(test_file)
    finally:
        # restore the default value, even when the assertion above fails
        pytesseract.pytesseract.tesseract_cmd = 'tesseract'
def test_proper_oserror_exception_handling(test_file, test_path):
    """Test for bubbling up OSError exceptions.

    Points ``pytesseract.pytesseract.tesseract_cmd`` at ``test_path`` and
    expects ``image_to_string`` to raise ``TesseractNotFoundError`` when
    running on Python 2 with a truthy ``test_path``, and ``OSError`` in every
    other case.

    The command is reset in a ``finally`` clause so that a failed expectation
    cannot leave the bogus command in place for the tests that run afterwards.
    """
    import pytesseract

    if IS_PYTHON_2 and test_path:
        expected_error = TesseractNotFoundError
    else:
        expected_error = OSError

    pytesseract.pytesseract.tesseract_cmd = test_path
    try:
        with pytest.raises(expected_error):
            pytesseract.pytesseract.image_to_string(test_file)
    finally:
        # restore the default value, even when the assertion above fails
        pytesseract.pytesseract.tesseract_cmd = 'tesseract'
# Optional third-party imports, performed only when the matching
# ``*_installed`` flag is truthy.
if numpy_installed:
    import numpy as np

if pandas_installed:
    import pandas

# Prefer ``PIL.Image``; fall back to a top-level ``Image`` module when the
# ``PIL`` package cannot be imported.
try:
    from PIL import Image
except ImportError:
    import Image

# Interpreter major-version flags.
IS_PYTHON_3 = version_info[:1] >= (3, )
IS_PYTHON_2 = not IS_PYTHON_3

TESSERACT_VERSION = tuple(get_tesseract_version().version)  # to skip tests

# Test assets live in the ``data`` directory next to this module.
DATA_DIR = path.join(
    path.dirname(path.abspath(__file__)),
    'data',
)
TEST_JPEG = path.join(DATA_DIR, 'test.jpg')

pytestmark = pytest.mark.pytesseract  # used marker for the module
@pytest.fixture(scope='session')
def test_file():
    """Session-scoped fixture yielding the path held in ``TEST_JPEG``."""
    return TEST_JPEG
@pytest.fixture(scope='session')
def test_file_european():
    """Session-scoped fixture: path of ``test-european.jpg`` in ``DATA_DIR``."""
    european_jpeg = path.join(DATA_DIR, 'test-european.jpg')
    return european_jpeg
def test_image_to_pdf_or_hocr(test_file, extension):
    """Check the type and framing of ``image_to_pdf_or_hocr`` output.

    * ``extension == 'pdf'``: on Python 2 the result must be a ``str`` that,
      once stripped, starts with ``%PDF`` and ends with ``EOF``; otherwise it
      only has to be ``bytes``.
    * ``extension == 'hocr'``: the result must be ``bytes`` whose UTF-8 text,
      once stripped, starts with the XML declaration.

    Any other ``extension`` value only exercises the call itself.
    """
    result = image_to_pdf_or_hocr(test_file, extension=extension)

    # Compare by value. ``is`` against a string literal tests object identity,
    # which only matches when both strings happen to be the same interned
    # object; otherwise every assertion below would be silently skipped.
    if extension == 'pdf':
        if IS_PYTHON_2:
            assert isinstance(result, str)
            result = str(result).strip()
            assert result.startswith('%PDF')
            assert result.endswith('EOF')
        else:
            assert isinstance(result, bytes)

    if extension == 'hocr':
        assert isinstance(result, bytes)  # type
        # ``bytes.decode`` yields text on both Python 2 and Python 3.
        result = result.decode('utf-8')
        result = str(result).strip()
        # An empty prefix matches every string, so the previous check could
        # never fail. NOTE(review): '<?xml' assumes Tesseract's hOCR output
        # opens with an XML declaration -- confirm against the installed
        # Tesseract version.
        assert result.startswith('<?xml')
Output.DICT,
Output.STRING,
],
ids=[
'bytes',
'dict',
'string',
]
)
def test_image_to_data_common_output(test_file, output):
    """Test and compare the type of the result.

    ``output`` selects which branch is verified:

    * ``Output.BYTES``  -- the result must be ``bytes``.
    * ``Output.DICT``   -- the result must be a ``dict`` sharing at least one
      key with ``expected_keys``.
    * ``Output.STRING`` -- the result must be text and contain every entry of
      ``expected_keys``.

    This copy previously stopped after building ``expected_keys`` and asserted
    nothing; the checks below match the other copies of this test in the
    module.
    """
    result = image_to_data(test_file, output_type=output)
    expected_keys = [
        'level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num',
        'left', 'top', 'width', 'height', 'conf', 'text'
    ]

    if output is Output.BYTES:
        assert isinstance(result, bytes)

    elif output is Output.DICT:
        assert isinstance(result, dict)
        assert bool(set(result.keys()).intersection(expected_keys))

    elif output is Output.STRING:
        # ``unicode`` is only looked up on Python 2, where it exists.
        assert isinstance(result, unicode if IS_PYTHON_2 else str)
        for key in expected_keys:
            assert key in result
def test_image_to_data__pandas_support(test_file):
    """Expect ``TSVNotSupported`` when ``Output.DATAFRAME`` output is requested."""
    requested_output = Output.DATAFRAME
    with pytest.raises(TSVNotSupported):
        image_to_data(test_file, output_type=requested_output)
def test_image_to_data_common_output(test_file, output):
    """Verify what ``image_to_data`` returns for the requested output kind.

    * ``Output.BYTES``: the result is ``bytes``.
    * ``Output.DICT``: the result is a ``dict`` with at least one of the
      expected column names among its keys.
    * ``Output.STRING``: the result is text that mentions every expected
      column name.

    No assertion is made for any other ``output`` value.
    """
    columns = [
        'level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num',
        'left', 'top', 'width', 'height', 'conf', 'text',
    ]
    result = image_to_data(test_file, output_type=output)

    if output is Output.BYTES:
        assert isinstance(result, bytes)
    elif output is Output.DICT:
        assert isinstance(result, dict)
        shared_keys = set(result.keys()) & set(columns)
        assert bool(shared_keys)
    elif output is Output.STRING:
        # ``unicode`` is only looked up on Python 2, where it exists.
        string_type = unicode if IS_PYTHON_2 else str
        assert isinstance(result, string_type)
        for name in columns:
            assert name in result