Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_page_seg_mode(self):
    """Check that GetPageSegMode reports the mode last given to SetPageSegMode."""
    # Order matters: SINGLE_WORD first, then AUTO, so the API is left in
    # AUTO mode once the test has finished.
    for mode in (tesserocr.PSM.SINGLE_WORD, tesserocr.PSM.AUTO):
        self._api.SetPageSegMode(mode)
        self.assertEqual(self._api.GetPageSegMode(), mode)
def test_page_seg_mode(self):
    """Round-trip two page segmentation modes through the API setter and getter."""
    api = self._api
    psm = tesserocr.PSM

    # First switch to single-word mode and read it back.
    api.SetPageSegMode(psm.SINGLE_WORD)
    reported = api.GetPageSegMode()
    self.assertEqual(reported, psm.SINGLE_WORD)

    # Then switch to automatic mode and read it back.
    api.SetPageSegMode(psm.AUTO)
    reported = api.GetPageSegMode()
    self.assertEqual(reported, psm.AUTO)
def _process_existing_words(self, tessapi, words, line_image, line_xywh):
for word in words:
word_image, word_xywh = self.workspace.image_from_segment(
word, line_image, line_xywh)
if self.parameter['padding']:
bg = tuple(ImageStat.Stat(word_image).median)
pad = self.parameter['padding']
padded = Image.new(word_image.mode,
(word_image.width + 2 * pad,
word_image.height + 2 * pad), bg)
padded.paste(word_image, (pad, pad))
tessapi.SetImage(padded)
else:
tessapi.SetImage(word_image)
tessapi.SetPageSegMode(PSM.SINGLE_WORD)
if self.parameter['textequiv_level'] == 'word':
LOG.debug("Recognizing text in word '%s'", word.id)
word_text = tessapi.GetUTF8Text().rstrip("\n\f")
word_conf = tessapi.AllWordConfidences()
word_conf = word_conf[0]/100.0 if word_conf else 0.0
if word.get_TextEquiv():
LOG.warning("Word '%s' already contained text results", word.id)
word.set_TextEquiv([])
# todo: consider WordFontAttributes (TextStyle) etc (if not word.get_TextStyle())
word.add_TextEquiv(TextEquivType(Unicode=word_text, conf=word_conf))
continue # next word (to avoid indentation below)
## glyph level:
glyphs = word.get_Glyph()
if glyphs:
## external glyph layout:
LOG.warning("Word '%s' contains glyphs already, recognition might be suboptimal", word.id)