How to use the tesserocr.PT enumeration (block types) in tesserocr

To help you get started, we’ve selected a few tesserocr examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github OCR-D / ocrd_tesserocr / ocrd_tesserocr / segment_table.py View on Github external
polygon = polygon_from_x0y0x1y1(bbox)
            polygon = coordinates_for_segment(polygon, region_image, region_coords)
            points = points_from_polygon(polygon)
            coords = CoordsType(points=points)
            # if xywh['w'] < 30 or xywh['h'] < 30:
            #     LOG.info('Ignoring too small region: %s', points)
            #     it.Next(RIL.BLOCK)
            #     continue
            #
            # add the region reference in the reading order element
            # (but ignore non-text regions entirely)
            ID = region.id + "_%04d" % index
            subregion = TextRegionType(id=ID, Coords=coords,
                                       type=TextTypeSimpleType.PARAGRAPH)
            block_type = it.BlockType()
            if block_type == PT.FLOWING_TEXT:
                pass
            elif block_type == PT.HEADING_TEXT:
                subregion.set_type(TextTypeSimpleType.HEADING)
            elif block_type == PT.PULLOUT_TEXT:
                subregion.set_type(TextTypeSimpleType.FLOATING)
            elif block_type == PT.CAPTION_TEXT:
                subregion.set_type(TextTypeSimpleType.CAPTION)
            elif block_type == PT.VERTICAL_TEXT:
                subregion.set_orientation(90.0)
            else:
                it.Next(RIL.BLOCK)
                continue
            LOG.info("Detected cell '%s': %s (%s)", ID, points, membername(PT, block_type))
            region.add_TextRegion(subregion)
            if rogroup:
                rogroup.add_RegionRefIndexed(RegionRefIndexedType(regionRef=ID, index=index))
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / segment_table.py View on Github external
type=TextTypeSimpleType.PARAGRAPH)
            block_type = it.BlockType()
            if block_type == PT.FLOWING_TEXT:
                pass
            elif block_type == PT.HEADING_TEXT:
                subregion.set_type(TextTypeSimpleType.HEADING)
            elif block_type == PT.PULLOUT_TEXT:
                subregion.set_type(TextTypeSimpleType.FLOATING)
            elif block_type == PT.CAPTION_TEXT:
                subregion.set_type(TextTypeSimpleType.CAPTION)
            elif block_type == PT.VERTICAL_TEXT:
                subregion.set_orientation(90.0)
            else:
                it.Next(RIL.BLOCK)
                continue
            LOG.info("Detected cell '%s': %s (%s)", ID, points, membername(PT, block_type))
            region.add_TextRegion(subregion)
            if rogroup:
                rogroup.add_RegionRefIndexed(RegionRefIndexedType(regionRef=ID, index=index))
            #
            # iterator increment
            #
            index += 1
            it.Next(RIL.BLOCK)
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / segment_region.py View on Github external
#     continue
            # region_image_bin = it.GetBinaryImage(RIL.BLOCK)
            # if not region_image_bin.getbbox():
            #     LOG.info('Ignoring binary-empty region: %s', points)
            #     it.Next(RIL.BLOCK)
            #     continue
            #
            # add the region reference in the reading order element
            # (will be removed again if Separator/Noise region below)
            ID = "region%04d" % index
            og.add_RegionRefIndexed(RegionRefIndexedType(regionRef=ID, index=index))
            #
            # region type switch
            #
            block_type = it.BlockType()
            if block_type in [PT.FLOWING_TEXT,
                              PT.HEADING_TEXT,
                              PT.PULLOUT_TEXT,
                              PT.CAPTION_TEXT,
                              # TABLE is contained in PTIsTextType, but
                              # it is a bad idea to create a TextRegion
                              # for it (better set `find_tables` False):
                              # PT.TABLE,
                              # will also get a 90° @orientation
                              # (but that can be overridden by deskew/OSD):
                              PT.VERTICAL_TEXT]:
                region = TextRegionType(id=ID, Coords=coords,
                                        type=TextTypeSimpleType.PARAGRAPH)
                if block_type == PT.VERTICAL_TEXT:
                    region.set_orientation(90.0)
                elif block_type == PT.HEADING_TEXT:
                    region.set_type(TextTypeSimpleType.HEADING)
github OCR-D / ocrd_tesserocr / ocrd_tesserocr / segment_region.py View on Github external
elif block_type in [PT.FLOWING_IMAGE,
                                PT.HEADING_IMAGE,
                                PT.PULLOUT_IMAGE]:
                region = ImageRegionType(id=ID, Coords=coords)
                page.add_ImageRegion(region)
            elif block_type in [PT.HORZ_LINE,
                                PT.VERT_LINE]:
                region = SeparatorRegionType(id=ID, Coords=coords)
                page.add_SeparatorRegion(region)
                # undo appending in ReadingOrder
                og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
            elif block_type in [PT.INLINE_EQUATION,
                                PT.EQUATION]:
                region = MathsRegionType(id=ID, Coords=coords)
                page.add_MathsRegion(region)
            elif block_type == PT.TABLE:
                # without API access to StructuredTable we cannot
                # do much for a TableRegionType (i.e. nrows, ncols,
                # coordinates of cells for recursive regions etc),
                # but this can be achieved afterwards by segment-table
                region = TableRegionType(id=ID, Coords=coords)
                page.add_TableRegion(region)
            else:
                region = NoiseRegionType(id=ID, Coords=coords)
                page.add_NoiseRegion()
                # undo appending in ReadingOrder
                og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
            LOG.info("Detected region '%s': %s (%s)", ID, points, membername(PT, block_type))
            #
            # iterator increment
            #
            index += 1