Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Per-block step of a table-cell segmentation loop (the loop header is not
# part of this excerpt, and the original indentation has been lost).
# For the block the iterator `it` currently points at, this builds a
# TextRegion ("cell") below the parent `region` and registers it.
#
# Turn the block's bounding box into a polygon and pass it through
# coordinates_for_segment() together with the parent region's image and
# coordinate info (presumably converting image-relative to page-absolute
# coordinates -- confirm against the helper).
polygon = polygon_from_x0y0x1y1(bbox)
polygon = coordinates_for_segment(polygon, region_image, region_coords)
points = points_from_polygon(polygon)
coords = CoordsType(points=points)
# (disabled) size filter that would have skipped very small blocks:
# if xywh['w'] < 30 or xywh['h'] < 30:
# LOG.info('Ignoring too small region: %s', points)
# it.Next(RIL.BLOCK)
# continue
#
# add the region reference in the reading order element
# (but ignore non-text regions entirely)
# The cell ID is the parent region's ID plus "_" and the running index,
# zero-padded to four digits.
ID = region.id + "_%04d" % index
# Every cell starts as a PARAGRAPH; the block type may refine this below.
subregion = TextRegionType(id=ID, Coords=coords,
type=TextTypeSimpleType.PARAGRAPH)
block_type = it.BlockType()
if block_type == PT.FLOWING_TEXT:
# keep the PARAGRAPH default
pass
elif block_type == PT.HEADING_TEXT:
subregion.set_type(TextTypeSimpleType.HEADING)
elif block_type == PT.PULLOUT_TEXT:
subregion.set_type(TextTypeSimpleType.FLOATING)
elif block_type == PT.CAPTION_TEXT:
subregion.set_type(TextTypeSimpleType.CAPTION)
elif block_type == PT.VERTICAL_TEXT:
# vertical text keeps the PARAGRAPH type but is marked as rotated by 90 degrees
subregion.set_orientation(90.0)
else:
# Any other block type is not turned into a cell: advance the iterator
# and go on with the next block. Note that `index` is not incremented
# on this path within the visible code.
it.Next(RIL.BLOCK)
continue
LOG.info("Detected cell '%s': %s (%s)", ID, points, membername(PT, block_type))
region.add_TextRegion(subregion)
# Only reference the cell in the reading order when a reading-order group
# is available (`rogroup` is falsy otherwise).
if rogroup:
rogroup.add_RegionRefIndexed(RegionRefIndexedType(regionRef=ID, index=index))
type=TextTypeSimpleType.PARAGRAPH)
# Second copy of the cell classification step, including the end-of-iteration
# bookkeeping. `subregion`, `ID` and `points` are expected to have been set
# by statements preceding this excerpt (the statement ending on the previous
# line is cut off) -- verify against the full file.
block_type = it.BlockType()
if block_type == PT.FLOWING_TEXT:
# keep the type the subregion was created with
pass
elif block_type == PT.HEADING_TEXT:
subregion.set_type(TextTypeSimpleType.HEADING)
elif block_type == PT.PULLOUT_TEXT:
subregion.set_type(TextTypeSimpleType.FLOATING)
elif block_type == PT.CAPTION_TEXT:
subregion.set_type(TextTypeSimpleType.CAPTION)
elif block_type == PT.VERTICAL_TEXT:
# vertical text: type unchanged, orientation set to 90 degrees
subregion.set_orientation(90.0)
else:
# Any other block type is skipped: advance the iterator and continue with
# the next block, without consuming an index.
it.Next(RIL.BLOCK)
continue
LOG.info("Detected cell '%s': %s (%s)", ID, points, membername(PT, block_type))
region.add_TextRegion(subregion)
# Reference the cell in the reading order only if a group is available.
if rogroup:
rogroup.add_RegionRefIndexed(RegionRefIndexedType(regionRef=ID, index=index))
#
# iterator increment
#
# Reached only for accepted cells: bump the running index used for IDs and
# reading-order positions, then move the iterator to the next block.
index += 1
it.Next(RIL.BLOCK)
# continue
# Per-block step of the page-level segmentation loop (the loop header is not
# part of this excerpt). For the block the iterator `it` currently points at,
# create the matching PAGE region, attach it to `page`, and keep or drop its
# reading-order reference in `og`. `coords` and `points` must already
# describe the current block.
#
# (disabled) filter that would have skipped blocks with an empty binary image:
# region_image_bin = it.GetBinaryImage(RIL.BLOCK)
# if not region_image_bin.getbbox():
#     LOG.info('Ignoring binary-empty region: %s', points)
#     it.Next(RIL.BLOCK)
#     continue
#
# add the region reference in the reading order element
# (will be removed again if Separator/Noise region below)
ID = "region%04d" % index
og.add_RegionRefIndexed(RegionRefIndexedType(regionRef=ID, index=index))
#
# region type switch
#
block_type = it.BlockType()
if block_type in [PT.FLOWING_TEXT,
                  PT.HEADING_TEXT,
                  PT.PULLOUT_TEXT,
                  PT.CAPTION_TEXT,
                  # TABLE is contained in PTIsTextType, but
                  # it is a bad idea to create a TextRegion
                  # for it (better set `find_tables` False):
                  # PT.TABLE,
                  # will also get a 90° @orientation
                  # (but that can be overridden by deskew/OSD):
                  PT.VERTICAL_TEXT]:
    region = TextRegionType(id=ID, Coords=coords,
                            type=TextTypeSimpleType.PARAGRAPH)
    if block_type == PT.VERTICAL_TEXT:
        region.set_orientation(90.0)
    elif block_type == PT.HEADING_TEXT:
        region.set_type(TextTypeSimpleType.HEADING)
    elif block_type == PT.PULLOUT_TEXT:
        # pull-out text is a floating text region, not a plain paragraph
        region.set_type(TextTypeSimpleType.FLOATING)
    elif block_type == PT.CAPTION_TEXT:
        region.set_type(TextTypeSimpleType.CAPTION)
    # attach the text region to the page; otherwise the reading-order
    # reference added above would point to a region that does not exist
    page.add_TextRegion(region)
elif block_type in [PT.FLOWING_IMAGE,
                    PT.HEADING_IMAGE,
                    PT.PULLOUT_IMAGE]:
    region = ImageRegionType(id=ID, Coords=coords)
    page.add_ImageRegion(region)
elif block_type in [PT.HORZ_LINE,
                    PT.VERT_LINE]:
    region = SeparatorRegionType(id=ID, Coords=coords)
    page.add_SeparatorRegion(region)
    # undo appending in ReadingOrder
    og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
elif block_type in [PT.INLINE_EQUATION,
                    PT.EQUATION]:
    region = MathsRegionType(id=ID, Coords=coords)
    page.add_MathsRegion(region)
elif block_type == PT.TABLE:
    # without API access to StructuredTable we cannot
    # do much for a TableRegionType (i.e. nrows, ncols,
    # coordinates of cells for recursive regions etc),
    # but this can be achieved afterwards by segment-table
    region = TableRegionType(id=ID, Coords=coords)
    page.add_TableRegion(region)
else:
    region = NoiseRegionType(id=ID, Coords=coords)
    # pass the region so it is actually attached to the page,
    # as in every other branch
    page.add_NoiseRegion(region)
    # undo appending in ReadingOrder
    og.set_RegionRefIndexed(og.get_RegionRefIndexed()[:-1])
LOG.info("Detected region '%s': %s (%s)", ID, points, membername(PT, block_type))
#
# iterator increment
#
index += 1