"""
im_str = get_im_str(im)
logger.info('Segmenting {}'.format(im_str))
model = vgsl.TorchVGSLModel.load_model(model)
model.eval()
if mask:
    if mask.mode != '1' and not is_bitonal(mask):
        logger.error('Mask is not bitonal')
        raise KrakenInputException('Mask is not bitonal')
    mask = mask.convert('1')
    if mask.size != im.size:
        logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
        raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
    logger.info('Masking enabled in segmenter.')
    mask = pil2array(mask)
batch, channels, height, width = model.input
transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
res_tf = tf.Compose(transforms.transforms[:2])
scal_im = res_tf(im).convert('L')
with torch.no_grad():
    logger.debug('Running network forward pass')
    o = model.nn(transforms(im).unsqueeze(0))
logger.debug('Upsampling network output')
o = F.interpolate(o, size=scal_im.size[::-1])
o = o.squeeze().numpy()
logger.debug('Vectorizing network output')
baselines = vectorize_lines(o)
logger.debug('Polygonizing lines')
lines = list(zip(baselines, calculate_polygonal_environment(scal_im, baselines)))
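# Illustration only (not part of the original function): the resulting
# (baseline, polygon) pairs can be sanity-checked by drawing them onto the
# rescaled image. This assumes each baseline and polygon is a sequence of
# (x, y) points, as produced by the vectorization above.
from PIL import ImageDraw

debug_im = scal_im.convert('RGB')
draw = ImageDraw.Draw(debug_im)
for baseline, polygon in lines:
    draw.line([tuple(p) for p in baseline], fill=(255, 0, 0), width=2)
    if polygon is not None:
        draw.polygon([tuple(p) for p in polygon], outline=(0, 255, 0))
debug_im.save('segmentation_debug.png')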
    im (PIL.Image): Input image
    distort (float): maximum amplitude of the displacement field, in pixels
    sigma (float): standard deviation of the Gaussian filter smoothing the
        displacement field
    eps (float): scale of the random affine shear/scaling perturbation
    delta (float): scale of the random translation jitter
Returns:
    PIL.Image in mode 'L'
"""
w, h = im.size
# XXX: determine correct output shape from transformation matrices instead
# of guesstimating.
logger.debug(u'Pasting source image into canvas')
image = Image.new('L', (int(1.5*w), 4*h), 255)
image.paste(im, (int((image.size[0] - w) / 2), int((image.size[1] - h) / 2)))
line = pil2array(image.convert('L'))
# shear in y direction with factor eps * randn(), scaling with 1 + eps *
# randn() in x/y axis (all offset at d)
logger.debug(u'Performing affine transformation')
m = np.array([[1 + eps * np.random.randn(), 0.0], [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
c = np.array([w/2.0, h/2])
d = c - np.dot(m, c) + np.array([np.random.randn() * delta, np.random.randn() * delta])
line = affine_transform(line, m, offset=d, order=1, mode='constant', cval=255)
hs = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
ws = gaussian_filter(np.random.randn(4*h, int(1.5*w)), sigma)
hs *= distort/np.amax(hs)
ws *= distort/np.amax(ws)
def _f(p):
    return (p[0] + hs[p[0], p[1]], p[1] + ws[p[0], p[1]])
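# The snippet is truncated here. A typical way to apply such a coordinate
# mapping (a sketch under that assumption, not necessarily the original
# continuation) is scipy.ndimage.geometric_transform, which calls _f once per
# output pixel to find the source pixel it should sample:
from scipy.ndimage import geometric_transform

distorted = geometric_transform(line, _f, order=1, mode='nearest')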
For parameter meanings consult [1].
Args:
    im (PIL.Image): Input image
    eta (float):
    alpha (float):
    beta (float):
    alpha_0 (float):
    beta_0 (float):
Returns:
    PIL.Image in mode '1'
"""
logger.debug(u'Inverting and normalizing input image')
im = pil2array(im)
im = np.amax(im)-im
im = im*1.0/np.amax(im)
logger.debug(u'Calculating foreground distance transform')
fg_dist = distance_transform_cdt(1-im, metric='taxicab')
logger.debug(u'Calculating flip to white probability')
fg_prob = alpha_0 * np.exp(-alpha * (fg_dist**2)) + eta
fg_prob[im == 1] = 0
fg_flip = np.random.binomial(1, fg_prob)
logger.debug(u'Calculating background distance transform')
bg_dist = distance_transform_cdt(im, metric='taxicab')
logger.debug(u'Calculating flip to black probability')
bg_prob = beta_0 * np.exp(-beta * (bg_dist**2)) + eta
bg_prob[im == 0] = 0
bg_flip = np.random.binomial(1, bg_prob)
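# What the two flip masks encode (per the degradation model cited as [1] in
# the docstring): each pixel is flipped with a probability that decays
# exponentially with its squared taxicab distance d from the ink/background
# boundary, plus the distance-independent floor eta. For illustration, with
# alpha_0=1.0, alpha=1.5 and eta=0:
#   d=0 -> p = 1.0,  d=1 -> p = exp(-1.5) ~ 0.22,  d=2 -> p = exp(-6) ~ 0.0025
# The remainder of the function (not shown in this snippet) applies the
# sampled flips and returns the result as a mode '1' PIL image.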
Returns:
    PIL.Image containing the binarized image
Raises:
    KrakenInputException when trying to binarize an empty image.
"""
im_str = get_im_str(im)
logger.info('Binarizing {}'.format(im_str))
if is_bitonal(im):
logger.info('Skipping binarization because {} is bitonal.'.format(im_str))
return im
# convert to grayscale first
logger.debug('Converting {} to grayscale'.format(im_str))
im = im.convert('L')
raw = pil2array(im)
logger.debug('Scaling and normalizing')
# rescale image to the range [0, 1] (or [-1, 1] for signed integer dtypes)
raw = raw/float(np.iinfo(raw.dtype).max)
# perform image normalization
if np.amax(raw) == np.amin(raw):
    logger.warning('Trying to binarize empty image {}'.format(im_str))
    raise KrakenInputException('Image is empty')
image = raw-np.amin(raw)
image /= np.amax(image)
logger.debug('Interpolation and percentile filtering')
with warnings.catch_warnings():
    warnings.simplefilter('ignore', UserWarning)
    m = interpolation.zoom(image, zoom)
    m = filters.percentile_filter(m, perc, size=(range, 2))
    m = filters.percentile_filter(m, perc, size=(2, range))
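# The snippet stops after the percentile filtering. The filtered map m is a
# low-resolution estimate of the local background; a rough sketch of how such
# an estimate is typically used to flatten the illumination before
# thresholding (an assumption, not the verbatim continuation; bin_threshold
# is an illustrative value):
m = interpolation.zoom(m, 1.0/zoom)
rows, cols = np.minimum(np.array(image.shape), np.array(m.shape))
flat = np.clip(image[:rows, :cols] - m[:rows, :cols] + 1, 0, 1)
bin_threshold = 0.5
bin_arr = np.array(255 * (flat > bin_threshold), 'uint8')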
def dewarp(normalizer: CenterNormalizer, im: Image.Image) -> Image.Image:
"""
Dewarps an image of a line using a kraken.lib.lineest.CenterNormalizer
instance.
Args:
normalizer (kraken.lib.lineest.CenterNormalizer): A line normalizer
instance
im (PIL.Image.Image): Image to dewarp
Returns:
PIL.Image containing the dewarped image.
"""
line = pil2array(im)
temp = np.amax(line)-line
temp = temp*1.0/np.amax(temp)
normalizer.measure(temp)
line = normalizer.normalize(line, cval=np.amax(line))
return array2pil(line)
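# Usage sketch for dewarp(); illustrative only. The no-argument
# CenterNormalizer construction and the file name are assumptions, not taken
# from the original source.
from PIL import Image
from kraken.lib.lineest import CenterNormalizer

line_im = Image.open('line.png').convert('L')
dewarped = dewarp(CenterNormalizer(), line_im)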
    angle = 0
    offset = (0, 0)
elif text_direction == 'vertical-lr':
    angle = 270
    offset = (0, im.size[1])
elif text_direction == 'vertical-rl':
    angle = 90
    offset = (im.size[0], 0)
else:
    logger.error('Invalid text direction \'{}\''.format(text_direction))
    raise KrakenInputException('Invalid text direction {}'.format(text_direction))
logger.debug('Rotating input image by {} degrees'.format(angle))
im = im.rotate(angle, expand=True)
a = pil2array(im)
binary = np.array(a > 0.5*(np.amin(a) + np.amax(a)), 'i')
binary = 1 - binary
if not scale:
    scale = estimate_scale(binary)
if no_hlines:
    binary = remove_hlines(binary, scale)
# emptyish images will cause exceptions here.
try:
    if mask:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
    dsigma (float):
    eps (float):
    delta (float):
    degradations (list): list returning 4-tuples corresponding to
        the degradations argument of ocropus-linegen.
Returns:
    PIL.Image in mode 'L'
"""
w, h = im.size
# XXX: determine correct output shape from transformation matrices instead
# of guesstimating.
logger.debug(u'Pasting source image into canvas')
image = Image.new('L', (int(1.5*w), 4*h), 255)
image.paste(im, (int((image.size[0] - w) / 2), int((image.size[1] - h) / 2)))
a = pil2array(image.convert('L'))
logger.debug(u'Selecting degradations')
(sigma, ssigma, threshold, sthreshold) = degradations[np.random.choice(len(degradations))]
sigma += (2 * np.random.rand() - 1) * ssigma
threshold += (2 * np.random.rand() - 1) * sthreshold
a = a * 1.0 / np.amax(a)
if sigma > 0.0:
    logger.debug(u'Apply Gaussian filter')
    a = gaussian_filter(a, sigma)
logger.debug(u'Adding noise')
a += np.clip(np.random.randn(*a.shape) * 0.2, -0.25, 0.25)
logger.debug(u'Perform affine transformation and resize')
m = np.array([[1 + eps * np.random.randn(), 0.0], [eps * np.random.randn(), 1.0 + eps * np.random.randn()]])
w, h = a.shape
c = np.array([w / 2.0, h / 2])
d = c - np.dot(m, c) + np.array([np.random.randn() * delta, np.random.randn() * delta])
a = affine_transform(a, m, offset=d, order=1, mode='constant', cval=a[0, 0])
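# The snippet is truncated after the affine transformation. A plausible,
# hypothetical completion would binarize at the sampled threshold and convert
# the result back to a PIL image via array2pil (used elsewhere in this
# module); 'degraded' is an illustrative name:
a = np.clip(a, 0, 1)
degraded = array2pil((255 * (a > threshold)).astype('uint8'))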
if no_hlines:
    binary = remove_hlines(binary, scale)
# emptyish images will cause exceptions here.
try:
    if mask:
        if mask.mode != '1' and not is_bitonal(mask):
            logger.error('Mask is not bitonal')
            raise KrakenInputException('Mask is not bitonal')
        mask = mask.convert('1')
        if mask.size != im.size:
            logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
            raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
        logger.info('Masking enabled in segmenter. Disabling column detection.')
        mask = mask.rotate(angle, expand=True)
        colseps = pil2array(mask)
    elif black_colseps:
        colseps, binary = compute_black_colseps(binary, scale, maxcolseps)
    else:
        colseps = compute_white_colseps(binary, scale, maxcolseps)
except ValueError:
    logger.warning('Exception in column finder (probably empty image) for {}.'.format(im_str))
    return {'text_direction': text_direction, 'boxes': []}
# Line finding: estimate baseline/topline gradient maps, seed line regions,
# and propagate the seed labels to the connected components in boxmap.
bottom, top, boxmap = compute_gradmaps(binary, scale)
seeds = compute_line_seeds(binary, bottom, top, colseps, scale)
llabels = morph.propagate_labels(boxmap, seeds, conflict=0)
spread = morph.spread_labels(seeds, maxdist=scale)
llabels = np.where(llabels > 0, llabels, spread*binary)
segmentation = llabels*binary
lines = compute_lines(segmentation, scale)
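# End-to-end usage sketch for the legacy segmenter around this snippet.
# kraken.binarization.nlbin and kraken.pageseg.segment are assumed to be the
# public entry points; 'page.png' is a placeholder:
from PIL import Image
from kraken import binarization, pageseg

page = Image.open('page.png')
bw = binarization.nlbin(page)                              # the segmenter expects a bitonal image
seg = pageseg.segment(bw, text_direction='horizontal-lr')
for box in seg['boxes']:                                   # line bounding boxes (x0, y0, x1, y1)
    print(box)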