# preparse input sizes from vgsl string to seed ground truth data set
# sizes and dimension ordering.
if not nn:
    spec = spec.strip()
    if spec[0] != '[' or spec[-1] != ']':
        raise click.BadOptionUsage('spec', 'VGSL spec {} not bracketed'.format(spec))
    blocks = spec[1:-1].split(' ')
    m = re.match(r'(\d+),(\d+),(\d+),(\d+)', blocks[0])
    if not m:
        raise click.BadOptionUsage('spec', 'Invalid input spec {}'.format(blocks[0]))
    batch, height, width, channels = [int(x) for x in m.groups()]
else:
    batch, channels, height, width = nn.input
try:
    transforms = generate_input_transforms(batch, height, width, channels, pad)
except KrakenInputException as e:
    raise click.BadOptionUsage('spec', str(e))
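# Illustration (hypothetical spec string, not from the surrounding command):
# the VGSL input block is 'batch,height,width,channels', with 0 marking a
# variable dimension. For a spec like '[1,48,0,1 Cr3,3,32 Mp2,2 Lbx100 O1c10]'
# the block '1,48,0,1' parses to batch=1, height=48, width=variable,
# channels=1, and generate_input_transforms builds the matching resize and
# normalization pipeline applied to every line image.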
# disable automatic partition when given evaluation set explicitly
if evaluation_files:
    partition = 1
ground_truth = list(ground_truth)
# merge training_files into ground_truth list
if training_files:
    ground_truth.extend(training_files)
if len(ground_truth) == 0:
    raise click.UsageError('No training data was provided to the train command. Use `-t` or the `ground_truth` argument.')
np.random.shuffle(ground_truth)
if len(test_set) == 0:
    raise click.UsageError('No evaluation data was provided to the test command. Use `-e` or the `test_set` argument.')
def _get_text(im):
    with open(os.path.splitext(im)[0] + '.gt.txt', 'r') as fp:
        return get_display(fp.read())
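# Assumption, inferred from _get_text above: ground truth is expected next to
# each line image as '<basename>.gt.txt' (e.g. 'line_0001.png' pairs with
# 'line_0001.gt.txt'), and get_display from the python-bidi package reorders
# the logical-order transcription into display order before alignment.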
acc_list = []
for p, net in nn.items():
    algn_gt: List[str] = []
    algn_pred: List[str] = []
    chars = 0
    error = 0
    message('Evaluating {}'.format(p))
    logger.info('Evaluating {}'.format(p))
    batch, channels, height, width = net.nn.input
    ts = generate_input_transforms(batch, height, width, channels, pad)
    with log.progressbar(test_set, label='Evaluating') as bar:
        for im_path in bar:
            i = ts(Image.open(im_path))
            text = _get_text(im_path)
            pred = net.predict_string(i)
            chars += len(text)
            c, algn1, algn2 = global_align(text, pred)
            algn_gt.extend(algn1)
            algn_pred.extend(algn2)
            error += c
    acc_list.append((chars - error) / chars)
    confusions, scripts, ins, dels, subs = compute_confusions(algn_gt, algn_pred)
    rep = render_report(p, chars, error, confusions, scripts, ins, dels, subs)
    logger.info(rep)
    message(rep)
logger.info('Average accuracy: {:0.2f}%, (stddev: {:0.2f})'.format(np.mean(acc_list) * 100, np.std(acc_list) * 100))
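# Worked example with hypothetical numbers: per-model accuracy is
# (chars - error) / chars, and the summary line reports the mean and the
# population standard deviation scaled to percent.
#
#   import numpy as np
#   example_acc = [0.95, 0.91, 0.93]   # three evaluated models
#   print('{:0.2f}% (stddev: {:0.2f})'.format(np.mean(example_acc) * 100,
#                                             np.std(example_acc) * 100))
#   # -> 93.00% (stddev: 1.63)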
Raises:
    KrakenInputException: if the mapping between segmentation scripts and
        networks is incomplete.
"""
im_str = get_im_str(im)
logger.info('Running {} multi-script recognizers on {} with {} lines'.format(len(nets), im_str, len(bounds['boxes'])))
miss = [x[0] for x in bounds['boxes'] if not nets.get(x[0])]
if miss:
    raise KrakenInputException('Missing models for scripts {}'.format(miss))
# build dictionary for line preprocessing
ts = {}
for script, network in nets.items():
    logger.debug('Loading line transforms for {}'.format(script))
    batch, channels, height, width = network.nn.input
    ts[script] = generate_input_transforms(batch, height, width, channels, pad)
for line in bounds['boxes']:
    rec = ocr_record('', [], [])
    for script, (box, coords) in zip(map(lambda x: x[0], line),
                                     extract_boxes(im, {'text_direction': bounds['text_direction'],
                                                        'boxes': map(lambda x: x[1], line)})):
        # skip if script is set to ignore
        if script_ignore is not None and script in script_ignore:
            logger.info('Ignoring {} line segment.'.format(script))
            continue
        # check if boxes are non-zero in any dimension
        if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
            logger.warning('Run with zero dimension. Skipping.')
            continue
        # try conversion into tensor
        try:
            # body restored from the parallel single-model path below:
            # apply the script-specific transform to the extracted box
            line = ts[script](box)
        except Exception:
            yield ocr_record('', [], [])
            continue
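# Hedged usage sketch for the multi-model path above. The API names follow
# kraken's documented recognition workflow; the script names and model paths
# are placeholders, and pageseg.segment's script_detect flag is an assumption
# about the installed version.
#
#   from PIL import Image
#   from kraken import pageseg, rpred
#   from kraken.lib import models
#
#   im = Image.open('page.png')
#   nets = {'Latin': models.load_any('latin.mlmodel'),
#           'Arabic': models.load_any('arabic.mlmodel')}
#   bounds = pageseg.segment(im, script_detect=True)
#   for record in rpred.mm_rpred(nets, im, bounds):
#       print(record.prediction)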
                'horizontal-lr/rl/vertical-lr/rl'.
    pad (int): Extra blank padding to the left and right of the text line.
               Auto-disabled when expected network inputs are incompatible
               with padding.
    bidi_reordering (bool): Reorder classes in the ocr_record according to
                            the Unicode bidirectional algorithm for correct
                            display.

Yields:
    An ocr_record containing the recognized text, absolute character
    positions, and confidence values for each character.
"""
im_str = get_im_str(im)
logger.info('Running recognizer on {} with {} lines'.format(im_str, len(bounds['boxes'])))
logger.debug('Loading line transform')
batch, channels, height, width = network.nn.input
ts = generate_input_transforms(batch, height, width, channels, pad)
for box, coords in extract_boxes(im, bounds):
    # check if boxes are non-zero in any dimension
    if sum(coords[::2]) == 0 or coords[3] - coords[1] == 0:
        logger.warning('bbox {} with zero dimension. Emitting empty record.'.format(coords))
        yield ocr_record('', [], [])
        continue
    # try conversion into tensor
    try:
        line = ts(box)
    except Exception:
        yield ocr_record('', [], [])
        continue
    # check if line is non-zero
    if line.max() == line.min():
        yield ocr_record('', [], [])
        continue
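# Hedged usage sketch for the single-model generator above, following
# kraken's documented workflow ('model.mlmodel' and 'page.png' are
# placeholders; verify the API against your installed version):
#
#   from PIL import Image
#   from kraken import binarization, pageseg, rpred
#   from kraken.lib import models
#
#   net = models.load_any('model.mlmodel')
#   im = binarization.nlbin(Image.open('page.png'))
#   bounds = pageseg.segment(im)
#   for record in rpred.rpred(net, im, bounds):
#       print(record.prediction, record.cuts, record.confidences)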
model = vgsl.TorchVGSLModel.load_model(model)
model.eval()
if mask:
    if mask.mode != '1' and not is_bitonal(mask):
        logger.error('Mask is not bitonal')
        raise KrakenInputException('Mask is not bitonal')
    mask = mask.convert('1')
    if mask.size != im.size:
        logger.error('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
        raise KrakenInputException('Mask size {} doesn\'t match image size {}'.format(mask.size, im.size))
    logger.info('Masking enabled in segmenter.')
    mask = pil2array(mask)
batch, channels, height, width = model.input
transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
res_tf = tf.Compose(transforms.transforms[:2])
scal_im = res_tf(im).convert('L')
with torch.no_grad():
    logger.debug('Running network forward pass')
    o = model.nn(transforms(im).unsqueeze(0))
logger.debug('Upsampling network output')
o = F.interpolate(o, size=scal_im.size[::-1])
o = o.squeeze().numpy()
logger.debug('Vectorizing network output')
baselines = vectorize_lines(o)
logger.debug('Polygonizing lines')
lines = list(zip(baselines, calculate_polygonal_environment(scal_im, baselines)))
logger.debug('Scaling vectorized lines')
scale = np.divide(im.size, o.shape[:0:-1])
lines = scale_polygonal_lines(lines, scale)
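# Worked example of the scale factor above (hypothetical sizes): for a
# network output o of shape (C, H', W') = (2, 300, 200), o.shape[:0:-1]
# is (W', H') = (200, 300); with im.size == (W, H) == (800, 1200) the
# scale becomes (800/200, 1200/300) == (4.0, 4.0), mapping the vectorized
# baselines and polygons back into full-resolution image coordinates.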
"""
Produces semi-transparent neural segmenter output overlays
"""
import sys
import torch
from PIL import Image
from kraken.lib import vgsl, dataset
import torch.nn.functional as F
from os.path import splitext
model = vgsl.TorchVGSLModel.load_model(sys.argv[1])
model.eval()
batch, channels, height, width = model.input
transforms = dataset.generate_input_transforms(batch, height, width, channels, 0, valid_norm=False)
imgs = sys.argv[2:]
torch.set_num_threads(1)
for img in imgs:
    print(img)
    im = Image.open(img)
    with torch.no_grad():
        o = model.nn(transforms(im).unsqueeze(0))
        o = F.interpolate(o, size=im.size[::-1])
        o = o.squeeze().numpy()
    # channel 1 is taken as the probability map to visualize; which class
    # each channel holds depends on the loaded model
    heat = Image.fromarray((o[1] * 255).astype('uint8'))
    heat.save(splitext(img)[0] + '.heat.png')
    # composite a flat blue layer over the page, using the heatmap as alpha
    overlay = Image.new('RGBA', im.size, (0, 130, 200, 255))
    Image.composite(overlay, im.convert('RGBA'), heat).save(splitext(img)[0] + '.overlay.png')
    del o
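# Usage sketch (file names are placeholders): invoked as
#   python overlay.py model.mlmodel page1.png page2.png
# the script writes page1.heat.png and page1.overlay.png (and likewise for
# page2) next to each input image.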