Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Separators
"""
logger.debug('Finding column separators')
# find vertical whitespace by thresholding
smoothed = gaussian_filter(1.0*binary, (scale, scale*0.5))
smoothed = uniform_filter(smoothed, (5.0*scale, 1))
thresh = (smoothed < np.amax(smoothed)*0.1)
# find column edges by filtering
grad = gaussian_filter(1.0*binary, (scale, scale*0.5), order=(0, 1))
grad = uniform_filter(grad, (10.0*scale, 1))
grad = (grad > 0.5*np.amax(grad))
# combine edges and whitespace
seps = np.minimum(thresh, maximum_filter(grad, (int(scale), int(5*scale))))
seps = maximum_filter(seps, (int(2*scale), 1))
# select only the biggest column separators
seps = morph.select_regions(seps, sl.dim0, min=minheight*scale,
nbest=maxcolseps)
return seps
def remove_hlines(binary: np.array, scale: float, maxsize: int = 10) -> np.array:
    """
    Erases wide connected components (e.g. horizontal rules) from an image.

    The input is labelled with ``morph.label``; every labelled component
    whose width, as reported by ``sl.width`` on its bounding box, is greater
    than ``maxsize * scale`` is cleared. All other components are kept.

    Args:
        binary (numpy.array): image handed to ``morph.label``.
        scale (float): factor multiplied with ``maxsize`` to obtain the
                       width threshold.
        maxsize (int): width limit in multiples of ``scale``; components
                       wider than ``maxsize * scale`` are removed.

    Returns:
        numpy.array of type code ``'B'`` that is non-zero wherever a
        surviving component is present and zero elsewhere.
    """
    logger.debug('Filtering horizontal lines')
    labelled, _ = morph.label(binary)
    boxes = morph.find_objects(labelled)
    width_limit = maxsize * scale
    # The code pairs the i-th bounding box with label value i+1, hence the
    # 1-based counter.
    for label_id, box in enumerate(boxes, start=1):
        if sl.width(box) > width_limit:
            window = labelled[box]
            # Zero only this component's pixels; the box may also contain
            # pixels that carry other labels.
            window[window == label_id] = 0
    return np.array(labelled != 0, 'B')
def binary_objects(binary: np.array) -> np.array:
    """
    Labels features in an array and segments them into objects.

    Args:
        binary (numpy.array): array handed to ``morph.label``.

    Returns:
        Whatever ``morph.find_objects`` produces for the label image
        (presumably one bounding box per labelled feature -- confirm against
        ``morph.find_objects``).
    """
    label_image, _ = morph.label(binary)
    return morph.find_objects(label_image)
def remove_hlines(binary: np.array, scale: float, maxsize: int = 10) -> np.array:
    """
    Erases wide connected components (e.g. horizontal rules) from an image.

    The input is labelled with ``morph.label``; every labelled component
    whose width, as reported by ``sl.width`` on its bounding box, is greater
    than ``maxsize * scale`` is cleared. All other components are kept.

    Args:
        binary (numpy.array): image handed to ``morph.label``.
        scale (float): factor multiplied with ``maxsize`` to obtain the
                       width threshold.
        maxsize (int): width limit in multiples of ``scale``; components
                       wider than ``maxsize * scale`` are removed.

    Returns:
        numpy.array of type code ``'B'`` that is non-zero wherever a
        surviving component is present and zero elsewhere.
    """
    logger.debug('Filtering horizontal lines')
    labelled, _ = morph.label(binary)
    boxes = morph.find_objects(labelled)
    width_limit = maxsize * scale
    # The code pairs the i-th bounding box with label value i+1, hence the
    # 1-based counter.
    for label_id, box in enumerate(boxes, start=1):
        if sl.width(box) > width_limit:
            window = labelled[box]
            # Zero only this component's pixels; the box may also contain
            # pixels that carry other labels.
            window[window == label_id] = 0
    return np.array(labelled != 0, 'B')
delta = max(3, int(scale/2))
for x in range(bmarked.shape[1]):
transitions = sorted([(y, 1) for y in find(bmarked[:, x])] +
[(y, 0) for y in find(tmarked[:, x])])[::-1]
transitions += [(0, 0)]
for l in range(len(transitions)-1):
y0, s0 = transitions[l]
if s0 == 0:
continue
seeds[y0-delta:y0, x] = 1
y1, s1 = transitions[l+1]
if s1 == 0 and (y0-y1) < 5*scale:
seeds[y1:y0, x] = 1
seeds = maximum_filter(seeds, (1, int(1+scale)))
seeds = seeds * (1-colseps)
seeds, _ = morph.label(seeds)
return seeds
def compute_separators_morph(binary: np.array, scale: float,
                             sepwiden: int = 10, maxcolseps: int = 2) -> np.array:
    """
    Finds vertical black lines corresponding to column separators.

    The image is dilated, opened with a ``(10*scale, 1)`` element and eroded
    again; the result is then narrowed down by two successive
    ``morph.select_regions`` calls.

    Args:
        binary (numpy.array): input image handed to ``morph.r_dilation``.
        scale (float): scale estimate from which the structuring-element
                       sizes and the ``20*scale`` selection minimum are
                       derived.
        sepwiden (int): amount added to the second dilation extent and used
                        as the second erosion extent.
        maxcolseps (int): ``nbest`` limit of the final region selection; the
                          first selection uses twice this value.

    Returns:
        The output of the final ``morph.select_regions`` call.
    """
    logger.debug('Finding vertical black column lines')
    # Structuring-element extents along the first and second axis.
    extent0 = int(max(5, scale/4))
    extent1 = int(max(5, scale)) + sepwiden
    dilated = morph.r_dilation(binary, (extent0, extent1))
    opened = morph.rb_opening(dilated, (10*scale, 1))
    eroded = morph.r_erosion(opened, (extent0//2, sepwiden))
    # First pass: rank by sl.dim1 and keep twice as many regions as
    # ultimately wanted.
    preselected = morph.select_regions(eroded, sl.dim1, min=3,
                                       nbest=2*maxcolseps)
    # Second pass: rank by sl.dim0 with a minimum of 20*scale.
    return morph.select_regions(preselected, sl.dim0, min=20*scale,
                                nbest=maxcolseps)
def compute_separators_morph(binary: np.array, scale: float,
                             sepwiden: int = 10, maxcolseps: int = 2) -> np.array:
    """
    Finds vertical black lines corresponding to column separators.

    The image is dilated, opened with a ``(10*scale, 1)`` element and eroded
    again; the result is then narrowed down by two successive
    ``morph.select_regions`` calls.

    Args:
        binary (numpy.array): input image handed to ``morph.r_dilation``.
        scale (float): scale estimate from which the structuring-element
                       sizes and the ``20*scale`` selection minimum are
                       derived.
        sepwiden (int): amount added to the second dilation extent and used
                        as the second erosion extent.
        maxcolseps (int): ``nbest`` limit of the final region selection; the
                          first selection uses twice this value.

    Returns:
        The output of the final ``morph.select_regions`` call.
    """
    logger.debug('Finding vertical black column lines')
    # Structuring-element extents along the first and second axis.
    extent0 = int(max(5, scale/4))
    extent1 = int(max(5, scale)) + sepwiden
    dilated = morph.r_dilation(binary, (extent0, extent1))
    opened = morph.rb_opening(dilated, (10*scale, 1))
    eroded = morph.r_erosion(opened, (extent0//2, sepwiden))
    # First pass: rank by sl.dim1 and keep twice as many regions as
    # ultimately wanted.
    preselected = morph.select_regions(eroded, sl.dim1, min=3,
                                       nbest=2*maxcolseps)
    # Second pass: rank by sl.dim0 with a minimum of 20*scale.
    return morph.select_regions(preselected, sl.dim0, min=20*scale,
                                nbest=maxcolseps)
def compute_separators_morph(binary: np.array, scale: float,
                             sepwiden: int = 10, maxcolseps: int = 2) -> np.array:
    """
    Finds vertical black lines corresponding to column separators.

    The image is dilated, opened with a ``(10*scale, 1)`` element and eroded
    again; the result is then narrowed down by two successive
    ``morph.select_regions`` calls.

    Args:
        binary (numpy.array): input image handed to ``morph.r_dilation``.
        scale (float): scale estimate from which the structuring-element
                       sizes and the ``20*scale`` selection minimum are
                       derived.
        sepwiden (int): amount added to the second dilation extent and used
                        as the second erosion extent.
        maxcolseps (int): ``nbest`` limit of the final region selection; the
                          first selection uses twice this value.

    Returns:
        The output of the final ``morph.select_regions`` call.
    """
    logger.debug('Finding vertical black column lines')
    # Structuring-element extents along the first and second axis.
    extent0 = int(max(5, scale/4))
    extent1 = int(max(5, scale)) + sepwiden
    dilated = morph.r_dilation(binary, (extent0, extent1))
    opened = morph.rb_opening(dilated, (10*scale, 1))
    eroded = morph.r_erosion(opened, (extent0//2, sepwiden))
    # First pass: rank by sl.dim1 and keep twice as many regions as
    # ultimately wanted.
    preselected = morph.select_regions(eroded, sl.dim1, min=3,
                                       nbest=2*maxcolseps)
    # Second pass: rank by sl.dim0 with a minimum of 20*scale.
    return morph.select_regions(preselected, sl.dim0, min=20*scale,
                                nbest=maxcolseps)