Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get(self, obj_id):
    """
    Return the document or the page identified by obj_id.

    An ID containing BasicPage.PAGE_ID_SEPARATOR is read as
    "<document ID><separator><page number>": the page is taken from the
    `pages` of the matching entry of self._docs_by_id. Any other ID is
    used directly as a key of self._docs_by_id.

    This is a lookup only: nothing is instantiated here when the
    requested object is not available yet.

    Raises:
        ValueError: the ID does not split into exactly two parts, or the
            page part is not an integer.
    """
    separator = BasicPage.PAGE_ID_SEPARATOR
    if separator not in obj_id:
        # Plain document ID.
        return self._docs_by_id[obj_id]

    # Page ID: convert the page number before touching the document index.
    doc_key, page_part = obj_id.split(separator)
    page_idx = int(page_part)
    document = self._docs_by_id[doc_key]
    return document.pages[page_idx]
def get(self, obj_id):
    """
    Return the document or the page identified by obj_id.

    An ID containing BasicPage.PAGE_ID_SEPARATOR is read as
    "<document ID><separator><page number>": the page is taken from the
    `pages` of the matching entry of self._docs_by_id. Any other ID is
    used directly as a key of self._docs_by_id.

    This is a lookup only: nothing is instantiated here when the
    requested object is not available yet.

    Raises:
        ValueError: the ID does not split into exactly two parts, or the
            page part is not an integer.
    """
    separator = BasicPage.PAGE_ID_SEPARATOR
    if separator not in obj_id:
        # Plain document ID.
        return self._docs_by_id[obj_id]

    # Page ID: convert the page number before touching the document index.
    doc_key, page_part = obj_id.split(separator)
    page_idx = int(page_part)
    document = self._docs_by_id[doc_key]
    return document.pages[page_idx]
# Pair each element of input_str with the element of input_rects found at
# the same position (zip stops at the shorter of the two).
input_el = zip(input_str, input_rects)
# Group consecutive pairs by the value splitter() returns for the first item
# of each pair; groupby only merges adjacent items sharing the same key.
for (is_split, group) in itertools.groupby(
input_el,
lambda x: splitter(x[0])
):
if is_split:
# Runs whose key is truthy are dropped entirely.
continue
# Accumulate the letters of this run and collect the matching rects,
# both in their original order.
letters = ""
rects = []
for (letter, rect) in group:
letters += letter
rects.append(rect)
# NOTE(review): indentation was lost in this excerpt; presumably this yield
# belongs to the outer loop and emits one (letters, rects) pair per kept run.
yield(letters, rects)
class PdfPage(BasicPage):
EXT_TXT = "txt"
def __init__(self, doc, page_nb, on_disk_cache=True):
    """
    Initialize the page.

    Arguments:
        doc: forwarded unchanged to the parent initializer
        page_nb: forwarded unchanged to the parent initializer
        on_disk_cache: stored as-is in self._on_disk_cache
            (defaults to True)
    """
    super().__init__(doc, page_nb)
    self._on_disk_cache = on_disk_cache
    # The size of a page never changes, so it can be cached once known.
    self._size = None
    # No boxes stored yet.
    self.__boxes = None
def get_pdf_page(self, pdf=None):
    """
    Return the page object matching this page inside a PDF.

    Arguments:
        pdf: object to query with get_page(). When None (the default),
            the one returned by self.doc.get_pdf() is used instead.

    Returns:
        Whatever get_page(self.page_nb) returns on that object.
    """
    source = self.doc.get_pdf() if pdf is None else pdf
    return source.get_page(self.page_nb)
@property
def pdf_page(self):
import logging
import re
import PIL.Image
import pyocr
import pyocr.builders
from ..common.page import BasicPage
from ..util import image2surface
logger = logging.getLogger(__name__)
class ImgPage(BasicPage):
"""
Represents a page. A page is a sub-element of ImgDoc.
"""
EXT_IMG = "jpg"
KEYWORD_HIGHLIGHT = 3
# Pattern built from BasicPage.FILE_PREFIX, a number with no leading zero,
# a literal dot and the image extension; matching ignores case.
FILE_REGEX = re.compile(
    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence (DeprecationWarning, SyntaxWarning since Python 3.12).
    # The resulting pattern text is unchanged.
    BasicPage.FILE_PREFIX + r"[1-9][0-9]*\." + EXT_IMG,
    re.IGNORECASE
)
can_edit = True
def __init__(self, doc, page_nb=None):
if page_nb is None:
def __init__(self, doc, page_nb=None):
if page_nb is None:
page_nb = doc.nb_pages
BasicPage.__init__(self, doc, page_nb)