Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from . import utils
from .table import TableFinder
from .container import Container
from copy import copy
from six import string_types
import re
# Precompiled regex that matches a literal "LT" at the start of a string.
# NOTE(review): presumably used to strip the "LT" prefix from pdfminer layout
# class names (e.g. "LTChar") -- its usage is not visible in this excerpt; confirm.
lt_pat = re.compile(r"^LT")
class Page(Container):
cached_properties = Container.cached_properties + [ "_layout" ]
is_original = True
def __init__(self, pdf, page_obj, page_number=None, initial_doctop=0):
self.pdf = pdf
self.page_obj = page_obj
self.page_number = page_number
self.rotation = self.page_obj.attrs.get("Rotate", 0) % 360
self.page_obj.rotate = self.rotation
self.initial_doctop = self.decimalize(initial_doctop)
cropbox = page_obj.attrs.get("CropBox", page_obj.attrs.get("MediaBox"))
self.cropbox = self.decimalize(cropbox)
if self.rotation in [ 90, 270 ]:
self.bbox = self.decimalize((
from pdfplumber.container import Container
from pdfplumber.page import Page
from pdfplumber.utils import decode_text
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator
from pdfminer.psparser import PSLiteral
class PDF(Container):
cached_properties = Container.cached_properties + [ "_pages" ]
def __init__(self, stream, pages=None, laparams=None, precision=0.001):
self.laparams = None if laparams == None else LAParams(**laparams)
self.stream = stream
self.pages_to_parse = pages
self.precision = precision
rsrcmgr = PDFResourceManager()
self.doc = PDFDocument(PDFParser(stream))
self.metadata = {}
for info in self.doc.info:
self.metadata.update(info)
for k, v in self.metadata.items():
if hasattr(v, "resolve"):
v = v.resolve()
if type(v) == list: