How to use the pdfplumber.container.Container class in pdfplumber

To help you get started, we’ve selected a few pdfplumber examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: jsvine/pdfplumber, file pdfplumber/page.py (view on GitHub)
from . import utils
from .table import TableFinder
from .container import Container
from copy import copy

from six import string_types
import re
# Precompiled regex that matches strings beginning with "LT".
# NOTE(review): its consumer is not visible in this excerpt — confirm usage in the full page.py.
lt_pat = re.compile(r"^LT")

class Page(Container):
    cached_properties = Container.cached_properties + [ "_layout" ]
    is_original = True

    def __init__(self, pdf, page_obj, page_number=None, initial_doctop=0):
        self.pdf = pdf
        self.page_obj = page_obj
        self.page_number = page_number
        self.rotation = self.page_obj.attrs.get("Rotate", 0) % 360
        self.page_obj.rotate = self.rotation
        self.initial_doctop = self.decimalize(initial_doctop)

        cropbox = page_obj.attrs.get("CropBox", page_obj.attrs.get("MediaBox"))
        self.cropbox = self.decimalize(cropbox)

        if self.rotation in [ 90, 270 ]:
            self.bbox = self.decimalize((
Source: jsvine/pdfplumber, file pdfplumber/pdf.py (view on GitHub)
from pdfplumber.container import Container
from pdfplumber.page import Page
from pdfplumber.utils import decode_text

from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.layout import LAParams
from pdfminer.converter import PDFPageAggregator
from pdfminer.psparser import PSLiteral

class PDF(Container):
    cached_properties = Container.cached_properties + [ "_pages" ]

    def __init__(self, stream, pages=None, laparams=None, precision=0.001):
        self.laparams = None if laparams == None else LAParams(**laparams)
        self.stream = stream
        self.pages_to_parse = pages
        self.precision = precision
        rsrcmgr = PDFResourceManager()
        self.doc = PDFDocument(PDFParser(stream))
        self.metadata = {}
        for info in self.doc.info:
            self.metadata.update(info)
        for k, v in self.metadata.items():
            if hasattr(v, "resolve"):
                v = v.resolve()
            if type(v) == list: