How to use the spatula.PDF function in spatula

To help you get started, we’ve selected a few spatula examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: openstates / openstates / openstates / fl / bills.py — View on GitHub (external link)
raise ValueError("vote count incorrect: " + self.url)

        if nv_count != 0:
            # On a rare occasion, a member won't have a vote code,
            # which indicates that they didn't vote. The totals reflect
            # this.
            self.scraper.info("Votes don't add up; looking for additional ones")
            for line in self.lines[VOTE_START_INDEX:]:
                if not line.strip():
                    break
                for member in re.findall(r"\s{8,}([A-Z][a-z\'].*?)-\d{1,3}", line):
                    vote.vote("not voting", member)
        yield vote


class UpperComVote(PDF):
    def handle_page(self):
        (_, motion) = self.lines[5].split("FINAL ACTION:")
        motion = motion.strip()
        if not motion:
            self.scraper.warning("Vote appears to be empty")
            return

        vote_top_row = [
            self.lines.index(x)
            for x in self.lines
            if re.search(r"^\s+Yea\s+Nay.*?(?:\s+Yea\s+Nay)+$", x)
        ][0]
        yea_columns_end = self.lines[vote_top_row].index("Yea") + len("Yea")
        nay_columns_begin = self.lines[vote_top_row].index("Nay")

        votes = {"yes": [], "no": [], "other": []}
GitHub: openstates / openstates / openstates / fl / bills.py — View on GitHub (external link)
vote.yes(member)
            elif member_vote == "N":
                vote.no(member)
            elif member_vote == "-":
                vote.vote("not voting", member)
            # Parenthetical votes appear to not be counted in the
            # totals for Yea, Nay, _or_ Missed
            elif re.search(r"\([YN]\)", member_vote):
                continue
            else:
                raise ValueError("Unknown vote type found: {}".format(member_vote))

        yield vote


class SubjectPDF(PDF):
    pdftotext_type = "text-nolayout"

    def handle_page(self):
        """
            sort of a state machine

            after a blank line if there's an all caps phrase that's the new subject

            if a line contains (H|S)(\\d+) that bill gets current subject
        """
        subjects = defaultdict(set)

        SUBJ_RE = re.compile("^[A-Z ,()]+$")
        BILL_RE = re.compile(r"[HS]\d+(?:-[A-Z])?")

        subject = None
GitHub: openstates / openstates / openstates / fl / bills.py — View on GitHub (external link)
yield from self.scrape_page_items(
                        FloorVote,
                        vote_url,
                        date=vote_date,
                        chamber="lower",
                        bill=self.obj,
                    )
                else:
                    yield from self.scrape_page_items(
                        UpperComVote, vote_url, date=vote_date, bill=self.obj
                    )
        else:
            self.scraper.warning("No vote table for {}".format(self.obj.identifier))


class FloorVote(PDF):
    def handle_page(self):
        MOTION_INDEX = 4
        TOTALS_INDEX = 6
        VOTE_START_INDEX = 9

        if len(self.lines) < 2:
            self.scraper.warning("Bad PDF! " + self.url)
            return

        motion = self.lines[MOTION_INDEX].strip()
        # Sometimes there is no motion name, only "Passage" in the line above
        if not motion and not self.lines[MOTION_INDEX - 1].startswith("Calendar Page:"):
            motion = self.lines[MOTION_INDEX - 1]
            MOTION_INDEX -= 1
            TOTALS_INDEX -= 1
            VOTE_START_INDEX -= 1

spatula

A modern Python library for writing maintainable web scrapers.

MIT
Latest version published 4 months ago

Package Health Score

69 / 100
Full package analysis