How to use the flattentool.input.SpreadsheetInput function in flattentool

To help you get started, we’ve selected a few flattentool examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github OpenDataServices / flatten-tool / flattentool / input.py View on Github external
def get_sheet_lines(self, sheet_name):
        """Yield the rows of a single CSV sheet.

        The sheet is read from ``<self.input_name>/<sheet_name>.csv``, decoded
        with ``self.encoding``, wrapped in a ``DictReader`` and handed to
        ``self.generate_rows``; whatever that produces is yielded in order.
        """
        csv_path = os.path.join(self.input_name, sheet_name + ".csv")
        # The file stays open for as long as the generator is being consumed.
        with open(csv_path, encoding=self.encoding) as csv_file:
            yield from self.generate_rows(DictReader(csv_file), sheet_name)


class BadXLSXZipFile(BadZipFile):
    """Raised when a file named ``.xlsx`` turns out not to be an XLSX file."""


class XLSXInput(SpreadsheetInput):
    """Spreadsheet input that reads an XLSX workbook through openpyxl."""

    def read_sheets(self):
        """Open the workbook and record which sheets should be read.

        Sets ``self.workbook`` to the loaded workbook and
        ``self.sheet_names_map`` to an ordered mapping of every sheet name to
        itself. When ``self.include_sheets`` is truthy, sheets not listed in
        it are removed from the mapping.

        Raises:
            BadXLSXZipFile: if ``self.input_name`` cannot be opened because it
                is not a valid zip archive. The original ``BadZipFile`` is
                attached as the cause so the full chain shows in tracebacks.
        """
        try:
            # data_only=True asks openpyxl for the stored cell values rather
            # than formula text.
            self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
        except BadZipFile as e:
            raise BadXLSXZipFile(
                "The supplied file has extension .xlsx but isn't an XLSX file."
            ) from e

        self.sheet_names_map = OrderedDict(
            (sheet_name, sheet_name) for sheet_name in self.workbook.sheetnames
        )
        if self.include_sheets:
            # Iterate over a snapshot: the mapping is mutated inside the loop.
            for sheet in list(self.sheet_names_map):
                if sheet not in self.include_sheets:
                    self.sheet_names_map.pop(sheet)
github OpenDataServices / flatten-tool / flattentool / input.py View on Github external
output = OrderedDict()
    for k in input:
        if isinstance(input[k], list):
            output[k] = extract_list_to_value(input[k])
        elif isinstance(input[k], dict):
            output[k] = extract_dict_to_value(input[k])
        elif isinstance(input[k], Cell):
            output[k] = input[k].cell_value
        else:
            raise Exception(
                "Unexpected result type in the JSON cell tree: {}".format(input[k])
            )
    return output


class CSVInput(SpreadsheetInput):
    encoding = "utf-8"

    def get_sheet_headings(self, sheet_name):
        sheet_configuration = self.sheet_configuration[self.sheet_names_map[sheet_name]]
        configuration_line = 1 if sheet_configuration else 0
        if not sheet_configuration:
            sheet_configuration = self.base_configuration
        if not self.use_configuration:
            sheet_configuration = {}
        skip_rows = sheet_configuration.get("skipRows", 0)
        if sheet_configuration.get("ignore"):
            # returning empty headers is a proxy for no data in the sheet.
            return []

        with open(
            os.path.join(self.input_name, sheet_name + ".csv"), encoding=self.encoding
github OpenDataServices / flatten-tool / flattentool / input.py View on Github external
for row in remaining_rows:
            output_row = OrderedDict()
            for i, x in enumerate(row):
                header = coli_to_header[i]
                value = x.value
                if not header:
                    # None means that the cell will be ignored
                    value = None
                elif sheet_configuration.get("hashcomments") and header.startswith("#"):
                    # None means that the cell will be ignored
                    value = None
                output_row[header] = value
            yield output_row


class ODSInput(SpreadsheetInput):
    def read_sheets(self):
        """Load the ODS workbook and settle on the sheets to process.

        Builds an ``ODSReader`` for ``self.input_name``, uses its ``SHEETS``
        as ``self.sheet_names_map``, applies the include/exclude filters,
        stores the remaining sheet names in ``self.sub_sheet_names`` and then
        calls ``self.configure_sheets()``.
        """
        self.workbook = ODSReader(self.input_name)
        # No copy is taken, so the removals below presumably also affect
        # ``self.workbook.SHEETS`` -- NOTE(review): confirm this is intended.
        self.sheet_names_map = self.workbook.SHEETS

        wanted = self.include_sheets
        if wanted:
            # Decide what to drop first, then mutate the mapping.
            unwanted = [name for name in self.sheet_names_map if name not in wanted]
            for name in unwanted:
                self.sheet_names_map.pop(name)

        excluded = self.exclude_sheets or []
        for name in excluded:
            # Excluding a sheet that is not present is not an error.
            self.sheet_names_map.pop(name, None)

        self.sub_sheet_names = self.sheet_names_map.keys()
        self.configure_sheets()

    def _resolve_sheet_configuration(self, sheet_name):