How to use the paddlehub.dataset.InputExample class in PaddleHub

To help you get started, we’ve selected a few PaddleHub examples, based on popular ways InputExample is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PaddlePaddle / PaddleHub / paddlehub / dataset / inews.py View on Github external
def _read_file(self, input_file, is_training):
    """Read a ``_!_``-delimited text file into a list of ``InputExample``.

    Each line is stripped and split on the literal separator ``_!_``.
    Field 0 becomes the label, field 2 becomes ``text_a`` and field 3
    becomes ``text_b``; the zero-based line number is used as the guid.

    Args:
        input_file: Path of the UTF-8 encoded file to read.
        is_training: When truthy, the first line of the file is skipped
            (presumably a header row -- confirm against the data files).

    Returns:
        A list of ``InputExample`` objects, one per line that was kept.

    Raises:
        IndexError: If a kept line has fewer than four fields.
    """
    with io.open(input_file, "r", encoding="UTF-8") as handle:
        # Pair every kept line with its original line number so that the
        # guid still reflects the position in the file after skipping.
        numbered_fields = (
            (line_no, raw.strip().split("_!_"))
            for line_no, raw in enumerate(handle)
            if line_no != 0 or not is_training
        )
        return [
            InputExample(
                guid=line_no,
                label=fields[0],
                text_a=fields[2],
                text_b=fields[3])
            for line_no, fields in numbered_fields
        ]
github PaddlePaddle / PaddleHub / paddlehub / dataset / tnews.py View on Github external
def _read_file(self, input_file):
    """Read a ``_!_``-delimited text file into a list of ``InputExample``.

    Each line is stripped and split on the literal separator ``_!_``.
    Field 0 becomes the guid, field 1 the label and field 3 ``text_a``.
    No line is skipped.

    Args:
        input_file: Path of the UTF-8 encoded file to read.

    Returns:
        A list of ``InputExample`` objects, one per line of the file.

    Raises:
        IndexError: If a line has fewer than four fields.
    """
    with io.open(input_file, "r", encoding="UTF-8") as handle:
        split_lines = (raw.strip().split("_!_") for raw in handle)
        return [
            InputExample(guid=fields[0], label=fields[1], text_a=fields[3])
            for fields in split_lines
        ]
github PaddlePaddle / PaddleHub / paddlehub / dataset / chnsenticorp.py View on Github external
def _read_tsv(self, input_file, quotechar=None):
    """Read a tab-separated file into a list of ``InputExample``.

    The first row is discarded as a header. For every remaining row,
    column 0 becomes the label and column 1 becomes ``text_a``; the guid
    is a counter that starts at 0 on the first data row.

    Args:
        input_file: Path of the UTF-8 encoded TSV file to read.
        quotechar: Passed through to ``csv.reader``.

    Returns:
        A list of ``InputExample`` objects, one per data row. An empty
        file yields an empty list.

    Raises:
        IndexError: If a data row has fewer than two columns.
    """
    with codecs.open(input_file, "r", encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        # Skip the header row. The default argument keeps an empty file
        # from leaking StopIteration out of this method.
        next(reader, None)
        return [
            InputExample(guid=seq_id, label=line[0], text_a=line[1])
            for seq_id, line in enumerate(reader)
        ]
github PaddlePaddle / PaddleHub / paddlehub / dataset / xnli.py View on Github external
def _read_tsv(self, input_file, quotechar=None):
    """Read a tab-separated file into a list of ``InputExample``.

    The first row is discarded as a header. For every remaining row,
    column 0 becomes ``text_a``, column 1 becomes ``text_b`` and column 2
    becomes the label; the guid is a counter that starts at 0 on the
    first data row.

    Args:
        input_file: Path of the UTF-8 encoded TSV file to read.
        quotechar: Passed through to ``csv.reader``.

    Returns:
        A list of ``InputExample`` objects, one per data row. An empty
        file yields an empty list.

    Raises:
        IndexError: If a data row has fewer than three columns.
    """
    with io.open(input_file, "r", encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        # Skip the header row. The default argument keeps an empty file
        # from leaking StopIteration out of this method.
        next(reader, None)
        return [
            InputExample(
                guid=seq_id, label=line[2], text_a=line[0], text_b=line[1])
            for seq_id, line in enumerate(reader)
        ]
github PaddlePaddle / PaddleHub / paddlehub / dataset / nlpcc_dbqa.py View on Github external
def _read_tsv(self, input_file, quotechar=None):
    """Read a tab-separated file into a list of ``InputExample``.

    The first row is discarded as a header. For every remaining row,
    column 1 becomes ``text_a``, column 2 becomes ``text_b`` and column 3
    becomes the label; column 0 is not used. The guid is a counter that
    starts at 0 on the first data row.

    Args:
        input_file: Path of the UTF-8 encoded TSV file to read.
        quotechar: Passed through to ``csv.reader``.

    Returns:
        A list of ``InputExample`` objects, one per data row. An empty
        file yields an empty list.

    Raises:
        IndexError: If a data row has fewer than four columns.
    """
    with codecs.open(input_file, "r", encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        # Skip the header row. The default argument keeps an empty file
        # from leaking StopIteration out of this method.
        next(reader, None)
        return [
            InputExample(
                guid=seq_id, label=line[3], text_a=line[1], text_b=line[2])
            for seq_id, line in enumerate(reader)
        ]
github PaddlePaddle / PaddleHub / paddlehub / dataset / glue.py View on Github external
else:
                    label_index, text_a_index, text_b_index = [-1, 8, 9]
            elif self.sub_dataset in ['CoLA']:
                if wo_label:
                    label_index, text_a_index, text_b_index = [None, 1, None]
                else:
                    label_index, text_a_index, text_b_index = [1, 3, None]
            elif self.sub_dataset in ['STS-B']:
                if wo_label:
                    label_index, text_a_index, text_b_index = [None, -2, -1]
                else:
                    label_index, text_a_index, text_b_index = [-1, -3, -2]

            for line in reader:
                try:
                    example = InputExample(
                        guid=seq_id,
                        text_a=line[text_a_index],
                        text_b=line[text_b_index]
                        if text_b_index is not None else None,
                        label=line[label_index]
                        if label_index is not None else None)
                    seq_id += 1
                    examples.append(example)
                except:
                    logger.info("[Discard Incorrect Data] " + "\t".join(line))
            return examples
github PaddlePaddle / PaddleHub / paddlehub / dataset / toxic.py View on Github external
def _read_csv(self, input_file, quotechar=None):
    """Load a CSV file into a list of multi-label ``InputExample``.

    The file is parsed with ``pandas.read_csv``. For every row, the
    ``"id"`` column becomes the guid, the ``"comment_text"`` column
    becomes ``text_a``, and the values sliced from index 2 onward are
    converted with ``int`` and collected into the label list.

    Args:
        input_file: Path of the UTF-8 encoded CSV file to read.
        quotechar: Accepted but not used by this method.

    Returns:
        A list of ``InputExample`` objects, one per row of the file.
    """
    frame = pd.read_csv(input_file, encoding="UTF-8")
    return [
        InputExample(
            guid=record["id"],
            text_a=record["comment_text"],
            # Every value after the first two is treated as a label flag.
            label=[int(flag) for flag in record[2:]])
        for _, record in frame.iterrows()
    ]