Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _read_file(self, input_file, is_training):
    """Load examples from a text file whose fields are separated by ``_!_``.

    Each line is stripped and split on the literal ``_!_`` marker. Field 0
    becomes the label, field 2 becomes ``text_a`` and field 3 becomes
    ``text_b``. The guid is the zero-based line number within the file.

    Args:
        input_file: Path of the file, opened as UTF-8 text.
        is_training: When truthy, the first line of the file is skipped.

    Returns:
        List of ``InputExample`` objects in file order.
    """
    collected = []
    with io.open(input_file, "r", encoding="UTF-8") as handle:
        for line_no, raw in enumerate(handle):
            # Only the very first line is dropped, and only in training mode,
            # so guids of the remaining lines keep their original line number.
            if is_training and line_no == 0:
                continue
            fields = raw.strip().split("_!_")
            collected.append(
                InputExample(
                    guid=line_no,
                    label=fields[0],
                    text_a=fields[2],
                    text_b=fields[3],
                )
            )
    return collected
def _read_file(self, input_file):
    """Load examples from a text file whose fields are separated by ``_!_``.

    Each line is stripped and split on the literal ``_!_`` marker. Field 0
    becomes the guid, field 1 the label and field 3 ``text_a``.

    Args:
        input_file: Path of the file, opened as UTF-8 text.

    Returns:
        List of ``InputExample`` objects in file order.
    """
    with io.open(input_file, "r", encoding="UTF-8") as handle:
        # Split every line up front; the list is built while the file is open.
        rows = [raw.strip().split("_!_") for raw in handle]
    return [
        InputExample(guid=fields[0], label=fields[1], text_a=fields[3])
        for fields in rows
    ]
def _read_tsv(self, input_file, quotechar=None):
    """Read a tab-separated file with a header row into ``InputExample``s.

    The first row is treated as a header and discarded. Every following row
    yields one example: column 0 is the label and column 1 is ``text_a``.
    The guid is the zero-based position of the row among the data rows.

    Args:
        input_file: Path of the file, opened as UTF-8 text.
        quotechar: Quote character forwarded to ``csv.reader``.

    Returns:
        List of examples in file order. An empty file (no header at all)
        yields an empty list.

    Raises:
        IndexError: If a data row has fewer than two columns.
    """
    with codecs.open(input_file, "r", encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration out of this method.
        next(reader, None)
        return [
            InputExample(guid=seq_id, label=line[0], text_a=line[1])
            for seq_id, line in enumerate(reader)
        ]
def _read_tsv(self, input_file, quotechar=None):
    """Read a tab-separated file with a header row into ``InputExample``s.

    The first row is treated as a header and discarded. Every following row
    yields one example: column 0 is ``text_a``, column 1 is ``text_b`` and
    column 2 is the label. The guid is the zero-based position of the row
    among the data rows.

    Args:
        input_file: Path of the file, opened as UTF-8 text.
        quotechar: Quote character forwarded to ``csv.reader``.

    Returns:
        List of examples in file order. An empty file (no header at all)
        yields an empty list.

    Raises:
        IndexError: If a data row has fewer than three columns.
    """
    with io.open(input_file, "r", encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration out of this method.
        next(reader, None)
        return [
            InputExample(
                guid=seq_id, label=line[2], text_a=line[0], text_b=line[1])
            for seq_id, line in enumerate(reader)
        ]
def _read_tsv(self, input_file, quotechar=None):
    """Read a tab-separated file with a header row into ``InputExample``s.

    The first row is treated as a header and discarded. Every following row
    yields one example: column 1 is ``text_a``, column 2 is ``text_b`` and
    column 3 is the label (column 0 is not used). The guid is the zero-based
    position of the row among the data rows.

    Args:
        input_file: Path of the file, opened as UTF-8 text.
        quotechar: Quote character forwarded to ``csv.reader``.

    Returns:
        List of examples in file order. An empty file (no header at all)
        yields an empty list.

    Raises:
        IndexError: If a data row has fewer than four columns.
    """
    with codecs.open(input_file, "r", encoding="UTF-8") as f:
        reader = csv.reader(f, delimiter="\t", quotechar=quotechar)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration out of this method.
        next(reader, None)
        return [
            InputExample(
                guid=seq_id, label=line[3], text_a=line[1], text_b=line[2])
            for seq_id, line in enumerate(reader)
        ]
else:
label_index, text_a_index, text_b_index = [-1, 8, 9]
elif self.sub_dataset in ['CoLA']:
if wo_label:
label_index, text_a_index, text_b_index = [None, 1, None]
else:
label_index, text_a_index, text_b_index = [1, 3, None]
elif self.sub_dataset in ['STS-B']:
if wo_label:
label_index, text_a_index, text_b_index = [None, -2, -1]
else:
label_index, text_a_index, text_b_index = [-1, -3, -2]
for line in reader:
try:
example = InputExample(
guid=seq_id,
text_a=line[text_a_index],
text_b=line[text_b_index]
if text_b_index is not None else None,
label=line[label_index]
if label_index is not None else None)
seq_id += 1
examples.append(example)
except:
logger.info("[Discard Incorrect Data] " + "\t".join(line))
return examples
def _read_csv(self, input_file, quotechar=None):
    """Load multi-label examples from a CSV file via pandas.

    For every row, the ``id`` column supplies the guid, the ``comment_text``
    column supplies ``text_a``, and every value from the third position
    onward is converted with ``int`` to form the label list.

    Args:
        input_file: Path handed to ``pd.read_csv`` (read as UTF-8).
        quotechar: Accepted for signature compatibility; not used here.

    Returns:
        List of ``InputExample`` objects in row order.
    """
    frame = pd.read_csv(input_file, encoding="UTF-8")
    collected = []
    # The row index from iterrows() is not needed; only the row contents are.
    for _, record in frame.iterrows():
        label_values = [int(cell) for cell in record[2:]]
        collected.append(
            InputExample(
                guid=record["id"],
                label=label_values,
                text_a=record["comment_text"],
            )
        )
    return collected