Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this fragment starts mid-method; `all_task_labels`,
# `all_process_labels`, `all_material_labels` and `index_max_instance`
# are defined out of view above.
all_labels = {
"Task": all_task_labels,
"Process": all_process_labels,
"Material": all_material_labels,
}
# For every entity type, print a frequency table of its labels.
for entity_type in self.entity_types:
label_stats = dict(collections.Counter(all_labels[entity_type]))
classes = list(set(label_stats.keys()))
classes = sorted(classes)
header = ["label index", "label name", "count"]
rows = [
(class_, self.idx2classnames[class_], label_stats[class_])
for class_ in classes
]
formatted = wasabi.table(data=rows, header=header, divider=True)
self.msg_printer.divider(
f"Label Stats for Science IE {self.dataset_type} dataset with Entity Type {entity_type}"
)
print(formatted)
# print some other stats
# Pick one instance (presumably the longest, per the variable name —
# TODO confirm) and split each colon-joined label into its three parts.
random_instance = self.word_instances[index_max_instance]
random_label = self.labels[index_max_instance].split()
random_task_label = [label.split(":")[0] for label in random_label]
random_process_label = [label.split(":")[1] for label in random_label]
random_material_label = [label.split(":")[2] for label in random_label]
# NOTE(review): `assert` is stripped under `python -O`; a raise would be safer.
assert len(random_instance) == len(random_label)
# NOTE(review): divider says "Parscit" while surrounding messages say
# ScienceIE — looks like a copy-paste slip; confirm before changing.
self.msg_printer.divider(
f"Random Instance from Parscit {self.dataset_type.capitalize()} Dataset"
)
self.msg_printer.text(title="Task Labels")
def _get_label_stats_table(self):
    """Return a wasabi-formatted table of label frequencies.

    Counts every whitespace-separated label across ``self.labels`` and
    renders one row per distinct label: its index (from
    ``self.wrapped_cls.get_classname2idx()``), its name, and its count.

    Returns
    -------
    str
        The table string produced by ``wasabi.table``.
    """
    # Flatten the space-separated label strings into one list of labels.
    all_labels = [
        label for label_string in self.labels for label in label_string.split()
    ]
    labels_stats = collections.Counter(all_labels)
    # Counter keys are already unique — no need for an intermediate set;
    # sort them for a deterministic row order.
    classes = sorted(labels_stats)
    header = ["label index", "label name", "count"]
    classname2idx = self.wrapped_cls.get_classname2idx()
    rows = [
        (classname2idx[class_], class_, labels_stats[class_])
        for class_ in classes
    ]
    return wasabi.table(data=rows, header=header, divider=True)
# NOTE(review): fragment — `tagged_string` here refers to a value built
# out of view; it is re-bound below for the material labels.
print(tagged_string)
self.msg_printer.text(title="Material Labels")
tagged_string = self.tag_visualizer.visualize_tokens(
random_instance, random_material_label
)
print(tagged_string)
# Summary table: instance count and longest-instance length
# (`max_len_instance` is defined out of view).
num_instances = len(self)
other_stats_header = ["", "Value"]
rows = [
("Num Instances", num_instances),
("Longest Instance Length", max_len_instance),
]
other_stats_table = wasabi.table(
data=rows, header=other_stats_header, divider=True
)
self.msg_printer.divider(
f"Other stats for ScienceIE {self.dataset_type} dataset"
)
print(other_stats_table)
# Fragment: build a per-class precision/recall/F-measure table; `classes`
# and the metric dicts come from out-of-view code above.
header_row = [" ", "Precision", "Recall", "F_measure"]
rows = []
for class_num in classes:
p = precision_dict[class_num]
r = recall_dict[class_num]
f = fscore_dict[class_num]
rows.append(
(f"cls_{class_num} ({idx2labelname_mapping[int(class_num)]})", p, r, f)
)
# Visual separator row, then the macro/micro aggregate rows.
rows.append(["-"] * 4)
rows.append(["Macro", macro_precision, macro_recall, macro_fscore])
rows.append(["Micro", micro_precision, micro_recall, micro_fscore])
return wasabi.table(rows, header=header_row, divider=True)
# Fragment: show one tagged instance from the Parscit dataset, then a
# small summary table (`random_instance`/`random_label` built out of view).
self.msg_printer.divider(
f"Random Instance from Parscit {self.dataset_type.capitalize()} Dataset"
)
tagged_string = self.tag_visualizer.visualize_tokens(
random_instance, random_label
)
print(tagged_string)
num_instances = len(self)
other_stats_header = ["", "Value"]
rows = [
("Num Instances", num_instances),
("Longest Instance Length", self.instance_max_len),
]
other_stats_table = wasabi.table(
data=rows, header=other_stats_header, divider=True
)
self.msg_printer.divider(f"Other stats for Parscit {self.dataset_type} dataset")
print(other_stats_table)
def print_stats(self) -> None:
    """Print vocabulary statistics as a wasabi table.

    Reports the original vocabulary length, the clipped vocabulary
    length, and the top-N most frequent words (N = 5), preceded by a
    "VOCAB STATS" divider from ``self.msg_printer``.
    """
    orig_vocab_len = self.get_orig_vocab_len()
    vocab_len = self.get_vocab_len()
    N = 5
    top_n = self.get_topn_frequent_words(n=N)
    data = [
        ("Original vocab length", orig_vocab_len),
        ("Clipped vocab length", vocab_len),
        # f-string for consistency with the rest of the file.
        (f"Top {N} words", top_n),
    ]
    header = ("Stats Description", "#")
    table_string = wasabi.table(data=data, header=header, divider=True)
    self.msg_printer.divider("VOCAB STATS")
    print(table_string)
# Fragment: assemble a metric report table for one namespace
# (`namespace` and `reports` are defined out of view above).
metric = self.get_metric()[namespace]
acc = metric["accuracy"]
precision = metric["precision"]
recall = metric["recall"]
fscore = metric["fscore"]
# build table
header_row = ["Metric", "Value"]
rows = [
("Acc", acc),
("Precision", precision),
("Recall", recall),
("Fscore", fscore),
]
table = wasabi.table(rows, header=header_row, divider=True)
# Store the formatted table string under this namespace and return all reports.
reports[namespace] = table
return reports
"""
# Fragment: follows a docstring closed above and is cut off below
# (the final `msg_printer.info` call is not closed in this view).
num_instances = len(self.instances)
all_labels = []
for idx in range(num_instances):
tokens, labels, len_tokens = self[idx]
all_labels.append(labels.item())
# `.item()` above suggests `labels` is a 0-d tensor/array — TODO confirm.
labels_stats = dict(collections.Counter(all_labels))
classes = list(set(labels_stats.keys()))
classes = sorted(classes)
header = ["label index", "label name", "count"]
rows = [
(class_, self.idx2classname[class_], labels_stats[class_])
for class_ in classes
]
formatted = wasabi.table(data=rows, header=header, divider=True)
self.msg_printer.divider("Stats for {0} dataset".format(self.dataset_type))
print(formatted)
self.msg_printer.info(
"Number of instances in {0} dataset - {1}".format(
self.dataset_type, len(self)
)