knowledge_graph,
tokenized_question,
self._entity_token_indexers,
tokenizer=self._tokenizer,
)
if self._tagger_only:
fields: Dict[str, Field] = {"tokens": question_field}
if entity_literals is not None:
entity_tags = self._get_entity_tags(
self._all_entities, table_field, entity_literals, tokenized_question
)
if debug_counter > 0:
logger.info(f"raw entity tags = {entity_tags}")
entity_tags_bio = self._convert_tags_bio(entity_tags)
fields["tags"] = SequenceLabelField(entity_tags_bio, question_field)
additional_metadata["tags_gold"] = entity_tags_bio
additional_metadata["words"] = [x.text for x in tokenized_question]
fields["metadata"] = MetadataField(additional_metadata)
return Instance(fields)
world_field = MetadataField(world)
production_rule_fields: List[Field] = []
for production_rule in world.all_possible_actions():
_, rule_right_side = production_rule.split(" -> ")
is_global_rule = not world.is_table_entity(rule_right_side)
field = ProductionRuleField(production_rule, is_global_rule)
production_rule_fields.append(field)
action_field = ListField(production_rule_fields)
fields = {
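
# A minimal, self-contained sketch of the "tagger only" branch above: a question
# TextField, BIO entity tags attached as a SequenceLabelField over the same tokens,
# and the gold tags stashed in a MetadataField. The tokens, tags, and indexer name
# are toy values, not taken from the original reader.
from typing import Dict

from allennlp.data import Instance, Token
from allennlp.data.fields import Field, MetadataField, SequenceLabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

tokenized_question = [Token(t) for t in ["which", "city", "hosted", "the", "1900", "olympics"]]
question_field = TextField(tokenized_question, {"tokens": SingleIdTokenIndexer()})
entity_tags_bio = ["O", "O", "O", "O", "B-number", "B-entity"]  # toy BIO tags

fields: Dict[str, Field] = {"tokens": question_field}
fields["tags"] = SequenceLabelField(entity_tags_bio, question_field)
fields["metadata"] = MetadataField(
    {"tags_gold": entity_tags_bio, "words": [t.text for t in tokenized_question]}
)
instance = Instance(fields)
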
lemmas: List[str] = None,
lemma_rules: List[str] = None,
feats: List[str] = None,
ids: List[str] = None,
multiword_ids: List[str] = None,
multiword_forms: List[str] = None) -> Instance:
fields: Dict[str, Field] = {}
tokens = TextField([Token(w) for w in words], self._token_indexers)
fields["tokens"] = tokens
if lemma_rules:
fields["lemmas"] = SequenceLabelField(lemma_rules, tokens, label_namespace="lemmas")
if "feats":
fields["feats"] = SequenceLabelField(feats, tokens, label_namespace="feats")
# TODO: parameter to turn this off
feature_seq = []
for feat in feats:
features = feat.lower().split(";") if feat != "_" else "_"
dimensions = {dimension: "_" for dimension in unimorph_schema}
if feat != "_":
for label in features:
# Use regex to handle special cases where multi-labels are contained inside "{}"
first_label = re.findall(r"(?#{)([a-zA-Z0-9.\-_]+)(?#\+|\/|})", label)
first_label = first_label[0] if first_label else label
if first_label not in self.label_to_dimension:
if first_label.startswith("arg"):
question_span_fields: List[Field] = [
SpanField(span[0], span[1], question_field)
for span in answer_info["answer_question_spans"]
]
if not question_span_fields:
question_span_fields.append(SpanField(-1, -1, question_field))
fields["answer_as_question_spans"] = ListField(question_span_fields)
add_sub_signs_field: List[Field] = []
for signs_for_one_add_sub_expression in answer_info["signs_for_add_sub_expressions"]:
add_sub_signs_field.append(
SequenceLabelField(signs_for_one_add_sub_expression, numbers_in_passage_field)
)
if not add_sub_signs_field:
add_sub_signs_field.append(
SequenceLabelField([0] * len(number_tokens), numbers_in_passage_field)
)
fields["answer_as_add_sub_expressions"] = ListField(add_sub_signs_field)
count_fields: List[Field] = [
LabelField(count_label, skip_indexing=True) for count_label in answer_info["counts"]
]
if not count_fields:
count_fields.append(LabelField(-1, skip_indexing=True))
fields["answer_as_counts"] = ListField(count_fields)
metadata.update(additional_metadata)
fields["metadata"] = MetadataField(metadata)
return Instance(fields)
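
# Hedged, self-contained sketch of the DROP-style answer fields above, with toy values in
# place of the reader's answer_info dictionary. It illustrates the padding convention used
# when a candidate list is empty: a (-1, -1) SpanField, an all-zero sign sequence, or a -1
# count label. Field names and tokens here are illustrative, not the reader's exact ones.
from typing import Dict, List

from allennlp.data import Instance, Token
from allennlp.data.fields import (
    Field,
    LabelField,
    ListField,
    MetadataField,
    SequenceLabelField,
    SpanField,
    TextField,
)
from allennlp.data.token_indexers import SingleIdTokenIndexer

indexers = {"tokens": SingleIdTokenIndexer()}
question_field = TextField([Token(t) for t in ["how", "many", "points", "total"]], indexers)
number_tokens = [Token("14"), Token("7")]
numbers_in_passage_field = TextField(number_tokens, indexers)

answer_info = {
    "answer_question_spans": [],                # no span answer in this toy example
    "signs_for_add_sub_expressions": [[1, 1]],  # 14 + 7
    "counts": [],
}

fields: Dict[str, Field] = {"question": question_field, "numbers": numbers_in_passage_field}

question_span_fields: List[Field] = [
    SpanField(span[0], span[1], question_field)
    for span in answer_info["answer_question_spans"]
]
if not question_span_fields:
    question_span_fields.append(SpanField(-1, -1, question_field))
fields["answer_as_question_spans"] = ListField(question_span_fields)

add_sub_signs_field: List[Field] = [
    SequenceLabelField(signs, numbers_in_passage_field)
    for signs in answer_info["signs_for_add_sub_expressions"]
]
if not add_sub_signs_field:
    add_sub_signs_field.append(
        SequenceLabelField([0] * len(number_tokens), numbers_in_passage_field)
    )
fields["answer_as_add_sub_expressions"] = ListField(add_sub_signs_field)

count_fields: List[Field] = [LabelField(count, skip_indexing=True) for count in answer_info["counts"]]
if not count_fields:
    count_fields.append(LabelField(-1, skip_indexing=True))
fields["answer_as_counts"] = ListField(count_fields)

fields["metadata"] = MetadataField({"original_question": "how many points total ?"})
instance = Instance(fields)
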
p1_span_end = span_end
if num_context_answers > 2:
p3_answer_marker_list.append(
SequenceLabelField(
prev_answer_marker_lists[3], passage_field, label_namespace="answer_tags"
)
)
if num_context_answers > 1:
p2_answer_marker_list.append(
SequenceLabelField(
prev_answer_marker_lists[2], passage_field, label_namespace="answer_tags"
)
)
if num_context_answers > 0:
p1_answer_marker_list.append(
SequenceLabelField(
prev_answer_marker_lists[1], passage_field, label_namespace="answer_tags"
)
)
fields["span_start"] = ListField(span_start_list)
fields["span_end"] = ListField(span_end_list)
if num_context_answers > 0:
fields["p1_answer_marker"] = ListField(p1_answer_marker_list)
if num_context_answers > 1:
fields["p2_answer_marker"] = ListField(p2_answer_marker_list)
if num_context_answers > 2:
fields["p3_answer_marker"] = ListField(p3_answer_marker_list)
fields["yesno_list"] = ListField(
[LabelField(yesno, label_namespace="yesno_labels") for yesno in yesno_list]
)
fields["followup_list"] = ListField(
[LabelField(followup, label_namespace="followup_labels") for followup in followup_list]
)
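
# Hedged sketch of the dialogue-history markers above: each previous answer gets a tag
# sequence over the passage in the shared "answer_tags" namespace, the sequences are
# wrapped in a ListField, and the per-turn yes/no answers become LabelFields. The passage
# tokens, marker tags, and yes/no values here are toy stand-ins for the QuAC reader's data.
from typing import Dict, List

from allennlp.data import Instance, Token
from allennlp.data.fields import Field, LabelField, ListField, SequenceLabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

passage_field = TextField(
    [Token(t) for t in ["paris", "is", "the", "capital", "of", "france"]],
    {"tokens": SingleIdTokenIndexer()},
)
# One marker sequence per question turn; "<1_start>"/"<1_end>" style tags bracket the
# previous gold answer, everything else is "O".
prev_answer_markers = [
    ["O", "O", "O", "O", "O", "O"],
    ["<1_start>", "O", "O", "<1_end>", "O", "O"],
]

p1_answer_marker_list: List[Field] = [
    SequenceLabelField(markers, passage_field, label_namespace="answer_tags")
    for markers in prev_answer_markers
]

fields: Dict[str, Field] = {"passage": passage_field}
fields["p1_answer_marker"] = ListField(p1_answer_marker_list)
fields["yesno_list"] = ListField(
    [LabelField(yesno, label_namespace="yesno_labels") for yesno in ["x", "y"]]
)
instance = Instance(fields)
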
self, # type: ignore
tokens: List[Token],
ner_tags: List[str] = None,
) -> Instance:
"""
We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
"""
sequence = TextField(tokens, self._token_indexers)
instance_fields: Dict[str, Field] = {"tokens": sequence}
instance_fields["metadata"] = MetadataField({"words": [x.text for x in tokens]})
# Add "tag label" to instance
if ner_tags is not None:
if self._coding_scheme == "BIOUL":
ner_tags = to_bioul(ner_tags, encoding="BIO")
instance_fields["tags"] = SequenceLabelField(ner_tags, sequence)
return Instance(instance_fields)
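
# Self-contained version of the NER branch above: convert BIO tags to BIOUL with
# AllenNLP's to_bioul helper and attach the result as a SequenceLabelField. The tokens
# and tags are toy values.
from typing import Dict

from allennlp.data import Instance, Token
from allennlp.data.dataset_readers.dataset_utils import to_bioul
from allennlp.data.fields import Field, MetadataField, SequenceLabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

tokens = [Token(t) for t in ["John", "lives", "in", "New", "York"]]
ner_tags = ["B-PER", "O", "O", "B-LOC", "I-LOC"]

sequence = TextField(tokens, {"tokens": SingleIdTokenIndexer()})
instance_fields: Dict[str, Field] = {"tokens": sequence}
instance_fields["metadata"] = MetadataField({"words": [t.text for t in tokens]})
# to_bioul(..., encoding="BIO") turns the tags into ["U-PER", "O", "O", "B-LOC", "L-LOC"]
instance_fields["tags"] = SequenceLabelField(to_bioul(ner_tags, encoding="BIO"), sequence)
instance = Instance(instance_fields)
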
def text_to_instance( # type: ignore
self, tokens: List[Token], tags: List[str] = None
) -> Instance:
"""
We take `pre-tokenized` input here, because we don't have a tokenizer in this class.
"""
fields: Dict[str, Field] = {}
sequence = TextField(tokens, self._token_indexers)
fields["tokens"] = sequence
fields["metadata"] = MetadataField({"words": [x.text for x in tokens]})
if tags is not None:
fields["tags"] = SequenceLabelField(tags, sequence)
return Instance(fields)
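
# Quick usage sketch for a text_to_instance like the one above: build a couple of toy
# instances directly, create a Vocabulary from them, and index the fields. Because no
# label_namespace was passed, the SequenceLabelField tags land in the default "labels"
# namespace. The helper, tokens, and tags here are illustrative only.
from allennlp.data import Instance, Token, Vocabulary
from allennlp.data.fields import SequenceLabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer


def make_instance(words, tags):
    sequence = TextField([Token(w) for w in words], {"tokens": SingleIdTokenIndexer()})
    return Instance({"tokens": sequence, "tags": SequenceLabelField(tags, sequence)})


instances = [
    make_instance(["The", "dog", "barks"], ["DET", "NOUN", "VERB"]),
    make_instance(["Dogs", "bark"], ["NOUN", "VERB"]),
]
vocab = Vocabulary.from_instances(instances)
for instance in instances:
    instance.index_fields(vocab)
print(vocab.get_token_to_index_vocabulary("labels"))  # e.g. {'NOUN': 0, 'VERB': 1, 'DET': 2}
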
if ner_tags is not None
else None
)
else:
# the default IOB1
coded_chunks = chunk_tags
coded_ner = ner_tags
# Add "feature labels" to instance
if "pos" in self.feature_labels:
if pos_tags is None:
raise ConfigurationError(
"Dataset reader was specified to use pos_tags as "
"features. Pass them to text_to_instance."
)
instance_fields["pos_tags"] = SequenceLabelField(pos_tags, sequence, "pos_tags")
if "chunk" in self.feature_labels:
if coded_chunks is None:
raise ConfigurationError(
"Dataset reader was specified to use chunk tags as "
"features. Pass them to text_to_instance."
)
instance_fields["chunk_tags"] = SequenceLabelField(coded_chunks, sequence, "chunk_tags")
if "ner" in self.feature_labels:
if coded_ner is None:
raise ConfigurationError(
"Dataset reader was specified to use NER tags as "
" features. Pass them to text_to_instance."
)
instance_fields["ner_tags"] = SequenceLabelField(coded_ner, sequence, "ner_tags")
# Add "tag label" to instance
def convert_to_instance(list_words, sentiment_label, PoS_labels):
    tokenized_words = [Token(word) for word in list_words]
    word_and_character_text_field = TextField(
        tokenized_words,
        token_indexers={
            "tokens": SingleIdTokenIndexer(namespace="token_ids"),
            "chars": TokenCharactersIndexer(namespace="token_chars"),
        },
    )
    sentiment_analysis_label_field = LabelField(sentiment_label, label_namespace="sentiment_tags")
    PoS_labels_field = SequenceLabelField(
        labels=PoS_labels,
        sequence_field=word_and_character_text_field,
        label_namespace="PoS_tags",
    )
    instance_i = Instance(
        {
            "text_field": word_and_character_text_field,
            "label_sentiment": sentiment_analysis_label_field,
            "Pos_labels": PoS_labels_field,
        }
    )
    return instance_i
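
# Usage sketch for convert_to_instance above, assuming that function and its AllenNLP
# imports (Token, TextField, LabelField, SequenceLabelField, Instance, and the two
# indexers) are in scope. It shows that the custom namespaces ("sentiment_tags",
# "PoS_tags") get their own vocabularies. The toy sentences and labels are illustrative.
from allennlp.data import Vocabulary

instances = [
    convert_to_instance(["a", "great", "movie"], "positive", ["DET", "ADJ", "NOUN"]),
    convert_to_instance(["boring"], "negative", ["ADJ"]),
]
vocab = Vocabulary.from_instances(instances)
print(vocab.get_vocab_size("sentiment_tags"))           # 2 (positive / negative)
print(vocab.get_token_to_index_vocabulary("PoS_tags"))  # e.g. {'ADJ': 0, 'DET': 1, 'NOUN': 2}
for instance in instances:
    instance.index_fields(vocab)
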
if u'chunk' in self.feature_labels:
if coded_chunks is None:
raise ConfigurationError(u"Dataset reader was specified to use chunk tags as "
u"features. Pass them to text_to_instance.")
instance_fields[u'chunk_tags'] = SequenceLabelField(coded_chunks, sequence, u"chunk_tags")
if u'ner' in self.feature_labels:
if coded_ner is None:
raise ConfigurationError(u"Dataset reader was specified to use NER tags as "
u" features. Pass them to text_to_instance.")
instance_fields[u'ner_tags'] = SequenceLabelField(coded_ner, sequence, u"ner_tags")
# Add "tag label" to instance
if self.tag_label == u'ner' and coded_ner is not None:
instance_fields[u'tags'] = SequenceLabelField(coded_ner, sequence)
elif self.tag_label == u'pos' and pos_tags is not None:
instance_fields[u'tags'] = SequenceLabelField(pos_tags, sequence)
elif self.tag_label == u'chunk' and coded_chunks is not None:
instance_fields[u'tags'] = SequenceLabelField(coded_chunks, sequence)
return Instance(instance_fields)
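
# Sketch of what a "tags" SequenceLabelField like the ones above becomes at tensor time:
# index the field against a vocabulary and call as_tensor. The tiny vocabulary and values
# are illustrative; the "labels" namespace is the SequenceLabelField default.
from allennlp.data import Token, Vocabulary
from allennlp.data.fields import SequenceLabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

sequence = TextField(
    [Token(t) for t in ["John", "lives", "here"]], {"tokens": SingleIdTokenIndexer()}
)
tags = SequenceLabelField(["U-PER", "O", "O"], sequence)

vocab = Vocabulary()
vocab.add_token_to_namespace("O", namespace="labels")      # index 0
vocab.add_token_to_namespace("U-PER", namespace="labels")  # index 1
tags.index(vocab)
print(tags.as_tensor(tags.get_padding_lengths()))  # tensor([1, 0, 0])
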
if "modified_pos" in self.feature_labels:
if modified_pos_tags is None:
raise ConfigurationError(
"Dataset reader was specified to use modified POS tags as "
" features. Pass them to text_to_instance."
)
fields["modified_pos_tags"] = SequenceLabelField(
modified_pos_tags, text_field, "modified_pos_tags"
)
if "predicate_arg" in self.feature_labels:
if predicate_arg_categories is None:
raise ConfigurationError(
"Dataset reader was specified to use predicate arg tags as "
" features. Pass them to text_to_instance."
)
fields["predicate_arg_tags"] = SequenceLabelField(
predicate_arg_categories, text_field, "predicate_arg_tags"
)
# Add "tag label" to instance
if self.tag_label == "ccg" and ccg_categories is not None:
fields["tags"] = SequenceLabelField(ccg_categories, text_field, self.label_namespace)
elif self.tag_label == "original_pos" and original_pos_tags is not None:
fields["tags"] = SequenceLabelField(original_pos_tags, text_field, self.label_namespace)
elif self.tag_label == "modified_pos" and modified_pos_tags is not None:
fields["tags"] = SequenceLabelField(modified_pos_tags, text_field, self.label_namespace)
elif self.tag_label == "predicate_arg" and predicate_arg_categories is not None:
fields["tags"] = SequenceLabelField(
predicate_arg_categories, text_field, self.label_namespace
)
return Instance(fields)
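
# Minimal sketch of the tag_label dispatch above: exactly one of the available tag
# sequences becomes the "tags" target, stored under a configurable label namespace.
# tag_label, label_namespace, the tokens, and the categories are toy values.
from typing import Dict

from allennlp.data import Instance, Token
from allennlp.data.fields import Field, SequenceLabelField, TextField
from allennlp.data.token_indexers import SingleIdTokenIndexer

tag_label = "ccg"
label_namespace = "ccg_labels"

tokens = [Token(t) for t in ["I", "run"]]
ccg_categories = ["NP", "S\\NP"]
original_pos_tags = ["PRP", "VBP"]

text_field = TextField(tokens, {"tokens": SingleIdTokenIndexer()})
fields: Dict[str, Field] = {"tokens": text_field}
if tag_label == "ccg" and ccg_categories is not None:
    fields["tags"] = SequenceLabelField(ccg_categories, text_field, label_namespace)
elif tag_label == "original_pos" and original_pos_tags is not None:
    fields["tags"] = SequenceLabelField(original_pos_tags, text_field, label_namespace)
instance = Instance(fields)
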