Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def flatten_emb_by_sentence(emb, text_len_mask):
    """
    Merge the first two axes of ``emb`` into one and keep only the masked-in entries.

    Args:
        emb: Embeddings tensor whose static rank is 2 or 3. The first two axes
            are collapsed into a single axis; for rank 3 the last axis is kept.
        text_len_mask: Mask handed to ``tf.boolean_mask`` together with the
            flattened tensor (presumably a flat boolean vector with one entry
            per flattened row -- confirm against the caller).

    Returns:
        The flattened ``emb`` filtered by ``text_len_mask``.

    Raises:
        ValueError: If the static rank of ``emb`` is neither 2 nor 3.
    """
    sentence_count = tf.shape(emb)[0]
    sentence_width = tf.shape(emb)[1]

    rank = len(emb.get_shape())
    if rank not in (2, 3):
        raise ValueError("Unsupported rank: {}".format(rank))

    # The leading axis of the result always has sentence_count * sentence_width entries.
    target_shape = [sentence_count * sentence_width]
    if rank == 3:
        # Rank-3 input keeps its trailing (embedding) axis untouched.
        target_shape.append(shape(emb, 2))

    return tf.boolean_mask(tf.reshape(emb, target_shape), text_len_mask)
genre_emb):
"""
Forms a new tensor using special features, mentions embeddings, mentions scores, etc.
and passes it through a fully-connected network that compute antecedent scores.
Args:
mention_emb: [num_mentions, emb], a tensor that contains of embeddings of specific mentions
mention_scores: [num_mentions, 1], Output of the fully-connected network, that compute the mentions scores.
antecedents: [] get from C++ function
antecedents_len: [] get from C++ function
mention_speaker_ids: [num_mentions, speaker_emb_size], tf.float64, Speaker IDs.
genre_emb: [genre_emb_size], tf.float64, Genre
Returns: tf.float64, [num_mentions, max_ant + 1], antecedent scores.
"""
# NOTE(review): the docstring gives mention_speaker_ids as [num_mentions, speaker_emb_size],
# but the code below expands it on axis 1 and compares it with a [num_mentions, max_ant]
# gather, which suggests a rank-1 [num_mentions] id tensor -- confirm.
# custom_layers.shape is defined outside this block; presumably it returns the size of
# the given axis -- confirm.
num_mentions = custom_layers.shape(mention_emb, 0)
max_antecedents = custom_layers.shape(antecedents, 1)
# Collects the per-(mention, antecedent) feature embeddings built below.
feature_emb_list = []
if self.use_metadata:
# Use the antecedent indices to look up the speaker id of every antecedent.
antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents) # [num_mentions, max_ant]
# Broadcast each mention's own speaker id against the ids of its antecedents.
same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1),
antecedent_speaker_ids) # [num_mentions, max_ant]
# Two-row embedding table indexed by the comparison cast to int:
# row 0 when the speakers differ, row 1 when they match.
speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.feature_size],
dtype=tf.float64),
tf.to_int32(same_speaker)) # [num_mentions, max_ant, emb]
feature_emb_list.append(speaker_pair_emb)
# Add two leading axes to genre_emb and repeat it for every (mention, antecedent) pair.
tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
[num_mentions, max_antecedents, 1]) # [num_mentions, max_ant, emb]
feature_emb_list.append(tiled_genre_emb)
genre_emb):
"""
Forms a new tensor using special features, mentions embeddings, mentions scores, etc.
and passes it through a fully-connected network that compute antecedent scores.
Args:
mention_emb: [num_mentions, emb], a tensor that contains of embeddings of specific mentions
mention_scores: [num_mentions, 1], Output of the fully-connected network, that compute the mentions scores.
antecedents: [] get from C++ function
antecedents_len: [] get from C++ function
mention_speaker_ids: [num_mentions, speaker_emb_size], tf.float64, Speaker IDs.
genre_emb: [genre_emb_size], tf.float64, Genre
Returns: tf.float64, [num_mentions, max_ant + 1], antecedent scores.
"""
# NOTE(review): the docstring gives mention_speaker_ids as [num_mentions, speaker_emb_size],
# but the code below expands it on axis 1 and compares it with a [num_mentions, max_ant]
# gather, which suggests a rank-1 [num_mentions] id tensor -- confirm.
# custom_layers.shape is defined outside this block; presumably it returns the size of
# the given axis -- confirm.
num_mentions = custom_layers.shape(mention_emb, 0)
max_antecedents = custom_layers.shape(antecedents, 1)
# Collects the per-(mention, antecedent) feature embeddings built below.
feature_emb_list = []
if self.use_metadata:
# Use the antecedent indices to look up the speaker id of every antecedent.
antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents) # [num_mentions, max_ant]
# Broadcast each mention's own speaker id against the ids of its antecedents.
same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1),
antecedent_speaker_ids) # [num_mentions, max_ant]
# Two-row embedding table indexed by the comparison cast to int:
# row 0 when the speakers differ, row 1 when they match.
speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.feature_size],
dtype=tf.float64),
tf.to_int32(same_speaker)) # [num_mentions, max_ant, emb]
feature_emb_list.append(speaker_pair_emb)
# Add two leading axes to genre_emb and repeat it for every (mention, antecedent) pair.
tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
[num_mentions, max_antecedents, 1]) # [num_mentions, max_ant, emb]
feature_emb_list.append(tiled_genre_emb)
def get_antecedent_scores(self, mention_emb, mention_scores, antecedents, antecedents_len, mention_speaker_ids,
genre_emb):
"""
Forms a new tensor using special features, mentions embeddings, mentions scores, etc.
and passes it through a fully-connected network that compute antecedent scores.
Args:
mention_emb: [num_mentions, emb], a tensor that contains of embeddings of specific mentions
mention_scores: [num_mentions, 1], Output of the fully-connected network, that compute the mentions scores.
antecedents: [] get from C++ function
antecedents_len: [] get from C++ function
mention_speaker_ids: [num_mentions, speaker_emb_size], tf.float64, Speaker IDs.
genre_emb: [genre_emb_size], tf.float64, Genre
Returns: tf.float64, [num_mentions, max_ant + 1], antecedent scores.
"""
# NOTE(review): the docstring gives mention_speaker_ids as [num_mentions, speaker_emb_size],
# but the code below expands it on axis 1 and compares it with a [num_mentions, max_ant]
# gather, which suggests a rank-1 [num_mentions] id tensor -- confirm.
# shape is a helper defined outside this block; presumably it returns the size of
# the given axis -- confirm.
num_mentions = shape(mention_emb, 0)
max_antecedents = shape(antecedents, 1)
# Collects the per-(mention, antecedent) feature embeddings built below.
feature_emb_list = []
if self.use_metadata:
# Use the antecedent indices to look up the speaker id of every antecedent.
antecedent_speaker_ids = tf.gather(mention_speaker_ids, antecedents) # [num_mentions, max_ant]
# Broadcast each mention's own speaker id against the ids of its antecedents.
same_speaker = tf.equal(tf.expand_dims(mention_speaker_ids, 1),
antecedent_speaker_ids) # [num_mentions, max_ant]
# Two-row embedding table indexed by the comparison cast to int:
# row 0 when the speakers differ, row 1 when they match.
speaker_pair_emb = tf.gather(tf.get_variable("same_speaker_emb", [2, self.feature_size],
dtype=tf.float64),
tf.to_int32(same_speaker)) # [num_mentions, max_ant, emb]
feature_emb_list.append(speaker_pair_emb)
# Add two leading axes to genre_emb and repeat it for every (mention, antecedent) pair.
tiled_genre_emb = tf.tile(tf.expand_dims(tf.expand_dims(genre_emb, 0), 0),
[num_mentions, max_antecedents, 1]) # [num_mentions, max_ant, emb]
feature_emb_list.append(tiled_genre_emb)
mention_emb_list.append(mention_end_emb)
# Width of every mention; the +1 implies that start and end indices are both inclusive.
mention_width = 1 + mention_ends - mention_starts # [num_mentions]
if self.use_features:
# Widths start at 1, so subtract 1 to get a zero-based row of the width-embedding table.
mention_width_index = mention_width - 1 # [num_mentions]
# NOTE(review): the table has self.max_mention_width rows; this assumes no mention is
# wider than that -- confirm against the code that produces the spans.
mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.max_mention_width,
self.feature_size],
dtype=tf.float64),
mention_width_index) # [num_mentions, emb]
# NOTE(review): in TF 1.x the second positional argument of tf.nn.dropout is keep_prob,
# so self.dropout is presumably a keep probability -- confirm.
mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
mention_emb_list.append(mention_width_emb)
if self.model_heads:
# For every mention, the positions start, start + 1, ..., start + max_mention_width - 1.
mention_indices = tf.expand_dims(tf.range(self.max_mention_width), 0) + tf.expand_dims(
mention_starts, 1) # [num_mentions, max_mention_width]
# Clamp positions to the last row of text_outputs so the gathers below stay in range.
mention_indices = tf.minimum(custom_layers.shape(text_outputs, 0) - 1,
mention_indices) # [num_mentions, max_mention_width]
mention_text_emb = tf.gather(text_emb, mention_indices) # [num_mentions, max_mention_width, emb]
# Head scores are stored on self; custom_layers.projection is defined outside this block.
self.head_scores = custom_layers.projection(text_outputs, 1) # [num_words, 1]
mention_head_scores = tf.gather(self.head_scores, mention_indices) # [num_mentions, max_mention_width, 1]
# 1.0 for positions inside the mention's width, 0.0 for the remaining positions.
mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.max_mention_width, dtype=tf.float64), 2)
# [num_mentions, max_mention_width, 1]
# log(1) = 0 leaves in-mention scores unchanged; log(0) = -inf makes the softmax weight of
# masked positions zero. The softmax runs over axis 1 (the width axis).
mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask),
dim=1) # [num_mentions, max_mention_width, 1]
# Attention-weighted sum of the token embeddings over the width axis.
mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1) # [num_mentions, emb]
mention_emb_list.append(mention_head_emb)
# Concatenate all collected per-mention features along axis 1.
mention_emb = tf.concat(mention_emb_list, 1) # [num_mentions, emb]
return mention_emb
mention_emb_list.append(mention_end_emb)
# Width of every mention; the +1 implies that start and end indices are both inclusive.
mention_width = 1 + mention_ends - mention_starts # [num_mentions]
if self.use_features:
# Widths start at 1, so subtract 1 to get a zero-based row of the width-embedding table.
mention_width_index = mention_width - 1 # [num_mentions]
# NOTE(review): the table has self.max_mention_width rows; this assumes no mention is
# wider than that -- confirm against the code that produces the spans.
mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.max_mention_width,
self.feature_size],
dtype=tf.float64),
mention_width_index) # [num_mentions, emb]
# NOTE(review): in TF 1.x the second positional argument of tf.nn.dropout is keep_prob,
# so self.dropout is presumably a keep probability -- confirm.
mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
mention_emb_list.append(mention_width_emb)
if self.model_heads:
# For every mention, the positions start, start + 1, ..., start + max_mention_width - 1.
mention_indices = tf.expand_dims(tf.range(self.max_mention_width), 0) + tf.expand_dims(
mention_starts, 1) # [num_mentions, max_mention_width]
# Clamp positions to the last row of text_outputs so the gathers below stay in range.
mention_indices = tf.minimum(shape(text_outputs, 0) - 1,
mention_indices) # [num_mentions, max_mention_width]
mention_text_emb = tf.gather(text_emb, mention_indices) # [num_mentions, max_mention_width, emb]
# Head scores are stored on self; projection is a helper defined outside this block.
self.head_scores = projection(text_outputs, 1) # [num_words, 1]
mention_head_scores = tf.gather(self.head_scores, mention_indices) # [num_mentions, max_mention_width, 1]
# 1.0 for positions inside the mention's width, 0.0 for the remaining positions.
mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.max_mention_width, dtype=tf.float64), 2)
# [num_mentions, max_mention_width, 1]
# log(1) = 0 leaves in-mention scores unchanged; log(0) = -inf makes the softmax weight of
# masked positions zero. The softmax runs over axis 1 (the width axis).
mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask),
dim=1) # [num_mentions, max_mention_width, 1]
# Attention-weighted sum of the token embeddings over the width axis.
mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1) # [num_mentions, emb]
mention_emb_list.append(mention_head_emb)
# Concatenate all collected per-mention features along axis 1.
mention_emb = tf.concat(mention_emb_list, 1) # [num_mentions, emb]
return mention_emb
mention_emb_list.append(mention_end_emb)
# Width of every mention; the +1 implies that start and end indices are both inclusive.
mention_width = 1 + mention_ends - mention_starts # [num_mentions]
if self.use_features:
# Widths start at 1, so subtract 1 to get a zero-based row of the width-embedding table.
mention_width_index = mention_width - 1 # [num_mentions]
# NOTE(review): the table has self.max_mention_width rows; this assumes no mention is
# wider than that -- confirm against the code that produces the spans.
mention_width_emb = tf.gather(tf.get_variable("mention_width_embeddings", [self.max_mention_width,
self.feature_size],
dtype=tf.float64),
mention_width_index) # [num_mentions, emb]
# NOTE(review): in TF 1.x the second positional argument of tf.nn.dropout is keep_prob,
# so self.dropout is presumably a keep probability -- confirm.
mention_width_emb = tf.nn.dropout(mention_width_emb, self.dropout)
mention_emb_list.append(mention_width_emb)
if self.model_heads:
# For every mention, the positions start, start + 1, ..., start + max_mention_width - 1.
mention_indices = tf.expand_dims(tf.range(self.max_mention_width), 0) + tf.expand_dims(
mention_starts, 1) # [num_mentions, max_mention_width]
# Clamp positions to the last row of text_outputs so the gathers below stay in range.
mention_indices = tf.minimum(custom_layers.shape(text_outputs, 0) - 1,
mention_indices) # [num_mentions, max_mention_width]
mention_text_emb = tf.gather(text_emb, mention_indices) # [num_mentions, max_mention_width, emb]
# Head scores are stored on self; custom_layers.projection is defined outside this block.
self.head_scores = custom_layers.projection(text_outputs, 1) # [num_words, 1]
mention_head_scores = tf.gather(self.head_scores, mention_indices) # [num_mentions, max_mention_width, 1]
# 1.0 for positions inside the mention's width, 0.0 for the remaining positions.
mention_mask = tf.expand_dims(tf.sequence_mask(mention_width, self.max_mention_width, dtype=tf.float64), 2)
# [num_mentions, max_mention_width, 1]
# log(1) = 0 leaves in-mention scores unchanged; log(0) = -inf makes the softmax weight of
# masked positions zero. The softmax runs over axis 1 (the width axis).
mention_attention = tf.nn.softmax(mention_head_scores + tf.log(mention_mask),
dim=1) # [num_mentions, max_mention_width, 1]
# Attention-weighted sum of the token embeddings over the width axis.
mention_head_emb = tf.reduce_sum(mention_attention * mention_text_emb, 1) # [num_mentions, emb]
mention_emb_list.append(mention_head_emb)
# Concatenate all collected per-mention features along axis 1.
mention_emb = tf.concat(mention_emb_list, 1) # [num_mentions, emb]
return mention_emb
char_emb = tf.gather(
tf.get_variable("char_embeddings", [len(self.char_dict), self.char_embedding_size]),
char_index) # [num_sentences, max_sentence_length, max_word_length, emb]
flattened_char_emb = tf.reshape(char_emb, [num_sentences * max_sentence_length,
custom_layers.shape(char_emb, 2),
custom_layers.shape(char_emb, 3)])
# [num_sentences * max_sentence_length, max_word_length, emb]
flattened_aggregated_char_emb = custom_layers.cnn(flattened_char_emb, self.filter_widths,
self.filter_size)
# [num_sentences * max_sentence_length, emb]
aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb,
[num_sentences,
max_sentence_length,
custom_layers.shape(flattened_aggregated_char_emb, 1)])
# [num_sentences, max_sentence_length, emb]
text_emb_list.append(aggregated_char_emb)
text_emb = tf.concat(text_emb_list, 2)
text_emb = tf.nn.dropout(text_emb, self.lexical_dropout)
text_len_mask = tf.sequence_mask(text_len, maxlen=max_sentence_length)
text_len_mask = tf.reshape(text_len_mask, [num_sentences * max_sentence_length])
text_outputs = self.encode_sentences(text_emb, text_len, text_len_mask)
text_outputs = tf.nn.dropout(text_outputs, self.dropout)
genre_emb = tf.gather(tf.get_variable("genre_embeddings",
[len(self.genres), self.feature_size],
dtype=tf.float64),