_units = tf_layers.variational_dropout(_units,
                                       keep_prob=self._dropout_keep_prob)

# recurrent network unit
_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
_utter_lengths = tf.cast(tf.reduce_sum(self._utterance_mask, axis=-1),
                         tf.int32)
# _output: [batch_size, max_time, hidden_size]
# _state: tuple of two [batch_size, hidden_size]
_output, _state = tf.nn.dynamic_rnn(_lstm_cell,
                                    _units,
                                    time_major=False,
                                    initial_state=self._initial_state,
                                    sequence_length=_utter_lengths)
_output = tf.reshape(_output, (self._batch_size, -1, self.hidden_size))
_output = tf_layers.variational_dropout(_output,
                                        keep_prob=self._dropout_keep_prob)

# output projection
_logits = tf.layers.dense(_output, self.action_size,
                          kernel_regularizer=tf.nn.l2_loss,
                          kernel_initializer=xav(), name='logits')
return _logits, _state
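
# Hypothetical follow-up (not part of the original snippet): one way the
# per-time-step action logits returned above could be turned into masked
# action probabilities and greedy predictions. The helper name and the mask
# convention (1 = allowed action, 0 = disallowed) are assumptions made for
# illustration only.
import tensorflow as tf

def select_actions_sketch(logits, action_mask=None):
    # logits: [batch_size, max_time, action_size]
    if action_mask is not None:
        # push disallowed actions to a very negative logit before the softmax
        logits = logits + (tf.cast(action_mask, tf.float32) - 1.0) * 1e9
    probs = tf.nn.softmax(logits)
    return probs, tf.argmax(probs, axis=-1)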
def _build_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
    sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
    for n, n_hidden in enumerate(n_hidden_list):
        units, _ = bi_rnn(units, n_hidden, cell_type=cell_type,
                          seq_lengths=sequence_lengths, name='Layer_' + str(n))
        units = tf.concat(units, -1)
        if intra_layer_dropout and n != len(n_hidden_list) - 1:
            units = variational_dropout(units, self._dropout_ph)
    return units
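
# The builders above rely on a variational_dropout helper. A minimal sketch of
# the idea (an assumption about its behavior, not the project's implementation):
# sample one dropout mask per example and feature and reuse it at every time
# step, which tf.nn.dropout supports via noise_shape.
import tensorflow as tf

def variational_dropout_sketch(units, keep_prob):
    # units: [batch_size, max_time, n_features]; share the mask across time
    noise_shape = tf.stack([tf.shape(units)[0], 1, tf.shape(units)[2]])
    return tf.nn.dropout(units, keep_prob=keep_prob, noise_shape=noise_shape)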
if not self.use_reattention:
    match = dot_attention(qc_att, qc_att, mask=self.c_mask, att_size=self.attention_hidden_size,
                          keep_prob=self.keep_prob_ph, use_gate=self.use_gated_attention,
                          drop_diag=self.drop_diag_self_att, use_transpose_att=False,
                          concat_inputs=self.concat_att_inputs)
else:
    match, B = dot_reattention(qc_att, qc_att, memory_mask=self.c_mask,
                               inputs_mask=self.c_mask, att_size=self.attention_hidden_size,
                               E=B, B=B, drop_diag=self.drop_diag_self_att,
                               keep_prob=self.keep_prob_ph, concat_inputs=self.concat_att_inputs)

if self.use_highway_after_selfatt:
    # Z
    # match = tf.layers.batch_normalization(match, training=self.is_train_ph)
    match = highway_layer(variational_dropout(qc_att, keep_prob=self.keep_prob_ph),
                          variational_dropout(match, keep_prob=self.keep_prob_ph),
                          use_combinations=True, regularizer=tf.nn.l2_loss)

if self.use_birnn_after_selfatt:
    # R
    rnn = self.GRU(num_layers=self.num_match_layers, num_units=self.hidden_size, batch_size=self.bs,
                   input_size=match.get_shape().as_list()[-1],
                   keep_prob=self.keep_prob_ph, share_layers=self.share_layers)
    match = rnn(match, seq_len=self.c_len, concat_layers=self.concat_bigru_outputs)

context_representations.append(match)

if self.number_of_hops == 1:
    final_context_repr = context_representations[-1]
else:
    with tf.variable_scope('aggregation'):
        context_representations = tf.concat(context_representations, axis=-1)
def _build_cudnn_rnn(self, units, n_hidden_list, cell_type, intra_layer_dropout, mask):
    sequence_lengths = tf.to_int32(tf.reduce_sum(mask, axis=1))
    for n, n_hidden in enumerate(n_hidden_list):
        with tf.variable_scope(cell_type.upper() + '_' + str(n)):
            if cell_type.lower() == 'lstm':
                units, _ = cudnn_bi_lstm(units, n_hidden, sequence_lengths)
            elif cell_type.lower() == 'gru':
                units, _ = cudnn_bi_gru(units, n_hidden, sequence_lengths)
            else:
                raise RuntimeError('Wrong cell type "{}"! Only "gru" and "lstm"!'.format(cell_type))
            units = tf.concat(units, -1)
            if intra_layer_dropout and n != len(n_hidden_list) - 1:
                units = variational_dropout(units, self._dropout_ph)
    return units
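
# Rough, assumed equivalent of the bi_rnn / cudnn_bi_gru helpers used above
# (a sketch only; the project's helpers may differ in scoping and state
# handling). It returns the (forward, backward) output pair, which the callers
# above concatenate into [batch_size, max_time, 2 * n_hidden].
import tensorflow as tf

def bi_gru_sketch(units, n_hidden, seq_lengths, name='bi_gru'):
    with tf.variable_scope(name):
        cell_fw = tf.nn.rnn_cell.GRUCell(n_hidden)
        cell_bw = tf.nn.rnn_cell.GRUCell(n_hidden)
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw, cell_bw, units,
            sequence_length=seq_lengths, dtype=tf.float32)
        return outputs, states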
    _attn_output = am.light_general_attention(
        self._key,
        self._emb_context,
        hidden_size=self.attn.hidden_size,
        projected_align=self.attn.projected_align)
elif self.attn.type == 'light_bahdanau':
    _attn_output = am.light_bahdanau_attention(
        self._key,
        self._emb_context,
        hidden_size=self.attn.hidden_size,
        projected_align=self.attn.projected_align)
else:
    raise ValueError("wrong value for attention mechanism type")
_units = tf.concat([_units, _attn_output], -1)

_units = tf_layers.variational_dropout(_units,
                                       keep_prob=self._dropout_keep_prob)

# recurrent network unit
_lstm_cell = tf.nn.rnn_cell.LSTMCell(self.hidden_size)
_utter_lengths = tf.cast(tf.reduce_sum(self._utterance_mask, axis=-1),
                         tf.int32)
# _output: [batch_size, max_time, hidden_size]
# _state: tuple of two [batch_size, hidden_size]
_output, _state = tf.nn.dynamic_rnn(_lstm_cell,
                                    _units,
                                    time_major=False,
                                    initial_state=self._initial_state,
                                    sequence_length=_utter_lengths)
_output = tf.reshape(_output, (self._batch_size, -1, self.hidden_size))
_output = tf_layers.variational_dropout(_output,
                                        keep_prob=self._dropout_keep_prob)
for i in range(self.number_of_hops):
    with tf.variable_scope('co-attention_{}'.format(i)):
        if not self.use_reattention:
            qc_att = dot_attention(context_representations[-1], q, mask=self.q_mask,
                                   att_size=self.attention_hidden_size, keep_prob=self.keep_prob_ph,
                                   use_gate=self.use_gated_attention, use_transpose_att=self.use_transpose_att,
                                   concat_inputs=self.concat_att_inputs)
        else:
            qc_att, E = dot_reattention(context_representations[-1], q, memory_mask=self.q_mask,
                                        inputs_mask=self.c_mask, att_size=self.attention_hidden_size,
                                        E=E, B=B,
                                        keep_prob=self.keep_prob_ph, concat_inputs=self.concat_att_inputs)
        if self.use_highway_after_coatt:
            # qc_att = tf.layers.batch_normalization(qc_att, training=self.is_train_ph)
            qc_att = highway_layer(variational_dropout(context_representations[-1], keep_prob=self.keep_prob_ph),
                                   variational_dropout(qc_att, keep_prob=self.keep_prob_ph),
                                   use_combinations=True, regularizer=tf.nn.l2_loss)
        if self.use_birnn_after_coatt:
            rnn = self.GRU(num_layers=self.num_match_layers, num_units=self.hidden_size, batch_size=self.bs,
                           input_size=qc_att.get_shape().as_list()[-1],
                           keep_prob=self.keep_prob_ph, share_layers=self.share_layers)
            qc_att = rnn(qc_att, seq_len=self.c_len, concat_layers=self.concat_bigru_outputs)

    with tf.variable_scope('self-attention_{}'.format(i)):
        if not self.use_reattention:
            match = dot_attention(qc_att, qc_att, mask=self.c_mask, att_size=self.attention_hidden_size,
                                  keep_prob=self.keep_prob_ph, use_gate=self.use_gated_attention,
                                  drop_diag=self.drop_diag_self_att, use_transpose_att=False,
                                  concat_inputs=self.concat_att_inputs)
        else:
def dot_reattention(inputs, memory, memory_mask, inputs_mask, att_size, E=None, B=None, gamma_init=3, keep_prob=1.0,
                    drop_diag=False, concat_inputs=False, scope="dot_reattention"):
    # see the Reinforced Mnemonic Reader paper for more info about E, B and re-attention
    with tf.variable_scope(scope):
        BS, IL, IH = tf.unstack(tf.shape(inputs))
        BS, ML, MH = tf.unstack(tf.shape(memory))
        d_inputs = variational_dropout(inputs, keep_prob=keep_prob)
        d_memory = variational_dropout(memory, keep_prob=keep_prob)
        with tf.variable_scope("attention"):
            inputs_att = tf.layers.dense(d_inputs, att_size, use_bias=False,
                                         activation=tf.nn.relu,
                                         kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
                                         kernel_regularizer=tf.nn.l2_loss)
            memory_att = tf.layers.dense(d_memory, att_size, use_bias=False,
                                         activation=tf.nn.relu,
                                         kernel_initializer=tf.contrib.layers.variance_scaling_initializer(),
                                         kernel_regularizer=tf.nn.l2_loss)
            # scaled dot-product attention logits: [BS, IL, ML]
            logits = tf.matmul(inputs_att, tf.transpose(memory_att, [0, 2, 1])) / (att_size ** 0.5)
            if E is not None and B is not None:
                gamma = tf.Variable(gamma_init, dtype=tf.float32, trainable=True, name='gamma')
                E_softmax = tf.nn.softmax(softmax_mask(E,
def mnemonic_reader_answer_selection(q, context_repr, q_mask, c_mask, att_hidden_size, keep_prob, with_poolings=False):
    q_mask = tf.cast(q_mask, tf.float32)
    q_att = simple_attention(q, att_hidden_size, mask=q_mask, keep_prob=keep_prob)
    if with_poolings:
        q_mask_expand = tf.expand_dims(q_mask, axis=-1)
        q_max_pool = tf.reduce_max(softmax_mask(q, mask=q_mask_expand), axis=1)
        q_avg_pool = tf.reduce_sum(q * q_mask_expand, axis=1) / tf.expand_dims(tf.reduce_sum(q_mask, axis=-1), axis=-1)
        init_state = tf.concat([q_att, q_max_pool, q_avg_pool], axis=-1)
    else:
        init_state = q_att
    state = tf.layers.dense(init_state, units=context_repr.get_shape().as_list()[-1],
                            kernel_regularizer=tf.nn.l2_loss)
    context_repr = variational_dropout(context_repr, keep_prob=keep_prob)
    state = tf.nn.dropout(state, keep_prob=keep_prob)
    att, logits_st = attention(context_repr, state, att_hidden_size, c_mask, use_combinations=True, scope='st_att')
    state = highway_layer(state, att, use_combinations=True, regularizer=tf.nn.l2_loss)
    state = tf.nn.dropout(state, keep_prob=keep_prob)
    _, logits_end = attention(context_repr, state, att_hidden_size, c_mask, use_combinations=True, scope='end_att')
    return logits_st, logits_end
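
# The start/end logits returned above still have to be decoded into an answer
# span. A minimal sketch of the standard decoding scheme (an assumption, not
# the repository's own post-processing): pick the (start, end) pair with the
# largest joint probability subject to start <= end <= start + max_answer_len.
import tensorflow as tf

def decode_answer_span_sketch(logits_st, logits_end, max_answer_len=15):
    p_st = tf.nn.softmax(logits_st)    # [batch_size, context_len]
    p_end = tf.nn.softmax(logits_end)  # [batch_size, context_len]
    # outer product of start/end probabilities: [batch_size, context_len, context_len]
    outer = tf.matmul(tf.expand_dims(p_st, axis=2), tf.expand_dims(p_end, axis=1))
    # zero out spans with end < start or end - start > max_answer_len
    outer = tf.matrix_band_part(outer, 0, max_answer_len)
    start = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
    end = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
    return start, end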
def _build_encoder(self, scope="Encoder"):
    with tf.variable_scope(scope):
        # _units: [batch_size, max_input_time, embedding_size]
        _units = variational_dropout(self._encoder_inputs, self._dropout_keep_prob)

        # _outputs: [batch_size, max_input_time, embedding_size, 2]
        # _state: [batch_size, hidden_size, 2]
        if self.encoder_use_cudnn:
            if (self.l2_regs[0] > 0) or (self.l2_regs[1] > 0):
                log.warning("cuDNN RNN are not l2 regularizable")
            if self.encoder_cell_type.lower() == 'lstm':
                _outputs, _state = cudnn_bi_lstm(_units,
                                                 self.hidden_size,
                                                 self._src_sequence_lengths)
            elif self.encoder_cell_type.lower() == 'gru':
                _outputs, _state = cudnn_bi_gru(_units,
                                                self.hidden_size,
                                                self._src_sequence_lengths)
        else:
            _outputs, _state = bi_rnn(_units,