import cgt
from cgt import nn
import numpy.random as nr

def make_ff_controller(opt):
    b, h, m, p, k = opt.b, opt.h, opt.m, opt.p, opt.k
    H = 2*h  # total number of heads (h read heads + h write heads)
    in_size = k + h*m
    out_size = H*m + H + H + H*3 + H + h*m + h*m + p  # packed per-head outputs plus external output; see breakdown below
    # Previous reads
    r_bhm = cgt.tensor3("r", fixed_shape=(b, h, m))
    # External inputs
    X_bk = cgt.matrix("x", fixed_shape=(b, k))
    # Flatten the h read vectors of size m into one vector per batch element
    r_b_hm = r_bhm.reshape([r_bhm.shape[0], r_bhm.shape[1]*r_bhm.shape[2]])
    # Input to controller
    inp_bq = cgt.concatenate([X_bk, r_b_hm], axis=1)
    hid_sizes = opt.ff_hid_sizes
    activation = cgt.tanh
    layer_out_sizes = [in_size] + hid_sizes + [out_size]
    last_out = inp_bq
    # feedforward part. we could simplify a bit by using nn.Affine
    for i in xrange(len(layer_out_sizes)-1):
        indim = layer_out_sizes[i]
        outdim = layer_out_sizes[i+1]
        W = cgt.shared(.02*nr.randn(indim, outdim), name="W%i" % i, fixed_shape_mask="all")
        # The snippet was truncated here; a plausible completion adds a bias,
        # an affine map, and the nonlinearity on every layer except the last:
        bias = cgt.shared(.02*nr.randn(1, outdim), name="b%i" % i, fixed_shape_mask="all")
        last_out = cgt.broadcast("+", last_out.dot(W), bias, "xx,1x")
        if i != len(layer_out_sizes)-2:
            last_out = activation(last_out)
    return nn.Module([r_bhm, X_bk], [last_out])
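# The out_size formula above packs every per-head quantity into one flat
# controller output: keys (H*m), betas (H), gates (H), 3-wide shifts (H*3),
# gammas (H), erase and add vectors (h*m each), and the external output (p).
# A minimal numpy sketch of that split, assuming the standard NTM head
# parametrization; the names and ordering here are illustrative, not taken
# from the snippet.
import numpy as np

def split_controller_output(flat, h, m, p):
    H = 2*h
    sizes = [H*m, H, H, H*3, H, h*m, h*m, p]
    names = ["keys", "betas", "gates", "shifts", "gammas", "erase", "add", "y"]
    pieces, i = {}, 0
    for name, size in zip(names, sizes):
        pieces[name] = flat[i:i+size]
        i += size
    assert i == len(flat)  # the sizes exhaust the flat vector exactly
    return pieces

h, m, p = 2, 5, 4
H = 2*h
out_size = H*m + H + H + H*3 + H + h*m + h*m + p
print({k: v.shape for k, v in split_controller_output(np.arange(out_size), h, m, p).items()})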
def make_ntm(opt):
    Mprev_bnm = cgt.tensor3("M", fixed_shape=(opt.b, opt.n, opt.m))
    X_bk = cgt.matrix("X", fixed_shape=(opt.b, opt.k))
    wprev_bHn = cgt.tensor3("w", fixed_shape=(opt.b, opt.h*2, opt.n))
    rprev_bhm = cgt.tensor3("r", fixed_shape=(opt.b, opt.h, opt.m))
    controller = make_ff_controller(opt)
    M_bnm, w_bHn, r_bhm, y_bp = ntm_step(opt, Mprev_bnm, X_bk, wprev_bHn, rprev_bhm, controller)
    # in this form it looks like a standard seq-to-seq model:
    # external input and output are the first elements
    ntm = nn.Module([X_bk, Mprev_bnm, wprev_bHn, rprev_bhm], [y_bp, M_bnm, w_bHn, r_bhm])
    return ntm
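# A minimal usage sketch: the module maps [X, M, w, r] -> [y, M', w', r'], so a
# single step can be compiled into a callable. This assumes cgt.function takes
# Theano-style (inputs, outputs); `Opt` is a hypothetical stand-in for the
# demo's options object.
from collections import namedtuple

Opt = namedtuple("Opt", "b n m h k p ff_hid_sizes")
opt = Opt(b=8, n=16, m=5, h=1, k=3, p=3, ff_hid_sizes=[32])

ntm = make_ntm(opt)
X = cgt.matrix("X", fixed_shape=(opt.b, opt.k))
M = cgt.tensor3("M", fixed_shape=(opt.b, opt.n, opt.m))
w = cgt.tensor3("w", fixed_shape=(opt.b, 2*opt.h, opt.n))
r = cgt.tensor3("r", fixed_shape=(opt.b, opt.h, opt.m))
y, M2, w2, r2 = ntm([X, M, w, r])
step = cgt.function([X, M, w, r], [y, M2, w2, r2])  # one recurrent step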
def make_funcs(opt, ntm, total_time, loss_timesteps):
    x_tbk = cgt.tensor3("x", fixed_shape=(total_time, opt.b, opt.k))
    y_tbp = cgt.tensor3("y", fixed_shape=(total_time, opt.b, opt.p))
    loss_timesteps = set(loss_timesteps)
    initial_states = make_ntm_initial_states(opt)
    params = ntm.get_parameters() + get_parameters(initial_states)
    # params = ntm.get_parameters()
    lossCE = 0
    loss01 = 0
    state_arrs = initial_states
    for t in xrange(total_time):
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]
        if t in loss_timesteps:
            # The snippet was truncated here; a plausible completion accumulates
            # a Bernoulli cross-entropy and a 0-1 error at the supervised steps
            # (bernoulli_crossentropy and round01 are helpers assumed to be
            # defined elsewhere in the same file):
            p_pred = cgt.sigmoid(raw_pred)
            lossCE = lossCE + bernoulli_crossentropy(y_tbp[t], p_pred).sum()
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)), cgt.floatX).sum()
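# For reference, the Bernoulli cross-entropy accumulated above is the usual
# -(y*log(p) + (1-y)*log(1-p)), summed over bits and batch elements. A minimal
# numpy sketch of that formula:
import numpy as np

def bernoulli_crossentropy_np(y, p, eps=1e-8):
    p = np.clip(p, eps, 1.0 - eps)  # avoid log(0)
    return -(y*np.log(p) + (1.0 - y)*np.log(1.0 - p))

y = np.array([[1.0, 0.0], [0.0, 1.0]])
p = np.array([[0.9, 0.2], [0.1, 0.7]])
print(bernoulli_crossentropy_np(y, p).sum())  # total loss contribution for this step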
def make_loss_and_grad_and_step(arch, size_input, size_output, size_mem, size_batch, n_layers, n_unroll):
    # symbolic variables
    x_tnk = cgt.tensor3()
    targ_tnk = cgt.tensor3()
    make_network = make_deep_lstm if arch == "lstm" else make_deep_gru
    network = make_network(size_input, size_mem, n_layers, size_output, size_batch)
    init_hiddens = [cgt.matrix() for _ in xrange(get_num_hiddens(arch, n_layers))]
    # TODO fixed sizes
    cur_hiddens = init_hiddens
    loss = 0
    for t in xrange(n_unroll):
        outputs = network([x_tnk[t]] + cur_hiddens)
        # hidden states are all outputs but the last; the last is the prediction
        cur_hiddens, prediction_logprobs = outputs[:-1], outputs[-1]
        # loss = loss + nn.categorical_negloglik(prediction_probs, targ_tnk[t]).sum()
        loss = loss - (prediction_logprobs*targ_tnk[t]).sum()
    final_hiddens = cur_hiddens
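# When targ_tnk[t] is one-hot, -(prediction_logprobs*targ).sum() picks out the
# log-probability of the correct class for each batch element, i.e. exactly the
# categorical negative log-likelihood the commented-out line refers to. A small
# numpy check of that equivalence:
import numpy as np

logits = np.array([[2.0, 0.5, -1.0], [0.1, 1.5, 0.3]])
logprobs = logits - np.log(np.exp(logits).sum(axis=1, keepdims=True))  # log-softmax
targ = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])  # one-hot targets

masked = -(logprobs*targ).sum()                                # form used in the loop
gathered = -logprobs[np.arange(2), targ.argmax(axis=1)].sum()  # gather form
assert np.allclose(masked, gathered)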