v_eps_rsqrt3 = v_eps_rsqrt1 ** 3.0
# common factors
if prop_down[0] or prop_down[2]:
dy_x0_bm_sum = F.sum(dy * x0_bm, axes, True)
dy_sum = F.sum(dy, axes, True)
if prop_down[0] or prop_down[5]:
g_dx0_x0_bm_sum = F.sum(g_dx0 * x0_bm, axes, True)
g_dx0_sum = F.sum(g_dx0, axes, True)
# w.r.t. x
if prop_down[0]:
# from dx
dv = (-1.0 / 2.0) * g0 * v_eps_rsqrt3 * \
F.sum(dy * x0_bm, axes, True)
g_dx0_dy_sum = F.sum(g_dx0 * dy, axes, True)
g1 = (-1.0 / de) * v_eps_rsqrt3 * g_dx0_dy_sum * g0 * x0_bm
g2 = (1.0 / de) * g0 * g_dx0_x0_bm_sum * v_eps_rsqrt3 * (1.0 / de * dy_sum - dy
+ (3.0 / de) * v_eps_r1 * dy_x0_bm_sum * x0_bm)
g2 += (2.0 / de) * dv * (g_dx0 - (1.0 / de) * g_dx0_sum)
g3 = (1.0 / de ** 2.0) * g_dx0_sum * \
dy_sum * g0 * v_eps_rsqrt3 * x0_bm
g_x0_ = g1 + g2 + g3
# from gamma
t1 = (dy - dy_sum / de) * v_eps_rsqrt1
t2 = (- 1.0 / de) * dy_x0_bm_sum * v_eps_rsqrt3 * x0_bm
g_x0_ += g_dg0 * (t1 + t2)
if accum[0]:
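# The gradients above are written in terms of the batch-norm forward statistics.
# A minimal NumPy sketch of those quantities, assuming x0 is the input, `axes` the
# reduction axes and `de` the number of reduced elements; names mirror the snippet
# above, but this is an illustration, not the library implementation.
import numpy as np

def bn_forward_stats(x0, axes=(0, 2, 3), eps=1e-5):
    de = np.prod([x0.shape[a] for a in axes])           # number of reduced elements
    mu = x0.mean(axis=axes, keepdims=True)              # batch mean
    x0_bm = x0 - mu                                     # input minus batch mean
    var = (x0_bm ** 2).mean(axis=axes, keepdims=True)   # batch variance
    v_eps_r1 = (var + eps) ** -1.0                      # 1 / (var + eps)
    v_eps_rsqrt1 = (var + eps) ** -0.5                  # 1 / sqrt(var + eps)
    v_eps_rsqrt3 = v_eps_rsqrt1 ** 3.0                  # appears in d(var) terms
    x_hat = x0_bm * v_eps_rsqrt1                        # normalized input
    return x_hat, x0_bm, v_eps_r1, v_eps_rsqrt1, v_eps_rsqrt3, de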
for r in range(num_routing_iter):
# u_hat is only used in the last step.
uh = u_hat_no_grad
if r == num_routing_iter - 1:
uh = u_hat
# 4: Softmax in eq 3
c = F.softmax(b, axis=1)
# 5: Left of eq 2. s shape: [B, num_j, out_channels]
s = F.sum(c * uh, axis=2)
# 6: eq 1
v = squash(s, axis=2)
if r == num_routing_iter - 1:
return u_hat, v
# 7: Update by agreement
b = b + F.sum(v.reshape((batch_size, num_j, 1, out_channels)) *
uh, axis=3, keepdims=True)
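# The routing loop above relies on squash(s, axis) for eq. 1 of the capsule paper
# (Sabour et al., 2017). A minimal sketch of such a squash using nnabla functions;
# the helper actually defined in the surrounding code may differ in details.
import nnabla.functions as F

def squash_sketch(s, axis=2, eps=1e-6):
    # squash(s) = (|s|^2 / (1 + |s|^2)) * (s / |s|)
    sq_norm = F.sum(s ** 2.0, axis=axis, keepdims=True)
    norm = F.pow_scalar(sq_norm + eps, 0.5)
    return (sq_norm / (1.0 + sq_norm)) * (s / norm)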
#############################################
# ELBO components and loss objective
#############################################
# Binarized input
xb = F.greater_equal_scalar(xa, 0.5)
# E_q(z|x)[log(q(z|x))]
# without constant terms that cancel out after summation in the loss
logqz = 0.5 * F.sum(1.0 + logvar, axis=1)
# E_q(z|x)[log(p(z))]
# without constant terms that cancel out after summation in the loss
logpz = 0.5 * F.sum(mu * mu + sigma * sigma, axis=1)
# E_q(z|x)[log(p(x|z))]
logpx = F.sum(F.sigmoid_cross_entropy(prob, xb), axis=(1, 2, 3))
# VAE loss: the negative evidence lower bound
loss = F.mean(logpx + logpz - logqz)
return loss
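# logpz - logqz above is the closed-form KL(q(z|x) || N(0, I)) with constants dropped:
#   KL = 0.5 * sum(mu^2 + sigma^2 - logvar - 1)
# A small NumPy check of that identity, assuming sigma = exp(0.5 * logvar)
# (the encoder producing mu/logvar is not shown in this snippet).
import numpy as np

mu = np.random.randn(8, 16)
logvar = np.random.randn(8, 16)
sigma = np.exp(0.5 * logvar)

logqz = 0.5 * np.sum(1.0 + logvar, axis=1)
logpz = 0.5 * np.sum(mu * mu + sigma * sigma, axis=1)
kl_closed_form = 0.5 * np.sum(mu * mu + sigma * sigma - logvar - 1.0, axis=1)

assert np.allclose(logpz - logqz, kl_closed_form)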
def channel_wise_reg(param):
# Group-L1 regularization over channels (per-channel L2 norm, then summed)
reg = F.sum(
F.pow_scalar(F.sum(F.pow_scalar(param, 2), axis=[0, 2, 3]), 0.5))
return reg
def mnist_lenet_siamese(x0, x1, test=False):
""""""
h0 = mnist_lenet_feature(x0, test)
h1 = mnist_lenet_feature(x1, test) # share weights
# h = (h0 - h1) ** 2 # equivalent
h = F.squared_error(h0, h1)
p = F.sum(h, axis=1)
return p
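# p above is a squared feature distance; a contrastive loss over it is the usual
# next step for siamese training. The sketch below assumes a binary pair label t
# (1 = similar, 0 = dissimilar) and a margin hyperparameter; the surrounding
# example may define its own variant.
import nnabla.functions as F

def contrastive_loss(p, t, margin=1.0):
    dist = F.pow_scalar(p, 0.5)                        # Euclidean distance
    sim = t * p                                        # pull similar pairs together
    dissim = (1.0 - t) * F.pow_scalar(
        F.maximum_scalar(margin - dist, 0.0), 2.0)     # push dissimilar pairs apart
    return F.mean(sim + dissim)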
def filter_wise_reg(param):
# Group-L1 regularization over filters (per-filter L2 norm, then summed)
reg = F.sum(
F.pow_scalar(F.sum(F.pow_scalar(param, 2), axis=[1, 2, 3]), 0.5))
return reg
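# Both regularizers above are group-L1 penalties: an L2 norm per group (input
# channel or output filter) followed by a sum, which drives whole groups toward
# zero. A usage sketch adding such a penalty to a task loss; restricting to 4-D
# parameters whose name contains "conv" and the coefficient 1e-4 are assumptions
# for illustration only.
import nnabla as nn

def add_group_sparsity(loss, rho=1e-4):
    reg = 0.0
    for name, param in nn.get_parameters().items():
        if "conv" in name and param.ndim == 4:   # conv weights (outmaps, inmaps, kh, kw)
            reg = reg + filter_wise_reg(param)
    return loss + rho * reg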
return mlp_net(x, n_h, n_y, test)
# Net for learning labeled data
xl = nn.Variable((args.batchsize_l,) + shape_x, need_grad=False)
yl = forward(xl, test=False)
tl = nn.Variable((args.batchsize_l, 1), need_grad=False)
loss_l = F.mean(F.softmax_cross_entropy(yl, tl))
# Net for learning unlabeled data
xu = nn.Variable((args.batchsize_u,) + shape_x, need_grad=False)
yu = forward(xu, test=False)
y1 = yu.get_unlinked_variable()
y1.need_grad = False
noise = nn.Variable((args.batchsize_u,) + shape_x, need_grad=True)
r = noise / (F.sum(noise ** 2, [1, 2, 3], keepdims=True)) ** 0.5
r.persistent = True
y2 = forward(xu + args.xi_for_vat * r, test=False)
y3 = forward(xu + args.eps_for_vat * r, test=False)
loss_k = F.mean(distance(y1, y2))
loss_u = F.mean(distance(y1, y3))
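# distance(y1, y2) above compares the clean prediction with the perturbed one;
# VAT commonly uses the KL divergence between the two softmax distributions.
# A sketch of that choice (the example's own distance helper may be defined
# differently).
import nnabla.functions as F

def kl_softmax_distance(logits_p, logits_q, eps=1e-8):
    p = F.softmax(logits_p, axis=1)
    q = F.softmax(logits_q, axis=1)
    # KL(p || q) = sum_c p_c * (log p_c - log q_c), one value per sample
    return F.sum(p * (F.log(p + eps) - F.log(q + eps)), axis=1)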
# Net for evaluating validation data
xv = nn.Variable((args.batchsize_v,) + shape_x, need_grad=False)
hv = forward(xv, test=True)
tv = nn.Variable((args.batchsize_v, 1), need_grad=False)
err = F.mean(F.top_n_error(hv, tv, n=1))
# Create solver
solver = S.Adam(args.learning_rate)
solver.set_parameters(nn.get_parameters())
# Create model
# - Real batch size including context samples and negative samples
size = batchsize * (1 + n_negative) * (2 * (half_window - 1))
# Model for learning
# - input variables
xl = nn.Variable((size,)) # variable for word
yl = nn.Variable((size,)) # variable for context
# Embed layers for word embedding function
# - f_embed : maps a word index x to y, an n_dim feature vector
# -- for each sample in a minibatch
hx = PF.embed(xl, n_word, n_dim, name="e1") # feature vector for word
hy = PF.embed(yl, n_word, n_dim, name="e1") # feature vector for context
hl = F.sum(hx * hy, axis=1)
# -- Approximated likelihood of context prediction
# pos: observed (word, context) pairs, neg: negative samples
tl = nn.Variable([size, ], need_grad=False)
loss = F.sigmoid_cross_entropy(hl, tl)
loss = F.mean(loss)
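# hl is the word/context dot product, and tl is expected to hold 1 for observed
# pairs and 0 for negatives, so the loss above is the skip-gram negative-sampling
# objective. A NumPy sketch of the same computation (the 1/0 label layout is an
# assumption based on the comments above; it is not numerically hardened).
import numpy as np

def negative_sampling_loss(hx, hy, tl):
    score = np.sum(hx * hy, axis=1)    # dot product per (word, context) pair
    # sigmoid cross entropy: t * -log(sigmoid(s)) + (1 - t) * -log(1 - sigmoid(s))
    return np.mean(tl * np.log1p(np.exp(-score)) + (1 - tl) * np.log1p(np.exp(score)))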
# Model for test of searching similar words
xr = nn.Variable((1,), need_grad=False)
hr = PF.embed(xr, n_word, n_dim, name="e1") # feature vector for test
# Create solver
solver = S.Adam(args.learning_rate)
solver.set_parameters(nn.get_parameters())
# Create monitor.
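# With the graph and solver above, one nnabla training step typically feeds the
# input variables, runs forward/backward and updates the parameters. A sketch of
# that loop; max_iter and next_batch() are placeholders for the example's actual
# hyperparameter and data iterator, and monitoring is omitted.
for i in range(max_iter):
    x_batch, y_batch, t_batch = next_batch()      # word, context, pos/neg label
    xl.d, yl.d, tl.d = x_batch, y_batch, t_batch  # feed minibatch into the graph
    solver.zero_grad()
    loss.forward()
    loss.backward()
    solver.update()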