import cgt
import numpy as np

N = 10  # sample count (not set in the original snippet; any positive int works)
K = 3
Xval = np.random.randn(N, K)
wval = np.random.randn(K)
bval = np.random.randn()
yval = np.random.randn(N)
X_nk = cgt.matrix("X")
y_n = cgt.vector("y")
w_k = cgt.vector("w")
b = cgt.scalar(name="b")
ypred = cgt.dot(X_nk, w_k) + b
err = cgt.sum(cgt.square(ypred - y_n))
g = cgt.grad(err, [w_k, b])
g_simple, an = cgt.core.simplify_and_analyze(g)
print "Loss function:"
cgt.print_tree([err])
print "Gradient:"
cgt.print_tree(g)
print "Gradient simplified"
cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5]))
print "-------"
d = {X_nk: Xval, w_k: wval, b: bval, y_n: yval}
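# The feed dict d above pairs each symbolic variable with a value. A minimal
# sketch of evaluating the graph numerically, using cgt.function (the same
# API used elsewhere on this page) rather than the feed dict directly:
f = cgt.function([X_nk, w_k, b, y_n], [err] + g_simple)
errval, gwval, gbval = f(Xval, wval, bval, yval)
print "loss:", errval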
# Same model again, this time with shared variables so each array can be
# placed on a device explicitly (the X matrix is pinned to the GPU below).
Xval = np.random.randn(N, K)
wval = np.random.randn(K)
bval = np.random.randn()
yval = np.random.randn(N)
X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
y_n = cgt.shared(yval, "y")
w_k = cgt.shared(wval, "w")
b = cgt.shared(bval, name="b")
print "bval",bval
ypred = cgt.dot(cgt.square(X_nk), w_k) + b
err = cgt.sum(cgt.square(ypred - y_n))
g = cgt.grad(err, [w_k, b])
outputs = [err]
def devfn(node):
    if isinstance(node, cgt.Result) and node.op == err.op:  # XXX add fn for this
        return cgt.Device(devtype="cpu")

func = cgt.VarSizeFunc([], outputs, devfn=devfn)

def writedev(node, o):
    o.write(" | device: %s" % func.node2device[node])
cgt.print_tree(func.outputs, nodefn=writedev)
print "ready..."
numerr = func()
if key == "conj":
print "skipping conj"
continue
utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key)
if cls == core.ElwiseUnary:
n_in = 1
op = cls(key)
else:
n_in = 2
op = cls(key, (True,True))
inputvars = vars[0:n_in]
inputvals = vals[0:n_in]
out = core.Result(op, inputvars)
f = cgt.function(inputvars, out)
try:
    grads = cgt.grad(out, inputvars)
except core.NonDifferentiable:
    print "nondiff"
    continue
if DISPLAY:
    print "Function:"
    cgt.print_tree(out)
    print "Gradient original:"
    cgt.print_tree(grads)
    print "Gradient simplified:"
grads_simple = core.simplify(grads)
if DISPLAY: cgt.print_tree(grads_simple)
gradf = cgt.function(inputvars, grads)
nugrad = numeric_grad(lambda li: f(*li), inputvals)  # pylint: disable=W0640
cgtgrad = gradf(*inputvals)
np.testing.assert_almost_equal(nugrad, cgtgrad, decimal=6)
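# numeric_grad is used above but not defined in this snippet. A minimal
# central-difference sketch (an assumption, not CGT's own helper; it assumes
# each input is an ndarray and that f returns a scalar):
def numeric_grad_sketch(f, inputvals, eps=1e-5):
    grads = []
    for x in inputvals:
        g = np.zeros_like(x)
        for i in xrange(x.size):
            orig = x.flat[i]
            x.flat[i] = orig + eps   # perturb one coordinate up
            fplus = f(inputvals)
            x.flat[i] = orig - eps   # and down
            fminus = f(inputvals)
            x.flat[i] = orig         # restore
            g.flat[i] = (fplus - fminus) / (2.0 * eps)
        grads.append(g)
    return grads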
self.x = cgt.matrix("x", dtype=cgt.floatX)
self.eps = cgt.matrix("eps", dtype=cgt.floatX)
self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim, nlayers=args.nlayers, eps=self.eps)
if dec == "bernoulli":
# log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
elif dec == "gaussian":
self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
else:
raise RuntimeError("unrecognized decoder %" % dec)
self.cost = (-cgt.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var)) + self.dec_mlp.cost) / args.batch_size
self.params = self.enc_mlp.params + self.dec_mlp.params
# L2 regularization
self.gparams = [cgt.grad(self.cost, [p])[0] + self.lmbda * p for p in self.params]
self.gaccums = [cgt.shared(np.zeros(p.op.get_value().shape, dtype=cgt.floatX)) for p in self.params]
# XXX replace w/ adagrad update from nn
ADAGRAD_EPS = 1e-10 # for stability
self.updates = [
    (param, param - args.lr * gparam / cgt.sqrt(gaccum + cgt.square(gparam) + ADAGRAD_EPS))
    for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
]
self.updates += [
    (gaccum, gaccum + cgt.square(gparam))
    for gaccum, gparam in zip(self.gaccums, self.gparams)
]
self.train = cgt.function(
    [self.x, self.eps],
    self.cost,
    updates=self.updates
)
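# Hedged usage sketch for the compiled trainer: eps is the standard-normal
# noise of the reparameterization trick, matching self.eps above. `model` and
# `data` are hypothetical names for an instance of this class and a
# (num_examples, xdim) training array.
for i in xrange(0, data.shape[0], args.batch_size):
    xbatch = data[i:i + args.batch_size].astype(cgt.floatX)
    epsbatch = np.random.randn(xbatch.shape[0], model.zdim).astype(cgt.floatX)
    cost = model.train(xbatch, epsbatch)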
"""Stochastic Gradient Descent (SGD) updates
Math:
* ``param := param - learning_rate * gradient``
Parameters
----------
cost : a scalar loss.
params : a list of cgt shared variables. We generate update
expressions w.r.t. these variables.
learning_rate : float
Returns
-------
list of tuples of the form (param, new_param)
"""
updates = []
grads = cgt.grad(cost, params)
for param, grad in zip(params, grads):
updates.append((param, param - learning_rate * grad))
return updates
def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
    grads = cgt.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        acc = cgt.shared(p.op.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * cgt.square(g)
        gradient_scaling = cgt.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - stepsize * g))
    return updates
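# Either helper plugs into cgt.function the same way. A hedged wiring sketch,
# where x, y, loss, and params stand in for a model built as elsewhere on this page:
train = cgt.function([x, y], loss, updates=rmsprop_updates(loss, params))
# or: updates=sgd_updates(loss, params, learning_rate=0.01)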
state_arrs = initial_states
for t in xrange(total_time):
    tmp = ntm([x_tbk[t]] + state_arrs)
    raw_pred = tmp[0]
    state_arrs = tmp[1:4]
    if t in loss_timesteps:
        p_pred = cgt.sigmoid(raw_pred)
        ce = bernoulli_crossentropy(y_tbp[t], p_pred).sum()  # cross-entropy of bernoulli distribution
        lossCE = lossCE + ce
        loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)), cgt.floatX).sum()
lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
gradloss = cgt.grad(lossCE, params)
flatgrad = flatcat(gradloss)
f_loss = cgt.function([x_tbk, y_tbp], lossCE)
f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])
print "number of nodes in computation graph:", core.count_nodes([lossCE, loss01, flatgrad])
return f_loss, f_loss_and_grad, params
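# bernoulli_crossentropy and round01 are called above but not defined in this
# snippet; plausible definitions (assumptions, not the original helpers) are:
def bernoulli_crossentropy(y, p):
    # elementwise -log p(y) for a Bernoulli with parameter p
    return -(y * cgt.log(p) + (1 - y) * cgt.log(1 - p))

def round01(p):
    # threshold probabilities at 0.5 (assumes cgt exposes an elementwise floor)
    return cgt.floor(p + 0.5)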
def make_updater_convnet_parallel():
    X = cgt.tensor4("X", fixed_shape=(None, 1, 28, 28))  # so shapes can be inferred
    y = cgt.vector("y", dtype='i8')
    stepsize = cgt.scalar("stepsize")
    loss = build_convnet_return_loss(X, y)
    m = nn.Module([X, y], [loss])
    split_loss = 0
    # evaluate the module on four quarter-size slices of the batch
    for start in xrange(0, batch_size, batch_size // 4):
        sli = slice(start, start + batch_size // 4)
        split_loss += m([X[sli], y[sli]])[0]
    split_loss /= 4
    params = nn.get_parameters(loss)
    gparams = cgt.grad(split_loss, params)
    updates2 = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
    return cgt.function([X, y, stepsize], split_loss, updates=updates2)
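# Hedged usage of the parallel updater. Note that batch_size and
# build_convnet_return_loss are free variables captured from the enclosing
# script; Xtrain and ytrain are hypothetical MNIST-shaped arrays.
f_update = make_updater_convnet_parallel()
loss = f_update(Xtrain[:batch_size], ytrain[:batch_size], 1e-3)
print "minibatch loss:", loss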
Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]), cgt.floatX)
M = Minit_nk
for t in xrange(horizon):
    M = cell(M, X_tnk[t])
# cgt.print_tree(M)
print "simplifying..."
M_simp = cgt.simplify([M])
print "done"
# cgt.print_tree(M_simp)
print "fn before:", cgt.count_nodes(M)
print "fn after:", cgt.count_nodes(M_simp)
gs = cgt.grad(cgt.sum(M), cell.params())
print "grad before", cgt.count_nodes(gs)
g_simp = cgt.simplify(gs)
print "grad after", cgt.count_nodes(g_simp)
# M = cgt.simplify(M)
elapsed.append(time() - tstart)

import matplotlib.pyplot as plt
plt.plot(horizons, elapsed, 'x-')
plt.show()
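# The timing lines above are a fragment: tstart, horizons, and elapsed are
# presumably set by an enclosing benchmark loop. A hypothetical reconstruction
# (the horizon values are illustrative):
from time import time
horizons, elapsed = [10, 20, 40, 80], []
for horizon in horizons:
    tstart = time()
    # ... build M over `horizon` steps, simplify, and take gradients as above ...
    elapsed.append(time() - tstart)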