How to use the cgt.grad function in cgt

To help you get started, we've selected a few cgt.grad examples based on popular ways the function is used in public projects. All of the excerpts below come from the joschu/cgt repository and target Python 2 (they use print statements and xrange).
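Before the excerpts, here is a minimal sketch of the basic pattern: build a scalar expression, call cgt.grad with a list of the variables to differentiate with respect to (it returns one gradient per variable), and compile everything with cgt.function. Only calls that also appear in the excerpts below are used; the variable names and shapes are illustrative.

import numpy as np
import cgt

x = cgt.vector("x")                    # symbolic input vector
loss = cgt.sum(cgt.square(x))          # scalar expression: sum of squares
g, = cgt.grad(loss, [x])               # one gradient per variable in the wrt list
f = cgt.function([x], [loss, g])       # compile loss and gradient together

loss_val, grad_val = f(np.array([1.0, 2.0, 3.0]))   # grad_val is 2*x, i.e. [2., 4., 6.]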


joschu/cgt: test/test_linreg.py (view on GitHub)
        K = 3

        Xval = np.random.randn(N,K)
        wval = np.random.randn(K)
        bval = np.random.randn()
        yval = np.random.randn(N)

        X_nk = cgt.matrix("X")
        y_n = cgt.vector("y")
        w_k = cgt.vector("w")
        b = cgt.scalar(name="b")

        ypred = cgt.dot(X_nk, w_k) + b

        err = cgt.sum(cgt.square(ypred - y_n))
        g = cgt.grad(err, [w_k, b])

        g_simple,an = cgt.core.simplify_and_analyze(g)


        print "Loss function:"
        cgt.print_tree([err])
        print "Gradient:"
        cgt.print_tree(g)

        print "Gradient simplified"
        cgt.print_tree(g_simple, nodefn=lambda node,o: o.write(" " + an["node2hash"][node][:5]))

        print "-------"

        d = {X_nk : Xval, w_k : wval, b : bval, y_n : yval}
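The excerpt stops after building the dictionary that maps each symbolic variable to a numeric array. One way to finish the job, sketched here as an assumption rather than the file's actual continuation, is to compile the loss and the two gradients with cgt.function and evaluate them on the numpy inputs:

f = cgt.function([X_nk, y_n, w_k, b], [err] + g)   # outputs: loss, d err / d w_k, d err / d b
errval, gwval, gbval = f(Xval, yval, wval, bval)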
joschu/cgt: test/_test_devices.py (view on GitHub)
        Xval = np.random.randn(N,K)
        wval = np.random.randn(K)
        bval = np.random.randn()
        yval = np.random.randn(N)

        X_nk = cgt.shared(Xval, "X", device=cgt.Device(devtype='gpu'))
        y_n = cgt.shared(yval, "y")
        w_k = cgt.shared(wval, "w")
        b = cgt.shared(bval, name="b")

        print "bval",bval

        ypred = cgt.dot(cgt.square(X_nk), w_k) + b

        err = cgt.sum(cgt.square(ypred - y_n))
        g = cgt.grad(err, [w_k, b])
        outputs = [err]
        def devfn(node):
            if isinstance(node, cgt.Result) and node.op == err.op: # XXX add fn for this
                return cgt.Device(devtype="cpu")

        func = cgt.VarSizeFunc([], outputs, devfn=devfn)

        def writedev(node,o):
            o.write(" | device: %s"%func.node2device[node])

        cgt.print_tree(func.outputs, nodefn=writedev)



        print "ready..."
        numerr = func()
joschu/cgt: test/test_scalars.py (view on GitHub)
            if key == "conj":
                print "skipping conj"
                continue
            utils.colorprint(utils.Color.YELLOW, "Testing %s\n"%key)
            if cls == core.ElwiseUnary:
                n_in = 1
                op = cls(key)
            else:
                n_in = 2
                op = cls(key, (True,True))
            inputvars = vars[0:n_in]
            inputvals = vals[0:n_in]
            out = core.Result(op, inputvars)
            f = cgt.function(inputvars, out)
            try:
                grads = cgt.grad(out, inputvars)
            except core.NonDifferentiable:
                print "nondiff"
                continue
            if DISPLAY:
                print "Function:"
                cgt.print_tree(out)
                print "Gradient original:"
                cgt.print_tree(grads)
                print "Gradient simplified:"
            grads_simple = core.simplify(grads)
            if DISPLAY: cgt.print_tree(grads_simple)
            gradf = cgt.function(inputvars, grads)
            nugrad = numeric_grad(lambda li: f(*li), inputvals) #pylint: disable=W0640
            cgtgrad = gradf(*inputvals)
            np.testing.assert_almost_equal(nugrad,cgtgrad,decimal=6)
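The final assertion checks the symbolic gradient against a finite-difference estimate. numeric_grad is a helper from the cgt test utilities; a rough sketch of what such a central-difference estimator does (an illustration, not the repository's implementation):

def numeric_grad_sketch(fn, vals, eps=1e-6):
    # d fn / d vals[i]  ~  (fn(vals with vals[i]+eps) - fn(vals with vals[i]-eps)) / (2*eps)
    grads = []
    for i in range(len(vals)):
        hi = list(vals); hi[i] = hi[i] + eps
        lo = list(vals); lo[i] = lo[i] - eps
        grads.append((fn(hi) - fn(lo)) / (2.0 * eps))
    return np.array(grads)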
joschu/cgt: examples/demo_variational_autoencoder.py (view on GitHub)
        self.x = cgt.matrix("x", dtype=cgt.floatX)
        self.eps = cgt.matrix("eps", dtype=cgt.floatX)

        self.enc_mlp = GaussianMLP(self.x, self.xdim, self.hdim, self.zdim, nlayers=args.nlayers, eps=self.eps)
        if dec == "bernoulli":
            # log p(x | z) defined as -CE(x, y) = dec_mlp.cost(y)
            self.dec_mlp = BernoulliMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        elif dec == "gaussian":
            self.dec_mlp = GaussianMLP(self.enc_mlp.out, self.zdim, self.hdim, self.xdim, nlayers=args.nlayers, y=self.x)
        else:
            raise RuntimeError("unrecognized decoder %s" % dec)

        self.cost = (-cgt.sum(kld_unit_mvn(self.enc_mlp.mu, self.enc_mlp.var)) + self.dec_mlp.cost) / args.batch_size
        self.params = self.enc_mlp.params + self.dec_mlp.params
        # L2 regularization
        self.gparams = [cgt.grad(self.cost, [p])[0] + self.lmbda * p for p in self.params]
        self.gaccums = [cgt.shared(np.zeros(p.op.get_value().shape, dtype=cgt.floatX)) for p in self.params]

        # XXX replace w/ adagrad update from nn
        ADAGRAD_EPS = 1e-10  # for stability
        self.updates = [
            (param, param - args.lr * gparam / cgt.sqrt(gaccum + cgt.square(gparam) + ADAGRAD_EPS))
            for param, gparam, gaccum in zip(self.params, self.gparams, self.gaccums)
        ]
        self.updates += [
            (gaccum, gaccum + cgt.square(gparam))
            for gaccum, gparam in zip(self.gaccums, self.gparams)
        ]

        self.train = cgt.function(
            [self.x, self.eps],
            self.cost,
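The excerpt is cut off inside the call that builds self.train. Given the adagrad-style update pairs assembled above, the compiled training step would normally also receive those updates; a plausible completion (an assumption, not the file's verbatim code) looks like:

self.train = cgt.function(
    [self.x, self.eps],
    self.cost,
    updates=self.updates,   # assumed: apply the parameter and accumulator updates on every call
)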
joschu/cgt: cgt/nn.py (view on GitHub)
"""Stochastic Gradient Descent (SGD) updates
    Math:
    * ``param := param - learning_rate * gradient``
    Parameters
    ----------
    cost : a scalar loss.
    params : a list of cgt shared variables. We generate update
            expressions w.r.t. these variables.
    learning_rate : float

    Returns
    -------
    list of tuples of the form (param, new_param)
    """
    updates = []
    grads = cgt.grad(cost, params)
    for param, grad in zip(params, grads):
        updates.append((param, param - learning_rate * grad))

    return updates
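The pairs returned by this helper plug straight into cgt.function's updates argument, exactly as the convnet example further down does with its own update list. A hedged usage sketch; the name sgd_updates is assumed because the excerpt omits the def line, and loss, X, and y stand for an existing scalar loss and its symbolic inputs:

params = nn.get_parameters(loss)                          # shared variables reachable from loss
updates = sgd_updates(loss, params, learning_rate=0.01)   # [(param, param - lr * grad), ...]
train_step = cgt.function([X, y], loss, updates=updates)  # each call applies the updates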
joschu/cgt: examples/demo_cifar.py (view on GitHub)
def rmsprop_updates(cost, params, stepsize=0.001, rho=0.9, epsilon=1e-6):
    grads = cgt.grad(cost, params)
    updates = []
    for p, g in zip(params, grads):
        acc = cgt.shared(p.op.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * cgt.square(g)
        gradient_scaling = cgt.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - stepsize * g))
    return updates
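rmsprop_updates follows the same (variable, new_value) convention: acc keeps an exponential moving average of squared gradients, rho sets how quickly old gradients are forgotten, and epsilon keeps the division stable when the accumulator is close to zero. The result is compiled the same way as the SGD updates, for example (loss, params, X, and y are placeholder names):

train_step = cgt.function([X, y], loss, updates=rmsprop_updates(loss, params, stepsize=0.001))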
joschu/cgt: examples/demo_neural_turing_machine.py (view on GitHub)
    state_arrs = initial_states
    for t in xrange(total_time):
        tmp = ntm([x_tbk[t]] + state_arrs)
        raw_pred = tmp[0]
        state_arrs = tmp[1:4]

        if t in loss_timesteps:
            p_pred = cgt.sigmoid(raw_pred)
            ce = bernoulli_crossentropy(y_tbp[t], p_pred).sum()  # cross-entropy of bernoulli distribution
            lossCE = lossCE + ce
            loss01 = loss01 + cgt.cast(cgt.equal(y_tbp[t], round01(p_pred)),cgt.floatX).sum()


    lossCE = lossCE / (len(loss_timesteps) * opt.p * opt.b) / np.log(2)
    loss01 = loss01 / (len(loss_timesteps) * opt.p * opt.b)
    gradloss = cgt.grad(lossCE, params)

    flatgrad = flatcat(gradloss)

    f_loss = cgt.function([x_tbk, y_tbp], lossCE)
    f_loss_and_grad = cgt.function([x_tbk, y_tbp], [lossCE, loss01, flatgrad])

    print "number of nodes in computation graph:", core.count_nodes([lossCE, loss01, flatgrad])

    return f_loss, f_loss_and_grad, params
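Note how the per-parameter gradients from cgt.grad are concatenated by flatcat into a single flat vector before being returned from the compiled function, which is convenient for optimizers that work on one long parameter vector. flatcat itself is a helper defined in the example; for intuition only, its numpy analogue is:

flat = np.concatenate([np.asarray(g).ravel() for g in per_param_grads])   # per_param_grads: list returned by cgt.grad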
joschu/cgt: examples/cgt_theano_feedforward_comparison.py (view on GitHub)
def make_updater_convnet_parallel():
        X = cgt.tensor4("X", fixed_shape=(None,1,28,28)) # so shapes can be inferred
        y = cgt.vector("y",dtype='i8')
        stepsize = cgt.scalar("stepsize")
        loss = build_convnet_return_loss(X, y)

        m = nn.Module([X,y], [loss])
        split_loss = 0
        for start in xrange(0, batch_size, batch_size//4):
            sli = slice(start, start+batch_size//4)
            split_loss += m([X[sli], y[sli]])[0]
        split_loss /= 4
        params = nn.get_parameters(loss)
        gparams = cgt.grad(split_loss, params)
        updates2 = [(p, p-stepsize*gp) for (p, gp) in zip(params, gparams)]
        return cgt.function([X,y, stepsize], split_loss, updates=updates2)
joschu/cgt: examples/bench/seq_model.py (view on GitHub)
    Minit_nk = cgt.zeros((X_tnk.shape[0], X_tnk.shape[1]), cgt.floatX)
    M = Minit_nk

    for t in xrange(horizon):
        M = cell(M, X_tnk[t])

    # cgt.print_tree(M)
    print "simplifying..."
    M_simp = cgt.simplify([M])
    print "done"
    # cgt.print_tree(M_simp)
    print "fn before:",cgt.count_nodes(M)
    print "fn after:",cgt.count_nodes(M_simp)

    gs = cgt.grad(cgt.sum(M), cell.params())
    print "grad before", cgt.count_nodes(gs)
    g_simp = cgt.simplify(gs)
    print "grad after",cgt.count_nodes(g_simp)

    # M = cgt.simplify(M)
    elapsed.append(time()-tstart)

import matplotlib.pyplot as plt
plt.plot(horizons,elapsed,'x-')
plt.show()