from .model import Model
from ... import describe
from ...describe import Dimension, Synapses, Biases, Gradient


# _set_dimensions_if_needed is defined alongside this class in the source; it
# infers nI/nO from the first batch of data when they were left as None.
@describe.on_data(_set_dimensions_if_needed)
@describe.attributes(
    nB=Dimension("Batch size"),
    nI=Dimension("Input size"),
    nO=Dimension("Output size"),
    W=Synapses(
        "Weights matrix",
        lambda obj: (obj.nO, obj.nI),
        lambda W, ops: ops.normal_init(W, W.shape[-1]),
    ),
    b=Biases("Bias vector", lambda obj: (obj.nO,)),
    d_W=Gradient("W"),
    d_b=Gradient("b"),
)
class SELU(Model):
    name = "selu"

    @property
    def input_shape(self):
        return (self.nB, self.nI)

    @property
    def output_shape(self):
        return (self.nB, self.nO)

    def __init__(self, nO=None, nI=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.nI = nI
        self.drop_factor = kwargs.get("drop_factor", 1.0)
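# For reference, the SELU nonlinearity applied on top of the affine transform
# uses the constants from Klambauer et al. (2017). A minimal numpy sketch of
# the activation itself, not the library's own implementation:
import numpy as np

def selu(x, alpha=1.6732632423543772, scale=1.0507009873554805):
    # scale * x for positive inputs, scaled exponential for negative inputs
    return scale * np.where(x >= 0, x, alpha * (np.exp(x) - 1))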
def _run_child_hooks(model, X, y=None):
    for hook in model.child.on_data_hooks:
        hook(model.child, X, y)


# _init_to_one (defined alongside this class) fills a parameter with ones.
@describe.on_data(_run_child_hooks)
@describe.attributes(
    G=describe.Weights("Scaling vector", lambda obj: (obj.nO,), _init_to_one),
    b=describe.Biases("Bias vector", lambda obj: (obj.nO,)),
    d_G=describe.Gradient("G"),
    d_b=describe.Gradient("b"),
    m=describe.Weights("Means", lambda obj: (obj.nO,)),
    v=describe.Weights("Variance", lambda obj: (obj.nO,), _init_to_one),
)
class BatchNorm(Model):
    name = "batchnorm"

    def __init__(self, child, **kwargs):
        self.child = child
        self._layers = [child]
        if "nO" in kwargs:
            self.nO = kwargs["nO"]
        elif getattr(child, "nO", None):
            self.nO = child.nO
        self.nr_upd = 0
        self.eps = kwargs.get("eps", 1e-5)
        self.alpha = self.ops.xp.asarray([0.1], dtype="float32")
        self.rmax = kwargs.get("rmax", 3.0)
        self.dmax = kwargs.get("dmax", 5.0)
        Model.__init__(self, **kwargs)
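# Usage sketch: BatchNorm wraps a child layer and normalizes its output, e.g.
# model = BatchNorm(ReLu(128, 64)). The eps/rmax/dmax attributes match batch
# renormalization (Ioffe, 2017); a hypothetical helper showing the clipped
# correction factors those bounds control:
def _batch_renorm_correction(xp, batch_mean, batch_var, m, v, rmax, dmax, eps=1e-5):
    # r rescales and d shifts the batch statistics towards the running
    # averages m (means) and v (variances), clipped to [1/rmax, rmax] and
    # [-dmax, dmax] respectively.
    r = xp.clip(xp.sqrt((batch_var + eps) / (v + eps)), 1.0 / rmax, rmax)
    d = xp.clip((batch_mean - m) / xp.sqrt(v + eps), -dmax, dmax)
    return r, d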
# NOTE: the source snippet begins mid-__init__; the class header and the
# stack construction below are assumptions, following the Encoder/EncoderLayer
# pattern used elsewhere in this file.
class Encoder(Model):
    def __init__(self, nM=300, nH=6, nS=6, device="cpu"):
        Model.__init__(self)
        self.stack = clone(EncoderLayer(nM=nM, nH=nH, device=device), nS)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM=nM, device=device))

    def begin_update(self, input, drop=0.1):
        X0, mask = input
        (X1, _), b_X1 = self.stack.begin_update((X0, mask), drop=0.1)
        X2, b_X2 = self.norm.begin_update(X1)

        def finish_update(dX2, sgd=None):
            dX1 = b_X2(dX2, sgd=sgd)
            dX0 = b_X1(dX1, sgd=sgd)
            return dX0

        return X2, finish_update
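# Driving the encoder manually, a sketch (assumed shapes: X is
# (batch, length, nM), mask is (batch, length, length) as built by _get_mask
# below; encoder and optimizer are hypothetical names):
# Y, backprop = encoder.begin_update((X, mask))
# dX = backprop(dY, sgd=optimizer)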
class EncoderLayer(Model):
    def __init__(self, nM=300, nH=6, device="cpu"):
        Model.__init__(self)
        self.attn = MultiHeadedAttention(nM=nM, nH=nH)
        self.ffd = PositionwiseFeedForward(nM, 4 * nM)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM, device=device))
        self.nM = nM
        self.layers_ = [self.attn, self.ffd, self.norm]

    def begin_update(self, input, drop=0.1):
        X0, mask = input
        # Self-attention sublayer with a residual connection.
        X1, b_X1 = self.attn.begin_update((X0, mask, None), drop=drop)
        X2, b_X2 = self.norm.begin_update(X1)
        X3 = X0 + X2
        # Position-wise feed-forward sublayer with a residual connection.
        X4, b_X4 = self.ffd.begin_update(X3, drop=drop)
        X5, b_X5 = self.norm.begin_update(X4)
        X6 = X3 + X5

        def finish_update(dX6, sgd=None):
            # Backpropagate through the feed-forward sublayer...
            dX5 = dX6
            dX4 = b_X5(dX5, sgd=sgd)
            dX3 = b_X4(dX4, sgd=sgd)
            dX3 += dX6  # ...adding the gradient carried by the residual.
            # Then through the attention sublayer, again with the residual.
            dX2 = dX3
            dX1 = b_X2(dX2, sgd=sgd)
            dX0 = b_X1(dX1, sgd=sgd)
            dX0 += dX3
            return dX0

        return (X6, mask), finish_update


# coding: utf8
from __future__ import unicode_literals

from .model import Model


def on_data(self, X, y=None):
    # Forward data hooks to the wrapped layer so it can infer its dimensions.
    for layer in self._layers:
        for hook in layer.on_data_hooks:
            hook(layer, X, y)


class Residual(Model):
    def __init__(self, layer):
        Model.__init__(self)
        self._layers.append(layer)
        self.on_data_hooks.append(on_data)

    @property
    def nO(self):
        return self._layers[-1].nO

    def predict(self, X):
        Y = self._layers[0](X)
        if isinstance(X, list):
            return [X[i] + Y[i] for i in range(len(X))]
        elif isinstance(X, tuple) and isinstance(Y, tuple) and len(X) == 2:
            # (data, lengths) pairs: add the data, keep the lengths.
            assert X[1].sum() == Y[1].sum()
            assert Y[1].sum() == Y[0].shape[0], (Y[1].sum(), Y[0].shape[0])
            return (X[0] + Y[0], Y[1])
        else:
            return X + Y
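# Residual requires the wrapped layer to preserve width (nO == nI), since the
# output is X + Y. A usage sketch with thinc's Maxout layer:
# model = Residual(Maxout(128, 128, pieces=3))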
class DecoderLayer(Model):
    def __init__(self, nM=300, nH=6, device="cpu"):
        Model.__init__(self)
        self.y_attn = MultiHeadedAttention(nM=nM, nH=nH)
        self.x_attn = MultiHeadedAttention(nM=nM, nH=nH)
        self.norm = PyTorchWrapper(PytorchLayerNorm(nM, device=device))
        self.ffd = PositionwiseFeedForward(nM, 4 * nM)
        self.layers_ = [self.norm, self.y_attn, self.x_attn, self.ffd]

    def begin_update(self, input, drop=0.1):
        Y0, X0, X_mask, Y_mask = input
        # Masked self-attention over the target sequence, with a residual.
        Y1, b_Y1 = self.y_attn.begin_update((Y0, Y_mask, None), drop=drop)
        Y2, b_Y2 = self.norm.begin_update(Y1)
        Y3 = Y0 + Y2
        # Encoder-decoder attention over the encoded source, with a residual.
        Y4, b_Y4 = self.x_attn.begin_update((Y3, X0, X_mask, None, None), drop=drop)
        Y5, b_Y5 = self.norm.begin_update(Y4)
        Y6 = Y3 + Y5
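# The snippet ends here; by symmetry with EncoderLayer above, the block would
# presumably continue with the feed-forward sublayer, a third residual
# connection, and a finish_update callback mirroring the forward pass:
# Y7, b_Y7 = self.ffd.begin_update(Y6, drop=drop)
# Y8, b_Y8 = self.norm.begin_update(Y7)
# Y9 = Y6 + Y8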
from ...api import chain, clone, with_getitem, wrap, with_reshape
from .softmax import Softmax
from .relu import ReLu
from .layernorm import LayerNorm
from .maxout import Maxout
from .resnet import Residual
from .affine import Affine
from .multiheaded_attention import MultiHeadedAttention
from .positionwise_ffd import PositionwiseFeedForward
from ...extra.wrappers import PyTorchWrapper, PyTorchModule
import copy
import math
import numpy as np
class EncoderDecoder(Model):
    def __init__(self, nS=1, nH=6, nM=300, nTGT=10000, device="cpu"):
        """
        EncoderDecoder consists of an encoder stack, a decoder stack and an
        output layer, which is a linear transform followed by a softmax.
        Parameters:
            nS: the number of encoders/decoders in the stack
            nH: the number of heads in the multi-headed attention
            nM: the token embedding size
            nTGT: the number of unique words in the output vocabulary
        """
        Model.__init__(self)
        self.nS = nS
        self.nH = nH
        self.nM = nM
        self.nTGT = nTGT
        self.device = device
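# The remainder of __init__ is not shown in this snippet; given the clone()
# combinator imported above, it would presumably build the stacks the
# docstring describes, along the lines of:
# self.enc = clone(EncoderLayer(nM=nM, nH=nH, device=device), nS)
# self.dec = clone(DecoderLayer(nM=nM, nH=nH, device=device), nS)
# self.proj = with_reshape(Softmax(nO=nTGT, nI=nM))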
def max_pool(X_lengths, drop=0.0):
    X, lengths = X_lengths
    ops = Model.ops
    best, which = ops.max_pool(X, lengths)

    def finish_update(d_output, sgd=None):
        d_output = ops.xp.ascontiguousarray(d_output)
        return ops.backprop_max_pool(d_output, which, lengths)

    return best, finish_update
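# Assumed shapes: X is a concatenated batch (sum(lengths), nF) with lengths
# giving the row count per sequence; best is (len(lengths), nF), and `which`
# records the argmax positions that backprop_max_pool routes gradients to.
# pooled, backprop = max_pool((X, lengths))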
def _get_mask(X, nX):
    # Build a (batch, length, length) attention mask with ones over the first
    # nX[i] key positions of each sequence, broadcast across the query axis.
    nB = X.shape[0]
    nL = X.shape[1]
    X_mask = Model.ops.allocate((nB, nL, nL))
    for i, length in enumerate(nX):
        X_mask[i, :, :length] = 1.0
    return X_mask
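# For example, with nL = 4 and true lengths nX = [2, 4], X_mask[0] is:
# [[1, 1, 0, 0],
#  [1, 1, 0, 0],
#  [1, 1, 0, 0],
#  [1, 1, 0, 0]]
# while X_mask[1] is all ones.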