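These snippets define neural-network layers with Thinc's describe decorators, which attach dimensions, parameter arrays, and gradients to Model subclasses. The imports are not part of the snippets themselves; the sketch below shows roughly where the names come from in a Thinc v7-era layout (some files use the bare names, others the describe. prefix), but the exact paths are an assumption and may differ between versions.

# Assumed imports (Thinc v7-era layout; a best guess, not verbatim from the source).
from thinc import describe
from thinc.describe import Dimension, Synapses, Biases, Weights, Gradient
from thinc.neural._classes.model import Model
from thinc.neural.util import copy_array
# Also referenced but not shown below: svd_orthonormal, _uniform_init,
# LSUVinit, and the check / is_int argument validators.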
b=Biases("Bias vector", lambda obj: (obj.nO * 4,)),
forget_bias=Biases(
"Bias for forget gates",
lambda obj: (obj.nO,),
lambda b, ops: copy_array(b, ops.xp.ones(b.shape, dtype=b.dtype)),
),
d_W=Gradient("W"),
d_b=Gradient("b"),
d_forget_bias=Gradient("forget_bias"),
initial_hiddens=Biases(
"Initial hiddens", lambda obj: (obj.nO,), _uniform_init(-0.1, 0.1)
),
initial_cells=Biases(
"Initial cells", lambda obj: (obj.nO,), _uniform_init(-0.1, 0.1)
),
d_initial_hiddens=Gradient("initial_hiddens"),
d_initial_cells=Gradient("initial_cells"),
)
class LSTM_weights(Model):
def __init__(self, nO, nI):
Model.__init__(self)
self.nO = nO
self.nI = nI
def begin_update(self, inputs_hidden, drop=0.0):
inputs, hidden = inputs_hidden
assert inputs.dtype == "float32"
X = self.ops.xp.hstack([inputs, hidden])
acts = self.ops.gemm(X, self.W, trans2=True) + self.b
acts = self._split_activations(acts)
acts[0] += self.forget_bias
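_uniform_init is used above (for initial_hiddens and initial_cells) but never shown in these snippets. Since the other initializers here take an (array, ops) pair, a plausible shape for it is the sketch below; this is an assumption, not the library's actual helper.

def _uniform_init(lo, hi):
    # Hypothetical sketch: build an initializer with the (data, ops) signature
    # the describe attributes expect, filling the array uniformly in [lo, hi).
    def _init(data, ops):
        copy_array(data, ops.xp.random.uniform(lo, hi, data.shape))
    return _init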
def _set_dimensions_if_needed(model, X, y=None):
    # Only the final line of this helper survives in the snippet; the guards
    # here are a reconstruction (assumed), while the visible behavior is that
    # the output size is inferred from class labels as int(y.max()) + 1.
    if model.nI is None:
        model.nI = X.shape[1]
    if model.nO is None and y is not None:
        model.nO = int(y.max()) + 1
@describe.on_data(_set_dimensions_if_needed)
@describe.attributes(
    nB=Dimension("Batch size"),
    nI=Dimension("Input size"),
    nO=Dimension("Output size"),
    W=Synapses(
        "Weights matrix",
        lambda obj: (obj.nO, obj.nI),
        lambda W, ops: ops.normal_init(W, W.shape[-1]),
    ),
    b=Biases("Bias vector", lambda obj: (obj.nO,)),
    d_W=Gradient("W"),
    d_b=Gradient("b"),
)
class SELU(Model):
    name = "selu"

    @property
    def input_shape(self):
        return (self.nB, self.nI)

    @property
    def output_shape(self):
        return (self.nB, self.nO)

    def __init__(self, nO=None, nI=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.nI = nI
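A rough usage sketch for the dimension inference: nO and nI can be left as None and filled in from the first batch of data by the @describe.on_data hook, here called directly for illustration. The batch contents and the direct call are illustrative assumptions, not code from the source.

import numpy

model = SELU(nO=None, nI=None)

# Hypothetical toy batch: 8 examples, 300 features, 4 classes.
X = numpy.zeros((8, 300), dtype="float32")
y = numpy.array([0, 1, 2, 3, 0, 1, 2, 3], dtype="int32")

# Calling the hook directly for illustration; a training loop would normally
# run every callback registered with @describe.on_data.
_set_dimensions_if_needed(model, X, y)
assert model.nI == 300   # from X.shape[1]
assert model.nO == 4     # from int(y.max()) + 1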
@describe.on_data(_set_dimensions_if_needed)
@describe.attributes(
    nB=Dimension("Batch size"),
    nI=Dimension("Input size"),
    nO=Dimension("Output size"),
    W=Synapses(
        "Weights matrix",
        lambda obj: (obj.nO, obj.nI),
        lambda W, ops: ops.xavier_uniform_init(W),
    ),
    b=Biases("Bias vector", lambda obj: (obj.nO,)),
    d_W=Gradient("W"),
    d_b=Gradient("b"),
)
class Mish(Model):
    """Dense layer with mish activation.

    https://arxiv.org/pdf/1908.08681.pdf
    """

    name = "mish"

    @property
    def input_shape(self):
        return (self.nB, self.nI)

    @property
    def output_shape(self):
        return (self.nB, self.nO)
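Only the weights are declared above; the mish nonlinearity from the linked paper is x * tanh(softplus(x)). A standalone NumPy illustration of that formula (not the layer's forward code, which the snippet does not show):

import numpy

def mish(x):
    # mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + exp(x)))
    return x * numpy.tanh(numpy.log1p(numpy.exp(x)))

print(mish(numpy.array([-2.0, 0.0, 2.0], dtype="float32")))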
@describe.on_data(LSUVinit)
@describe.attributes(
    nM=Dimension("Vector dimensions"),
    nV=Dimension("Number of vectors"),
    nO=Dimension("Size of output"),
    W=Synapses(
        "A projection matrix, to change vector dimensionality",
        lambda obj: (obj.nO, obj.nM),
        lambda W, ops: ops.xavier_uniform_init(W),
    ),
    vectors=Weights(
        "Embedding table", lambda obj: (obj.nV, obj.nM), _uniform_init(-0.1, 0.1)
    ),
    d_W=Gradient("W"),
    d_vectors=Gradient("vectors"),
)
class Embed(Model):
    name = "embed"

    @check.arg(1, is_int)
    def __init__(self, nO, nM=None, nV=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.is_static = kwargs.get("is_static", False)
        self.column = kwargs.get("column", 0)
        self.nO = nO
        self.nM = nM
        self.nV = nV

    # @check.arg(1, is_int_array)
    def predict(self, ids):
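predict is cut off above. From the declared shapes, vectors is an (nV, nM) embedding table, W is an (nO, nM) projection, and self.column picks which column of a 2-D id array to embed, so a forward pass along these lines seems likely. The body below is a hypothetical reconstruction, not the original method.

def predict_sketch(self, ids):
    # Hypothetical reconstruction of Embed.predict.
    if ids.ndim >= 2:
        ids = ids[:, self.column]      # select the configured id column
    rows = self.vectors[ids]           # (n, nM) rows from the embedding table
    # Project to the output size: (n, nM) @ (nO, nM).T -> (n, nO)
    return self.ops.gemm(rows, self.W, trans2=True)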
nO=Dimension("Output size"),
nI=Dimension("Input size"),
W=Synapses(
"Weights matrix",
lambda obj: (obj.nO * 4, obj.nI + obj.nO),
lambda W, ops: copy_array(W, svd_orthonormal(W.shape)),
),
b=Biases("Bias vector", lambda obj: (obj.nO * 4,)),
forget_bias=Biases(
"Bias for forget gates",
lambda obj: (obj.nO,),
lambda b, ops: copy_array(b, ops.xp.ones(b.shape, dtype=b.dtype)),
),
d_W=Gradient("W"),
d_b=Gradient("b"),
d_forget_bias=Gradient("forget_bias"),
initial_hiddens=Biases(
"Initial hiddens", lambda obj: (obj.nO,), _uniform_init(-0.1, 0.1)
),
initial_cells=Biases(
"Initial cells", lambda obj: (obj.nO,), _uniform_init(-0.1, 0.1)
),
d_initial_hiddens=Gradient("initial_hiddens"),
d_initial_cells=Gradient("initial_cells"),
)
class LSTM_weights(Model):
def __init__(self, nO, nI):
Model.__init__(self)
self.nO = nO
self.nI = nI
def begin_update(self, inputs_hidden, drop=0.0):
)
@describe.attributes(
    nI=Dimension("Input size"),
    nF=Dimension("Number of features"),
    nO=Dimension("Output size"),
    nP=Dimension("Maxout pieces"),
    W=Synapses("Weights matrix", lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI)),
    b=Biases("Bias vector", lambda obj: (obj.nO, obj.nP)),
    pad=Synapses(
        "Pad",
        lambda obj: (1, obj.nF, obj.nO, obj.nP),
        lambda M, ops: ops.normal_init(M, 1.0),
    ),
    d_W=Gradient("W"),
    d_pad=Gradient("pad"),
    d_b=Gradient("b"),
)
class PrecomputableAffine(Model):
    def __init__(self, nO=None, nI=None, nF=None, nP=None, **kwargs):
        Model.__init__(self, **kwargs)
        self.nO = nO
        self.nP = nP
        self.nI = nI
        self.nF = nF

    def begin_update(self, X, drop=0.0):
        # Multiply the inputs by the flattened weights, then restore the
        # (nF, nO, nP) structure so each feature's contribution is available
        # up front.
        Yf = self.ops.gemm(
            X, self.W.reshape((self.nF * self.nO * self.nP, self.nI)), trans2=True
        )
        Yf = Yf.reshape((Yf.shape[0], self.nF, self.nO, self.nP))
        Yf = self._add_padding(Yf)
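W is stored as (nF, nO, nP, nI): begin_update flattens it to (nF*nO*nP, nI), multiplies the whole batch once, and reshapes back, so every feature's maxout-piece outputs are precomputed per input row instead of being recomputed each time they are needed. A standalone NumPy shape check of that step (illustration only, with made-up sizes):

import numpy

n, nF, nO, nP, nI = 5, 3, 8, 2, 16
W = numpy.zeros((nF, nO, nP, nI), dtype="float32")
X = numpy.zeros((n, nI), dtype="float32")

# Same computation as begin_update: one big matrix multiply, then reshape.
Yf = X @ W.reshape((nF * nO * nP, nI)).T
Yf = Yf.reshape((n, nF, nO, nP))
assert Yf.shape == (5, 3, 8, 2)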
def _init_to_one(W, ops):
    W.fill(1.0)


def _run_child_hooks(model, X, y=None):
    for hook in model.child.on_data_hooks:
        hook(model.child, X, y)


@describe.on_data(_run_child_hooks)
@describe.attributes(
    G=describe.Weights("Scaling vector", lambda obj: (obj.nO,), _init_to_one),
    b=describe.Biases("Bias vector", lambda obj: (obj.nO,)),
    d_G=describe.Gradient("G"),
    d_b=describe.Gradient("b"),
    m=describe.Weights("Means", lambda obj: (obj.nO,)),
    v=describe.Weights("Variance", lambda obj: (obj.nO,), _init_to_one),
)
class BatchNorm(Model):
    name = "batchnorm"

    def __init__(self, child, **kwargs):
        self.child = child
        self._layers = [child]
        if "nO" in kwargs:
            self.nO = kwargs["nO"]
        elif getattr(child, "nO", None):
            self.nO = child.nO
        self.nr_upd = 0
        self.eps = kwargs.get("eps", 1e-5)
        self.alpha = self.ops.xp.asarray([0.1], dtype="float32")
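The attributes give BatchNorm a scale G and bias b plus running mean m and variance v, with eps defaulting to 1e-5 and a momentum-like alpha of 0.1. The forward pass is presumably the standard batch-normalization formula, shown standalone below (an illustration of the formula, not the class's own code):

import numpy

def batch_norm_sketch(X, G, b, m, v, eps=1e-5):
    # Standard batch normalization with running statistics:
    # y = G * (x - mean) / sqrt(var + eps) + b
    return G * (X - m) / numpy.sqrt(v + eps) + b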
def _init_to_one(W, ops):
    W.fill(1.0)


def _run_child_hooks(model, X, y=None):
    if model.child:
        for hook in model.child.on_data_hooks:
            hook(model.child, X, y)


@describe.on_data(_run_child_hooks)
@describe.attributes(
    G=describe.Weights("Scaling vector", lambda obj: (obj.nO,), _init_to_one),
    b=describe.Biases("Bias vector", lambda obj: (obj.nO,)),
    d_G=describe.Gradient("G"),
    d_b=describe.Gradient("b"),
)
class LayerNorm(Model):
    name = "layernorm"

    def __init__(self, child=None, **kwargs):
        self.child = child
        if child is not None:
            self._layers = [child]
        else:
            self._layers = []
        Model.__init__(self, **kwargs)
        if "nO" in kwargs:
            self.nO = kwargs["nO"]
        elif getattr(child, "nO", None):
            self.nO = child.nO
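LayerNorm keeps only the scale G and bias b; the statistics are computed per example rather than tracked across batches. A standalone sketch of the standard layer-norm formula this wrapper presumably applies to its child layer's output:

import numpy

def layer_norm_sketch(X, G, b, eps=1e-5):
    # Normalize each row to zero mean and unit variance, then scale and shift.
    mu = X.mean(axis=-1, keepdims=True)
    var = X.var(axis=-1, keepdims=True)
    return G * (X - mu) / numpy.sqrt(var + eps) + b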