import tensorflow as tf
import larq as lq


@lq.utils.register_keras_custom_object
@lq.utils.set_precision(1)
def magnitude_aware_sign_unclipped(x):
    """
    Scaled sign function with identity pseudo-gradient as used for the weights
    in the DoReFa paper. The scale factor is calculated per layer.
    """
    scale_factor = tf.stop_gradient(tf.reduce_mean(tf.abs(x)))

    @tf.custom_gradient
    def _magnitude_aware_sign(x):
        return lq.math.sign(x) * scale_factor, lambda dy: dy

    return _magnitude_aware_sign(x)
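# Illustrative usage sketch (not part of the original snippet): the quantizer
# outputs sign(x) scaled by the per-layer mean absolute value, while the
# identity pseudo-gradient passes incoming gradients through unchanged.
x = tf.constant([-2.0, -0.5, 0.5, 2.0])
with tf.GradientTape() as tape:
    tape.watch(x)
    y = magnitude_aware_sign_unclipped(x)
print(y)                    # [-1.25 -1.25  1.25  1.25] == sign(x) * mean(|x|)
print(tape.gradient(y, x))  # [1. 1. 1. 1.] -- identity pseudo-gradient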
            # Each (predicate, optimizer) pair may claim the current variable.
            if predicate(var):
                self.var_opt_mapping[var.name] = optimizer_index
                num_optimizers += 1

        # A variable must be claimed by exactly one optimizer; otherwise fall
        # back to the default optimizer, or warn that it will not be trained.
        if num_optimizers > 1:
            raise ValueError(f"Variable `{var}` claimed by multiple optimizers.")
        if num_optimizers == 0:
            if self.default is not None:
                self.var_opt_mapping[var.name] = self.DEFAULT_OPT_INDEX
            else:
                warnings.warn(
                    f"No `default_optimizer` provided to train variable `{var}`."
                )
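# Illustrative usage sketch (assumption, not part of the original source):
# mapping logic like the above is exercised by `larq.optimizers.CaseOptimizer`,
# which takes (predicate, optimizer) pairs plus a default optimizer, e.g. Bop
# for binary variables and Adam for everything else. Hyperparameter values
# here are placeholders.
import tensorflow as tf
import larq as lq

optimizer = lq.optimizers.CaseOptimizer(
    (lq.optimizers.Bop.is_binary_variable, lq.optimizers.Bop(threshold=1e-8, gamma=1e-4)),
    default_optimizer=tf.keras.optimizers.Adam(0.01),
)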
@utils.register_keras_custom_object
class Bop(tf.keras.optimizers.Optimizer):
    """Binary optimizer (Bop).

    Bop is a latent-free optimizer for Binarized Neural Networks (BNNs) and
    Binary Weight Networks (BWN).

    Bop maintains an exponential moving average of the gradients controlled by
    `gamma`. If this average exceeds the `threshold`, a weight is flipped.
    Additionally, Bop accepts a regular optimizer that is applied to the
    non-binary weights in the network.

    The hyperparameter `gamma` is somewhat analogous to the learning rate in
    SGD methods: a high `gamma` results in rapid convergence but also makes
    training noisier.

    Note that the default `threshold` is not optimal for all situations.
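# Illustrative sketch (assumption, a simplified reading of the description
# above, not the actual optimizer implementation): an exponential moving
# average `m` of the gradient is kept per binary weight; when `m` agrees in
# sign with the weight and exceeds `threshold`, the weight is flipped.
import tensorflow as tf

def bop_step_sketch(w, grad, m, gamma=1e-4, threshold=1e-8):
    """One simplified Bop step for a binary weight tensor `w` in {-1, +1}."""
    m = m + gamma * (grad - m)   # exponential moving average of the gradients
    flip = (w * m) > threshold   # average agrees in sign with w and exceeds threshold
    w = tf.where(flip, -w, w)
    return w, m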
@utils.register_keras_custom_object
@utils.set_precision(1)
@tf.custom_gradient
def approx_sign(x):
    r"""
    Sign binarization function.

    \\[
    q(x) = \begin{cases}
      -1 & x < 0 \\\
      1 & x \geq 0
    \end{cases}
    \\]

    The gradient is estimated using the ApproxSign method.

    \\[\frac{\partial q(x)}{\partial x} = \begin{cases}
      (2 - 2 \left|x\right|) & \left|x\right| \leq 1 \\\
      0 & \left|x\right| > 1
    \end{cases}
    \\]
    """
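    # Body sketch (assumption: the original body was truncated; this follows
    # the forward/backward formulas given in the docstring and assumes `larq`
    # is imported as `lq`, whose `lq.math.sign` maps 0 to +1).
    def grad(dy):
        abs_x = tf.abs(x)
        # ApproxSign pseudo-gradient: (2 - 2|x|) inside [-1, 1], zero outside.
        return tf.where(abs_x <= 1.0, (2 - 2 * abs_x) * dy, tf.zeros_like(dy))

    return lq.math.sign(x), grad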
            use_bias=use_bias,
            input_quantizer=input_quantizer,
            kernel_quantizer=kernel_quantizer,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            metrics=metrics,
            **kwargs,
        )
@utils.register_keras_custom_object
class QuantConv3D(QuantizerBase, tf.keras.layers.Conv3D):
    """3D convolution layer (e.g. spatial convolution over volumes).

    This layer creates a convolution kernel that is convolved
    with the layer input to produce a tensor of
    outputs. `input_quantizer` and `kernel_quantizer` are the element-wise quantization
    functions to use. If both quantization functions are `None` this layer is
    equivalent to `Conv3D`. If `use_bias` is True, a bias vector is created and
    added to the outputs. Finally, if `activation` is not `None`,
    it is applied to the outputs as well.

    When using this layer as the first layer in a model, provide the keyword argument
    `input_shape` (tuple of integers, does not include the sample axis),
    e.g. `input_shape=(128, 128, 128, 1)` for 128x128x128 volumes
    with a single channel, in `data_format="channels_last"`.
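# Illustrative usage sketch (assumption, not part of the original source):
# `QuantConv3D` as the first layer of a model, following the `input_shape`
# guidance above; the filter count, quantizers, and head layers are placeholders.
import tensorflow as tf
import larq as lq

model = tf.keras.Sequential([
    lq.layers.QuantConv3D(
        16,
        kernel_size=3,
        input_quantizer="ste_sign",
        kernel_quantizer="ste_sign",
        kernel_constraint="weight_clip",
        input_shape=(128, 128, 128, 1),
    ),
    tf.keras.layers.GlobalAveragePooling3D(),
    tf.keras.layers.Dense(10),
])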
@utils.register_keras_custom_object
@utils.set_precision(1)
def ste_heaviside(x, clip_value=1.0):
    r"""
    Binarization function with output values 0 and 1.

    \\[
    q(x) = \begin{cases}
      +1 & x > 0 \\\
      0 & x \leq 0
    \end{cases}
    \\]

    The gradient is estimated using the Straight-Through Estimator
    (essentially the binarization is replaced by a clipped identity on the
    backward pass).

    ```plot-activation
    quantizers.ste_heaviside
    ```

    # Arguments
    clip_value: Threshold for clipping gradients. If `None` gradients are not clipped.

    # Returns
    AND Binarization function
    """
@utils.register_keras_custom_object
class SteHeaviside(QuantizerFunctionWrapper):
    def __init__(self, clip_value=1.0):
        super().__init__(ste_heaviside, clip_value=clip_value)
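# Illustrative usage sketch (assumption, not part of the original source):
# the class wrapper makes the {0, 1} quantizer configurable per layer; the
# layer type, sizes, and companion quantizers here are placeholders.
import larq as lq

layer = lq.layers.QuantDense(
    32,
    input_quantizer=SteHeaviside(clip_value=1.0),
    kernel_quantizer="ste_sign",
    kernel_constraint="weight_clip",
)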
@utils.register_keras_custom_object
class SwishSign(QuantizerFunctionWrapper):
    r"""Sign binarization function.

    \\[
    q(x) = \begin{cases}
      -1 & x < 0 \\\
      1 & x \geq 0
    \end{cases}
    \\]

    The gradient is estimated using the SignSwish method.

    \\[
    \frac{\partial q_{\beta}(x)}{\partial x} = \frac{\beta\left\\{2-\beta x \tanh \left(\frac{\beta x}{2}\right)\right\\}}{1+\cosh (\beta x)}
    \\]
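# Illustrative sketch (assumption, not the original implementation): a
# standalone function matching the forward/backward formulas above. The name
# and the default `beta` are placeholders.
import tensorflow as tf

def swish_sign_sketch(x, beta=5.0):
    @tf.custom_gradient
    def _call(x):
        def grad(dy):
            b_x = beta * x
            # SignSwish pseudo-gradient:
            # beta * (2 - beta*x*tanh(beta*x/2)) / (1 + cosh(beta*x))
            return dy * beta * (2 - b_x * tf.math.tanh(b_x / 2)) / (1 + tf.math.cosh(b_x))
        # Forward pass: -1 for x < 0, +1 for x >= 0.
        return tf.where(x < 0, -tf.ones_like(x), tf.ones_like(x)), grad
    return _call(x)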
@utils.register_keras_custom_object
@utils.set_precision(2)
def ste_tern(x, threshold_value=0.05, ternary_weight_networks=False, clip_value=1.0):
    r"""Ternarization function.

    \\[
    q(x) = \begin{cases}
      +1 & x > \Delta \\\
      0 & |x| < \Delta \\\
      -1 & x < - \Delta
    \end{cases}
    \\]

    where \\(\Delta\\) is defined as the threshold and can be passed as an argument,
    or can be calculated as per the Ternary Weight Networks original paper, such that
    \\[
    \Delta = \frac{0.7}{n} \sum_{i=1}^{n} |W_i|
    \\]
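# Illustrative sketch (assumption, not the original implementation): the
# forward ternarization per the cases above; the gradient handling implied by
# the `clip_value` argument is omitted from this sketch.
import tensorflow as tf

def ste_tern_forward_sketch(x, threshold_value=0.05):
    return tf.where(
        x > threshold_value,
        tf.ones_like(x),
        tf.where(x < -threshold_value, -tf.ones_like(x), tf.zeros_like(x)),
    )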