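# Imports assumed by the fragments below.
from typing import List, NamedTuple, Optional

import mxnet as mx
import numpy as np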
if dtype == 'float32':
    # data = mx.sym.identity(data=data, name='id')
    data = data
else:
    if dtype == 'float16':
        data = mx.sym.Cast(data=data, dtype=np.float16)
data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
(nchannel, height, width) = image_shape
if height <= 32:    # such as cifar10
    body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                              no_bias=True, name="conv0", workspace=workspace)
else:               # often expected to be 224 such as imagenet
    body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2, 2), pad=(3, 3),
                              no_bias=True, name="conv0", workspace=workspace)
    body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
    body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
    body = mx.sym.Pooling(data=body, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type='max')

for i in range(num_stages):
    body = residual_unit(body, filter_list[i+1], (1 if i == 0 else 2, 1 if i == 0 else 2), False,
                         name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace,
                         memonger=memonger)
    for j in range(units[i]-1):
        body = residual_unit(body, filter_list[i+1], (1, 1), True, name='stage%d_unit%d' % (i + 1, j + 2),
                             bottle_neck=bottle_neck, workspace=workspace, memonger=memonger)
bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
# Although kernel is not used here when global_pool=True, we should put one
pool1 = mx.sym.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
flat = mx.sym.Flatten(data=pool1)
try:
    # flatten=False is only available in newer MXNet releases; the except branch is
    # an assumed fallback -- the fragment cuts off after the try.
    fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1', flatten=False)
except Exception:
    fc1 = mx.sym.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
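# For orientation: a standard ImageNet ResNet-50 under these conventions would use
# units=[3, 4, 6, 3], filter_list=[64, 256, 512, 1024, 2048], bottle_neck=True
# (a well-known configuration, not taken from this fragment).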
def vgg_body_factory(structure_list):
    net = mx.sym.Variable("data")
    for item in structure_list:
        if isinstance(item, str):
            net = mx.sym.Pooling(net, kernel=(2, 2), stride=(2, 2), pool_type="max")
        else:
            net = conv_factory(net, num_filter=item, kernel=(3, 3), stride=(1, 1), pad=(1, 1))
    return net
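# Usage sketch (assumption: integer entries give conv filter counts, while any string
# entry such as "M" requests a 2x2 max-pool; conv_factory is defined elsewhere):
#   vgg11_body = vgg_body_factory([64, "M", 128, "M", 256, 256, "M", 512, 512, "M", 512, 512, "M"])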
# (reconstructed guard -- the fragment begins inside this branch; s_m scales the
# one-hot margin when only the additive cosine margin margin_b is active)
if args.margin_a == 1.0 and args.margin_m == 0.0:
    s_m = s * args.margin_b
    gt_one_hot = mx.sym.one_hot(gt_label, depth=args.num_classes, on_value=s_m, off_value=0.0)
    fc = fc - gt_one_hot
else:
    # combined angular margin on the target logit: cos(margin_a * t + margin_m) - margin_b
    zy = mx.sym.pick(fc, gt_label, axis=1)
    cos_t = zy / s
    t = mx.sym.arccos(cos_t)
    if args.margin_a != 1.0:
        t = t * args.margin_a
    if args.margin_m > 0.0:
        t = t + args.margin_m
    body = mx.sym.cos(t)
    if args.margin_b > 0.0:
        body = body - args.margin_b
    new_zy = body * s
    diff = new_zy - zy
    diff = mx.sym.expand_dims(diff, 1)
    gt_one_hot = mx.sym.one_hot(gt_label, depth=args.num_classes, on_value=1.0, off_value=0.0)
    body = mx.sym.broadcast_mul(gt_one_hot, diff)
    fc = fc + body
if args.ignore_label == 0:
    softmax = mx.symbol.SoftmaxOutput(data=fc, label=gt_label, name=name + '_softmax',
                                      normalization='valid', grad_scale=args.grad_scale)
else:
    softmax = mx.symbol.SoftmaxOutput(data=fc, label=gt_label, name=name + '_softmax',
                                      normalization='valid', use_ignore=True,
                                      ignore_label=args.ignore_label, grad_scale=args.grad_scale)
return softmax
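# Quick numeric check of the combined-margin transform new_zy = s * (cos(a*t + m) - b)
# applied above. The helper and its default values are illustrative, not from the source;
# a=1.0, m=0.5, b=0.0 corresponds to a plain additive angular (ArcFace-style) margin.
def combined_margin(cos_t, s=64.0, a=1.0, m=0.5, b=0.0):
    t = np.arccos(cos_t)
    return s * (np.cos(a * t + m) - b)

# For a well-classified sample (cos_t = 0.8) the margin lowers the target logit:
# combined_margin(0.8) ≈ 26.5, versus the unmodified logit s * 0.8 = 51.2.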
# (the opening condition is cut off in this fragment; the guard and the prefix
# stripping below are an assumption about how contrib ops were dispatched)
if op_name.startswith("_contrib_"):
    operator = eval("mx.sym.contrib." + op_name[len("_contrib_"):])
elif op_name.startswith("_"):
    operator = eval("mx.sym._internal." + op_name)
else:
    operator = eval("mx.sym." + op_name)
res = operator(*children, **attrs, name=node_name)
node_map[nid] = res
node_op_map[nid] = [op_name]
outputs = [node_map[e[0]][e[1]] for e in jgraph["heads"]]
outputs = outputs[0] if len(outputs) == 1 else mx.sym.Group(outputs)
return outputs
if __name__ == "__main__":
    sym = mx.sym.load("source.json")
    sym1, _, _ = merge_bn(sym, None, None, True)
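    # A plausible follow-up, not present in the fragment: persist the BN-merged symbol.
    sym1.save("merged.json")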
def get_loss(gram, content):
    gram_loss = []
    for i in range(len(gram.list_outputs())):
        gvar = mx.sym.Variable("target_gram_%d" % i)
        gram_loss.append(mx.sym.sum(mx.sym.square(gvar - gram[i])))
    cvar = mx.sym.Variable("target_content")
    content_loss = mx.sym.sum(mx.sym.square(cvar - content))
    return mx.sym.Group(gram_loss), content_loss
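# Usage sketch (assumption: `gram` is the grouped symbol of per-layer Gram matrices and
# `content` the content-feature symbol produced by the style network elsewhere):
#   style_loss, content_loss = get_loss(gram, content)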
init_memory_value = mx.sym.Variable('init_memory_value',
                                    shape=(self.memory_size, self.memory_value_state_dim),
                                    init=mx.init.Normal(0.1))  # (self.memory_size, self.memory_value_state_dim)
init_memory_value = mx.sym.broadcast_to(mx.sym.expand_dims(init_memory_value, axis=0),
                                        shape=(self.batch_size, self.memory_size, self.memory_value_state_dim))
mem = DKVMN(memory_size=self.memory_size,
            memory_key_state_dim=self.memory_key_state_dim,
            memory_value_state_dim=self.memory_value_state_dim,
            init_memory_key=init_memory_key,
            init_memory_value=init_memory_value,
            name="DKVMN")

### embedding
q_data = mx.sym.BlockGrad(q_data)
q_embed_data = mx.sym.Embedding(data=q_data, input_dim=self.n_question+1,
                                output_dim=self.q_embed_dim, name='q_embed')
slice_q_embed_data = mx.sym.SliceChannel(q_embed_data, num_outputs=self.seqlen, axis=0, squeeze_axis=True)
qa_data = mx.sym.BlockGrad(qa_data)
qa_embed_data = mx.sym.Embedding(data=qa_data, input_dim=self.n_question*2+1,
                                 output_dim=self.qa_embed_dim, name='qa_embed')
slice_qa_embed_data = mx.sym.SliceChannel(qa_embed_data, num_outputs=self.seqlen, axis=0, squeeze_axis=True)

value_read_content_l = []
input_embed_l = []
for i in range(self.seqlen):
    ## Attention
    q = slice_q_embed_data[i]
    correlation_weight = mem.attention(q)
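    # (fragment truncated) The reference DKVMN implementation typically continues by
    # reading the value memory with these weights -- a sketch, assuming mem.read() exists:
    #   read_content = mem.read(correlation_weight)
    #   value_read_content_l.append(read_content)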
if bottle_neck:
    conv1 = Conv(data=data, num_filter=int(num_filter*0.25), kernel=(1, 1), stride=(1, 1), pad=(0, 0),
                 no_bias=True, workspace=workspace, name=name + '_conv1')
    bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
    act1 = Act(data=bn1, act_type=act_type, name=name + '_relu1')
    conv2 = Conv(data=act1, num_filter=int(num_filter*0.25), kernel=(3, 3), stride=(1, 1), pad=(1, 1),
                 no_bias=True, workspace=workspace, name=name + '_conv2')
    bn2 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
    act2 = Act(data=bn2, act_type=act_type, name=name + '_relu2')
    conv3 = Conv(data=act2, num_filter=num_filter, kernel=(1, 1), stride=stride, pad=(0, 0), no_bias=True,
                 workspace=workspace, name=name + '_conv3')
    bn3 = mx.sym.BatchNorm(data=conv3, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
    if use_se:
        # se begin: squeeze (global pool) -> excite (two 1x1 convs) -> channel-wise rescale
        body = mx.sym.Pooling(data=bn3, global_pool=True, kernel=(7, 7), pool_type='avg', name=name + '_se_pool1')
        body = Conv(data=body, num_filter=num_filter//16, kernel=(1, 1), stride=(1, 1), pad=(0, 0),
                    name=name + "_se_conv1", workspace=workspace)
        body = Act(data=body, act_type=act_type, name=name + '_se_relu1')
        body = Conv(data=body, num_filter=num_filter, kernel=(1, 1), stride=(1, 1), pad=(0, 0),
                    name=name + "_se_conv2", workspace=workspace)
        body = mx.symbol.Activation(data=body, act_type='sigmoid', name=name + "_se_sigmoid")
        bn3 = mx.symbol.broadcast_mul(bn3, body)
        # se end
    if dim_match:
        shortcut = data
    else:
        conv1sc = Conv(data=data, num_filter=num_filter, kernel=(1, 1), stride=stride, no_bias=True,
                       workspace=workspace, name=name + '_conv1sc')
        shortcut = mx.sym.BatchNorm(data=conv1sc, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_sc')
    if memonger:
        shortcut._set_attr(mirror_stage='True')  # standard memonger hint on the shortcut
    # (fragment ends here; the unit would normally return the residual sum, e.g. bn3 + shortcut)
shapes.append(mx.io.DataDesc(name='cache_l%d_k' % l,
                             shape=(batch_size, target_max_length - 1, self.config.model_size),
                             layout=C.BATCH_MAJOR))
shapes.append(mx.io.DataDesc(name='cache_l%d_v' % l,
                             shape=(batch_size, target_max_length - 1, self.config.model_size),
                             layout=C.BATCH_MAJOR))
return shapes
def get_max_seq_len(self) -> Optional[int]:
    # The positional embeddings potentially pose a limit on the maximum length at inference time.
    return self.pos_embedding.get_max_seq_len()
RecurrentDecoderState = NamedTuple('RecurrentDecoderState', [
    ('hidden', mx.sym.Symbol),
    ('layer_states', List[mx.sym.Symbol]),
])
"""
RecurrentDecoder state.

:param hidden: Hidden state after attention mechanism. Shape: (batch_size, num_hidden).
:param layer_states: Hidden states for RNN layers of RecurrentDecoder. Shape: List[(batch_size, rnn_num_hidden)].
"""
class RecurrentDecoderConfig(Config):
    """
    Recurrent decoder configuration.

    :param max_seq_len_source: Maximum source sequence length.
    :param rnn_config: RNN configuration.
    :param attention_config: Attention configuration.
    """
def relu6(data, prefix):
    return mx.sym.clip(data, 0, 6, name='%s-relu6' % prefix)
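# The peephole LSTM cell below assumes LSTMState/LSTMParam containers along these
# lines (a sketch: only the fields the fragment actually uses are listed):
from collections import namedtuple
LSTMState = namedtuple('LSTMState', ['c', 'h'])
LSTMParam = namedtuple('LSTMParam', ['c2i_bias', 'c2f_bias', 'c2o_bias', 'ph2h_weight'])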
gates = i2h + h2h
slice_gates = mx.sym.SliceChannel(gates, num_outputs=4,
                                  name="t%d_l%d_slice" % (seqidx, layeridx))
# peephole terms: the cell state also feeds the input, forget, and output gates
Wcidc = mx.sym.broadcast_mul(param.c2i_bias, prev_state.c) + slice_gates[0]
in_gate = mx.sym.Activation(Wcidc, act_type="sigmoid")
in_transform = mx.sym.Activation(slice_gates[1], act_type="tanh")
Wcfdc = mx.sym.broadcast_mul(param.c2f_bias, prev_state.c) + slice_gates[2]
forget_gate = mx.sym.Activation(Wcfdc, act_type="sigmoid")
next_c = (forget_gate * prev_state.c) + (in_gate * in_transform)
Wcoct = mx.sym.broadcast_mul(param.c2o_bias, next_c) + slice_gates[3]
out_gate = mx.sym.Activation(Wcoct, act_type="sigmoid")
next_h = out_gate * mx.sym.Activation(next_c, act_type="tanh")
if num_hidden_proj > 0:
    # optional projection (LSTMP): shrink the output state to num_hidden_proj units
    proj_next_h = mx.sym.FullyConnected(data=next_h,
                                        weight=param.ph2h_weight,
                                        no_bias=True,
                                        num_hidden=num_hidden_proj,
                                        name="t%d_l%d_ph2h" % (seqidx, layeridx))
    return LSTMState(c=next_c, h=proj_next_h)
else:
    return LSTMState(c=next_c, h=next_h)