def masked_conv2d(x, num_filters, filter_shape=(3,3), strides=(1,1), pad=True,
                  nonlinearity=None, mask_type=None, h=None, bias=True, init=ct.glorot_uniform()):
    ''' Convolution layer with mask and conditional input support. '''
    output_channels_shape = _as_tuple(num_filters)
    x_channels_shape = _as_tuple(x.shape[0])
    paddings = (False,) + (pad,)*len(filter_shape)
    W = ct.parameter((num_filters, x.shape[0]) + filter_shape, init=init, name='W')
    if mask_type is not None:
        # PixelCNN-style mask: zero out filter taps to the right of and below the center.
        filter_center = (filter_shape[0] // 2, filter_shape[1] // 2)
        mask = np.ones(W.shape, dtype=np.float32)
        mask[:, :, filter_center[0]:, filter_center[1]+1:] = 0
        mask[:, :, filter_center[0]+1:, :] = 0
        # Mask type 'a' also blocks the center pixel itself (used for the first layer).
        if mask_type == 'a':
            mask[:, :, filter_center[0], filter_center[1]] = 0
        W = ct.element_times(W, ct.constant(mask))
    x = ct.convolution(W, x, strides=x_channels_shape + strides, auto_padding=paddings)
    if bias:
        b = ct.parameter((num_filters, 1, 1), name='b')
        x = x + b
    # (conditional input `h` handling omitted in this excerpt)
    if nonlinearity is not None:
        x = nonlinearity(x)
    return x
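
# Usage sketch (not from the original source). It assumes `ct` is cntk, `np` is numpy,
# and the module's `_as_tuple` helper is in scope, as in the PixelCNN-style code this
# snippet is excerpted from. Mask type 'a' (first layer) hides the current pixel;
# mask type 'b' (later layers) lets it through.
# x_img = ct.input_variable((3, 32, 32))
# h1 = masked_conv2d(x_img, 64, filter_shape=(7, 7), mask_type='a', nonlinearity=ct.relu)
# h2 = masked_conv2d(h1, 64, filter_shape=(3, 3), mask_type='b', nonlinearity=ct.relu)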
def embed(self):
    # load pretrained GloVe vectors for the known-word part of the vocabulary
    npglove = np.zeros((self.wg_dim, self.hidden_dim), dtype=np.float32)
    with open(os.path.join(self.abs_path, 'glove.6B.100d.txt'), encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            word = parts[0].lower()
            if word in self.vocab:
                npglove[self.vocab[word], :] = np.asarray([float(p) for p in parts[1:]])
    glove = C.constant(npglove)  # frozen GloVe embedding table
    nonglove = C.parameter(shape=(len(self.vocab) - self.wg_dim, self.hidden_dim),
                           init=C.glorot_uniform(), name='TrainableE')  # trainable table for the remaining words

    def func(wg, wn):
        # one-hot inputs select rows from the constant and trainable embedding tables
        return C.times(wg, glove) + C.times(wn, nonglove)
    return func
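
# Standalone sketch of the same idea, with assumed toy sizes (not from the original
# source): a frozen C.constant holds pretrained vectors, a C.parameter holds trainable
# ones, and sparse one-hot inputs select rows from each table.
import cntk as C
import numpy as np

glove_vocab, extra_vocab, emb_dim = 1000, 50, 100
glove_table = C.constant(np.zeros((glove_vocab, emb_dim), dtype=np.float32))  # pretrained, frozen
extra_table = C.parameter((extra_vocab, emb_dim), init=C.glorot_uniform())    # learned during training

wg = C.sequence.input_variable(glove_vocab, is_sparse=True)
wn = C.sequence.input_variable(extra_vocab, is_sparse=True)
word_embedding = C.times(wg, glove_table) + C.times(wn, extra_table)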
def OptimizedRnnStack(hidden_dim, num_layers=1, recurrent_op='lstm', bidirectional=False, use_cudnn=True, name=''):
    if use_cudnn:
        # single fused cuDNN RNN; the weight shape is inferred on first use
        W = C.parameter(_INFERRED + (hidden_dim,), init=C.glorot_uniform())
        def func(x):
            return C.optimized_rnnstack(x, W, hidden_dim, num_layers, bidirectional,
                                        recurrent_op=recurrent_op, name=name)
        return func
    else:
        # fallback without cuDNN: a bidirectional LSTM built from two Recurrence loops
        # (num_layers and recurrent_op are not honored on this path)
        def func(x):
            return C.splice(
                C.layers.Recurrence(C.layers.LSTM(hidden_dim))(x),
                C.layers.Recurrence(C.layers.LSTM(hidden_dim), go_backwards=True)(x),
                name=name)
        return func
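
# Usage sketch with assumed dimensions (not from the original source). The cuDNN path
# additionally assumes `_INFERRED` is available, e.g. via
# `from cntk.layers.blocks import _INFERRED`.
import cntk as C

features = C.sequence.input_variable(300)
encoder = OptimizedRnnStack(128, bidirectional=True, use_cudnn=False)  # CPU-friendly fallback path
encoded = encoder(features)  # 256-dim output per step (forward and backward passes spliced)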
def variable(value, dtype=None, name=None, constraint=None):
    # unwrap the NumPy value if a CNTK Constant/Parameter was passed in
    if isinstance(value, (C.Constant, C.Parameter)):
        value = value.value

    # we don't support init parameter with symbolic op, so eval it first as a
    # workaround (this is the backend's eval(), which runs the CNTK function,
    # not Python's builtin eval)
    if isinstance(value, C.cntk_py.Function):
        value = eval(value)

    shape = value.shape if hasattr(value, 'shape') else ()
    if hasattr(value, 'dtype') and value.dtype != dtype and len(shape) > 0:
        value = value.astype(dtype)

    # TODO: remove the conversion when cntk supports int32, int64
    # https://www.cntk.ai/pythondocs/cntk.variables.html#cntk.variables.Parameter
    dtype = 'float32' if 'int' in str(dtype) else dtype

    v = C.parameter(shape=shape,
                    init=value,
                    dtype=dtype,
                    name=_prepare_name(name, 'variable'))
    v._keras_shape = v.shape
    v._uses_learning_phase = False
    v.constraint = constraint
    return v
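
# Minimal standalone illustration of the core call above (assumed values, not from
# the original source): C.parameter can be initialized directly from a NumPy array.
import cntk as C
import numpy as np

init_value = np.ones((3, 4), dtype=np.float32)
p = C.parameter(shape=init_value.shape, init=init_value, dtype='float32', name='my_variable')
print(p.shape)          # (3, 4)
print(p.value[0, 0])    # 1.0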
def train_eval_logistic_regression_from_file(criterion_name=None,
                                             eval_name=None, device_id=-1):
    # note: this example uses the legacy (pre-2.0) CNTK Python API
    cur_dir = os.path.dirname(__file__)

    # Using data from https://github.com/Microsoft/CNTK/wiki/Tutorial
    train_file = os.path.join(cur_dir, "Train-3Classes.txt")
    test_file = os.path.join(cur_dir, "Test-3Classes.txt")

    X = C.input(2)
    y = C.input(3)

    W = C.parameter(value=np.zeros(shape=(2, 3)))
    b = C.parameter(value=np.zeros(shape=(1, 3)))

    out = C.times(X, W) + b
    out.tag = 'output'
    ce = C.cross_entropy_with_softmax(y, out)
    ce.name = criterion_name
    ce.tag = 'criterion'
    eval = C.ops.square_error(y, out)
    eval.tag = 'eval'
    eval.name = eval_name

    # training data readers
    train_reader = C.CNTKTextFormatReader(train_file, randomize=None)

    # testing data readers
    test_reader = C.CNTKTextFormatReader(test_file, randomize=None)

def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    # prediction head
    W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head
    W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred

def add_dnn_layer(in_dim, out_dim, x, param_scale):
    # note: the scale is applied as a graph multiplication, not as an initializer
    W = C.parameter((out_dim, in_dim)) * param_scale
    b = C.parameter((out_dim, 1)) * param_scale
    t = C.times(W, x)
    return C.plus(t, b)
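
# Usage sketch with assumed dimensions (not from the original source): stacking two
# layers with a ReLU in between to build a small classifier.
import cntk as C

features = C.input_variable((784, 1))
h = C.relu(add_dnn_layer(784, 200, features, 0.01))
z = add_dnn_layer(200, 10, h, 0.01)   # 10-way output scores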
def rnet_output_layer(self, attention_context, query):
    att_context = C.placeholder(shape=(2*self.hidden_dim,))
    q_processed = C.placeholder(shape=(2*self.hidden_dim,))

    wuq = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    whp = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    wha = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    v = C.parameter(shape=(2*self.hidden_dim, 1), init=C.glorot_uniform())
    bias = C.parameter(shape=(2*self.hidden_dim), init=C.glorot_uniform())

    whp_end = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    wha_end = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    v_end = C.parameter(shape=(2*self.hidden_dim, 1), init=C.glorot_uniform())

    # attention pooling over the query: sequence[tensor[1]], q_len x 1
    s0 = C.times(C.tanh(C.times(q_processed, wuq) + bias), v)
    a0 = C.sequence.softmax(s0)
    rQ = C.sequence.reduce_sum(a0 * q_processed)

    # pointer scores over the passage: sequence[tensor[1]], p_len x 1
    ts = C.reshape(C.times(C.tanh(
        C.times(att_context, whp) + C.times(C.sequence.broadcast_as(rQ, att_context), wha)), v), (-1))
    # sequence[tensor[1]]
    ta = C.sequence.softmax(ts)
    # sequence[2d] 1 x 2d
    c0 = C.reshape(C.sequence.reduce_sum(ta * att_context), (2*self.hidden_dim))

def attention_layer(self, context, query, layer):
    q_processed = C.placeholder(shape=(2*self.hidden_dim,))
    p_processed = C.placeholder(shape=(2*self.hidden_dim,))

    qvw, qvw_mask = C.sequence.unpack(q_processed, padding_value=0).outputs

    wq = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    wp = C.parameter(shape=(2*self.hidden_dim, 2*self.hidden_dim), init=C.glorot_uniform())
    wg = C.parameter(shape=(8*self.hidden_dim, 8*self.hidden_dim), init=C.glorot_uniform())
    v = C.parameter(shape=(2*self.hidden_dim, 1), init=C.glorot_uniform())

    # seq[tensor[2d]] p_len x 2d
    wpt = C.reshape(C.times(p_processed, wp), (-1, 2*self.hidden_dim))
    # q_len x 2d
    wqt = C.reshape(C.times(qvw, wq), (-1, 2*self.hidden_dim))
    # seq[tensor[q_len]]
    S = C.reshape(C.times(C.tanh(C.sequence.broadcast_as(wqt, p_processed) + wpt), v), (-1))
    qvw_mask_expanded = C.sequence.broadcast_as(qvw_mask, p_processed)
    # seq[tensor[q_len]]: padded query positions are masked out before the softmax
    S = C.element_select(qvw_mask_expanded, S, C.constant(-1e+30))
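
# Standalone sketch of the masking trick on the last line (assumed toy shapes, not
# from the original source): positions where the mask is 0 are replaced with -1e30
# so they contribute almost nothing after the softmax.
import cntk as C
import numpy as np

scores = C.input_variable(5)
mask = C.input_variable(5)
attn = C.softmax(C.element_select(mask, scores, C.constant(-1e+30)))
print(attn.eval({scores: np.array([[1, 2, 3, 4, 5]], dtype=np.float32),
                 mask:   np.array([[1, 1, 1, 0, 0]], dtype=np.float32)}))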
def deconv2d(x, num_filters, filter_shape=(3,3), strides=(1,1), pad=True, nonlinearity=None,
             init=global_init, init_scale=1., counters={}, first_run=False):
    ''' Deconvolution layer. '''
    scope = get_name('deconv2d', counters)
    output_channels_shape = _as_tuple(num_filters)
    x_shape = x.shape  # CHW
    x_channels_shape = _as_tuple(x.shape[0])
    paddings = (False,) + (pad,)*len(filter_shape)

    if pad:
        output_shape = (num_filters, x_shape[1] * strides[0], x_shape[2] * strides[1])
    else:
        output_shape = (num_filters, x_shape[1] * strides[0] + filter_shape[0] - 1,
                        x_shape[2] * strides[1] + filter_shape[1] - 1)

    if first_run:
        V = ct.parameter(x_channels_shape + output_channels_shape + filter_shape, init=init, name='V'); set_parameter(scope, 'V', V)
        g = ct.parameter(output_channels_shape, init=global_g_init, name='g'); set_parameter(scope, 'g', g)
        b = ct.parameter(output_channels_shape, name='b'); set_parameter(scope, 'b', b)

        # use weight normalization (Salimans & Kingma, 2016)
        V_norm = l2_normalize(V, axes=(0, 2, 3))
        x_init = ct.convolution_transpose(V_norm, x, strides=x_channels_shape + strides,
                                          output_shape=output_shape, auto_padding=paddings)

        # data-dependent initialization of the scale g and the bias b
        m_init, v_init = moments(x_init, axes=(ct.Axis.default_batch_axis(), 1, 2))
        scale_init = init_scale / ct.sqrt(v_init + 1e-8)
        g_new = ct.assign(g, scale_init)
        b_new = ct.assign(b, -m_init*scale_init)
        # the trailing "*0" term keeps the assign ops in the graph without changing the output
        x_init = ct.reshape(scale_init, (num_filters, 1, 1))*(x_init - ct.reshape(m_init, (num_filters, 1, 1))) + ct.reshape(g_new + b_new, (num_filters, 1, 1))*0

        if nonlinearity is not None:
            x_init = nonlinearity(x_init)
        return x_init
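
# Usage sketch (not from the original source). It assumes `ct` is cntk and that the
# module-level helpers used above (get_name, set_parameter, l2_normalize, moments,
# global_init, global_g_init) are in scope, as in the PixelCNN-style code this is
# excerpted from.
# x = ct.input_variable((64, 16, 16))                       # CHW feature map
# y = deconv2d(x, 32, filter_shape=(3, 3), strides=(2, 2),
#              nonlinearity=ct.relu, first_run=True)        # upsamples to (32, 32, 32)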