def train_eval_logistic_regression_from_file(criterion_name=None,
                                             eval_name=None, device_id=-1):
    cur_dir = os.path.dirname(__file__)

    # Using data from https://github.com/Microsoft/CNTK/wiki/Tutorial
    train_file = os.path.join(cur_dir, "Train-3Classes.txt")
    test_file = os.path.join(cur_dir, "Test-3Classes.txt")

    X = C.input(2)
    y = C.input(3)

    W = C.parameter(value=np.zeros(shape=(3, 2)))
    b = C.parameter(value=np.zeros(shape=(3, 1)))

    out = C.times(W, X) + b
    out.tag = 'output'
    ce = C.cross_entropy_with_softmax(y, out)
    ce.name = criterion_name
    ce.tag = 'criterion'
    eval = C.ops.square_error(y, out)
    eval.tag = 'eval'
    eval.name = eval_name

    # training data readers
    train_reader = C.CNTKTextFormatReader(train_file, randomize=None)

    # testing data readers
    test_reader = C.CNTKTextFormatReader(test_file, randomize=None)

    my_sgd = C.SGDParams(
        epoch_size=0, minibatch_size=25, learning_rates_per_mb=0.1, max_epochs=3)
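# --- Hedged sketch (not from the original source): the snippet above uses the legacy
# CNTK 1.x Python wrapper (CNTKTextFormatReader / SGDParams). A roughly equivalent
# 3-class logistic regression with the CNTK 2.x Trainer API might look like the
# following; the shapes, minibatch size and learning rate are assumptions carried
# over from the code above, and the random minibatch only illustrates the call pattern.
import cntk as C
import numpy as np

def logistic_regression_v2_sketch(num_features=2, num_classes=3, lr=0.1):
    x = C.input_variable(num_features)                      # feature vector
    y = C.input_variable(num_classes)                        # one-hot label
    W = C.parameter(shape=(num_features, num_classes), init=0)
    b = C.parameter(shape=(num_classes,), init=0)
    z = C.times(x, W) + b                                    # class scores
    ce = C.cross_entropy_with_softmax(z, y)                  # training criterion
    pe = C.classification_error(z, y)                        # evaluation metric
    learner = C.sgd(z.parameters, lr=C.learning_rate_schedule(lr, C.UnitType.minibatch))
    trainer = C.Trainer(z, (ce, pe), [learner])
    # one toy minibatch of 25 random samples, just to show the training call
    features = np.random.randn(25, num_features).astype(np.float32)
    labels = np.eye(num_classes, dtype=np.float32)[np.random.randint(0, num_classes, 25)]
    trainer.train_minibatch({x: features, y: labels})
    return trainer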
def create_fast_rcnn_predictor(conv_out, rois, fc_layers):
    # RCNN
    roi_out = roipooling(conv_out, rois, cntk.MAX_POOLING, (roi_dim, roi_dim), spatial_scale=1/16.0)
    fc_out = fc_layers(roi_out)

    # prediction head
    W_pred = parameter(shape=(4096, globalvars['num_classes']), init=normal(scale=0.01), name="cls_score.W")
    b_pred = parameter(shape=globalvars['num_classes'], init=0, name="cls_score.b")
    cls_score = plus(times(fc_out, W_pred), b_pred, name='cls_score')

    # regression head
    W_regr = parameter(shape=(4096, globalvars['num_classes']*4), init=normal(scale=0.001), name="bbox_regr.W")
    b_regr = parameter(shape=globalvars['num_classes']*4, init=0, name="bbox_regr.b")
    bbox_pred = plus(times(fc_out, W_regr), b_regr, name='bbox_regr')

    return cls_score, bbox_pred
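# --- Hedged sketch (not part of the original source): one way the two heads returned
# above are typically combined into a training criterion. The variable names
# (label_targets, bbox_targets, bbox_inside_weights) are hypothetical, the ROI axis is
# glossed over, and a plain squared error stands in for the smooth-L1 loss used by
# Fast R-CNN; this is an illustration of the wiring, not the original training code.
import cntk as C

def fast_rcnn_criterion_sketch(cls_score, bbox_pred, num_classes):
    # per-ROI one-hot class label and box regression targets
    label_targets = C.input_variable(num_classes)
    bbox_targets = C.input_variable(num_classes * 4)
    bbox_inside_weights = C.input_variable(num_classes * 4)   # masks out targets of other classes

    cls_loss = C.cross_entropy_with_softmax(cls_score, label_targets)
    # squared error as a simplified stand-in for the smooth-L1 regression loss
    bbox_loss = C.reduce_sum(C.element_times(bbox_inside_weights,
                                             C.square(bbox_pred - bbox_targets)))
    return cls_loss + bbox_loss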
    ft_w = C.times(C.parameter((cell_dim, input_dim)), x)
    ft_b = C.parameter((cell_dim))
    ft_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    ft_c = C.parameter((cell_dim)) * prev_state_c
    ft = C.sigmoid((ft_w + ft_b + ft_h + ft_c), name='ft')

    # applied to cell(t-1)
    bft = ft * prev_state_c

    # c(t) = sum of both
    ct = bft + bit

    # output gate
    ot_w = C.times(C.parameter((cell_dim, input_dim)), x)
    ot_b = C.parameter((cell_dim))
    ot_h = C.times(C.parameter((cell_dim, output_dim)), prev_state_h)
    ot_c = C.parameter((cell_dim)) * prev_state_c
    ot = C.sigmoid((ot_w + ot_b + ot_h + ot_c), name='ot')

    # applied to tanh(cell(t))
    ht = ot * C.tanh(ct)

    # return cell value and hidden state
    return ct, ht

def embed(x):
    return C.times(x, E)
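# --- Hedged sketch (not from the original source): the same gate arithmetic as the
# LSTM fragment above, written out for a single time step in plain numpy so the shapes
# are explicit. The input gate and candidate cell are included for completeness, the
# output-gate peephole uses c_prev as in the snippet, and all weight names in the
# parameter dict are made up for illustration.
import numpy as np

def lstm_step_numpy_sketch(x, h_prev, c_prev, p):
    """p: dict of numpy arrays, W_* (cell_dim x input_dim), U_* (cell_dim x output_dim),
    b_* and peep_* (cell_dim,)."""
    sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
    i = sigmoid(p['W_i'] @ x + p['U_i'] @ h_prev + p['peep_i'] * c_prev + p['b_i'])  # input gate
    f = sigmoid(p['W_f'] @ x + p['U_f'] @ h_prev + p['peep_f'] * c_prev + p['b_f'])  # forget gate
    c_tilde = np.tanh(p['W_c'] @ x + p['U_c'] @ h_prev + p['b_c'])                   # candidate cell
    c = f * c_prev + i * c_tilde                                                     # c(t) = sum of both
    o = sigmoid(p['W_o'] @ x + p['U_o'] @ h_prev + p['peep_o'] * c_prev + p['b_o'])  # output gate
    h = o * np.tanh(c)                                                               # hidden state
    return c, h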
    # In case we want a dense representation for all data we have to convert the sample selector.
    I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
    sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(sampling_weights, num_samples, allow_duplicates)  # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    wS = C.times(sample_selector, weights, name='wS')  # [num_samples * hidden_dim]
    print("wS: " + str(wS.shape))
    zS = C.times_transpose(wS, hidden_vector, name='zS1') \
        + C.times(sample_selector, bias, name='zS2') \
        - C.times_transpose(sample_selector, log_prior, name='zS3')  # [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    zT = C.times_transpose(wT, hidden_vector, name='zT1') \
        + C.times(target_vector, bias, name='zT2') \
        - C.times_transpose(target_vector, log_prior, name='zT3')  # [1]

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides with the true label,
    # so the true class might be counted twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
    z = C.times_transpose(weights, hidden_vector) + bias
    z = C.reshape(z, shape=(vocab_dim))
    zSMax = C.reduce_max(zS)
    error_on_samples = C.less(zT, zSMax)

    return (z, cross_entropy_on_samples, error_on_samples)
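# --- Hedged sketch (not part of the original source): the sampled-softmax criterion
# above in plain numpy, to make the log-sum-exp arithmetic explicit. It computes
# log(exp(zT) + sum_s exp(zS_s)) - zT, i.e. the negative log probability of the true
# class over the set {true class} union {sampled classes}.
import numpy as np

def sampled_softmax_ce_numpy_sketch(zT, zS):
    """zT: scalar score of the true class; zS: 1-D array of scores of the sampled classes."""
    zS_reduced = np.log(np.sum(np.exp(zS)))   # reduce_log_sum_exp over the samples
    log_add = np.logaddexp(zT, zS_reduced)    # log(exp(zT) + exp(zS_reduced))
    return log_add - zT                       # cross entropy on the sampled set

# tiny usage example
print(sampled_softmax_ce_numpy_sketch(2.0, np.array([0.5, 1.0, -1.0])))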
    m_init, v_init = moments(x_init, axes=(ct.Axis.default_batch_axis(), 1))
    scale_init = init_scale / ct.sqrt(v_init + 1e-10)
    g_new = ct.assign(g, scale_init)
    b_new = ct.assign(b, -m_init * scale_init)
    x_init = ct.reshape(scale_init, (num_units, 1)) * (x_init - ct.reshape(m_init, (num_units, 1))) \
        + ct.reshape(g_new + b_new, (num_units, 1)) * 0
    if nonlinearity is not None:
        x_init = nonlinearity(x_init)
    return x_init
else:
    V, g, b = get_parameters(scope, ['V', 'g', 'b'])

    # use weight normalization (Salimans & Kingma, 2016)
    x = ct.times(V, x)
    scaler = g / ct.sqrt(squeeze(ct.reduce_sum(ct.square(V), axis=1), axes=1))
    x = ct.reshape(scaler, (num_units, 1)) * x + ct.reshape(b, (num_units, 1))

    if nonlinearity is not None:
        x = nonlinearity(x)
    return x
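# --- Hedged sketch (not from the original source): the weight-normalization
# reparameterization of Salimans & Kingma (2016) in plain numpy, mirroring the CNTK
# code above: each output unit uses w = g * V / ||V||, and the data-dependent
# initialization chooses g and b so the pre-activations of the first batch have roughly
# zero mean and variance init_scale^2. Shapes and helper names are assumptions.
import numpy as np

def weight_norm_numpy_sketch(V, g, b, x):
    """V: (num_units, in_dim), g and b: (num_units,), x: (in_dim, batch)."""
    pre = V @ x                                            # raw pre-activations
    scaler = g / np.sqrt(np.sum(np.square(V), axis=1))     # g / ||V_row||
    return scaler[:, None] * pre + b[:, None]

def weight_norm_data_init_sketch(V, x, init_scale=1.0, eps=1e-10):
    """Return (g, b) computed from the first batch, as in the init branch above."""
    pre = V @ x
    m, v = pre.mean(axis=1), pre.var(axis=1)
    scale_init = init_scale / np.sqrt(v + eps)
    return scale_init, -m * scale_init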
    sample_selector_sparse = C.random_sample(sampling_weights, num_samples, allow_duplicates)  # sparse matrix [num_samples * vocab_size]
    if use_sparse:
        sample_selector = sample_selector_sparse
    else:
        # Note: Sampled softmax with dense data is only supported for debugging purposes.
        # It might easily run into memory issues as the matrix 'I' below might be quite large.
        # In case we want a dense representation for all data we have to convert the sample selector.
        I = C.Constant(np.eye(vocab_dim, dtype=np.float32))
        sample_selector = C.times(sample_selector_sparse, I)

    inclusion_probs = C.random_sample_inclusion_frequency(sampling_weights, num_samples, allow_duplicates)  # dense row [1 * vocab_size]
    log_prior = C.log(inclusion_probs)  # dense row [1 * vocab_dim]

    print("hidden_vector: " + str(hidden_vector.shape))
    wS = C.times(sample_selector, weights, name='wS')  # [num_samples * hidden_dim]
    print("wS: " + str(wS.shape))
    zS = C.times_transpose(wS, hidden_vector, name='zS1') \
        + C.times(sample_selector, bias, name='zS2') \
        - C.times_transpose(sample_selector, log_prior, name='zS3')  # [num_samples]

    # Getting the weight vector for the true label. Dimension hidden_dim
    wT = C.times(target_vector, weights, name='wT')  # [1 * hidden_dim]
    zT = C.times_transpose(wT, hidden_vector, name='zT1') \
        + C.times(target_vector, bias, name='zT2') \
        - C.times_transpose(target_vector, log_prior, name='zT3')  # [1]

    zSReduced = C.reduce_log_sum_exp(zS)

    # Compute the cross entropy that is used for training.
    # We don't check whether any of the classes in the random samples coincides with the true label,
    # so the true class might be counted twice in the normalizing denominator of sampled softmax.
    cross_entropy_on_samples = C.log_add_exp(zT, zSReduced) - zT

    # For applying the model we also output a node providing the input for the full softmax
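# --- Hedged sketch (not part of the original source): what the dense branch above
# computes. Multiplying the one-hot sample selector by the identity matrix I simply
# materializes each sampled class as a dense one-hot row of size vocab_dim, so that
# C.times(sample_selector, weights) picks out the sampled rows of the weight matrix.
# The shapes below are illustrative only.
import numpy as np

vocab_dim, hidden_dim, num_samples = 6, 4, 3
sampled_ids = np.array([2, 5, 2])                                     # duplicates are allowed
sample_selector = np.eye(vocab_dim, dtype=np.float32)[sampled_ids]    # [num_samples x vocab_dim]
weights = np.random.randn(vocab_dim, hidden_dim).astype(np.float32)
wS = sample_selector @ weights                                        # rows 2, 5, 2 of the weight matrix
print(np.allclose(wS, weights[sampled_ids]))                          # True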
def lstm_func(output_dim, cell_dim, x, input_dim, prev_state_h, prev_state_c):
    # input gate (t)
    it_w = C.times(x, C.parameter((input_dim, cell_dim)))
    it_b = C.parameter((1, cell_dim))
    it_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    it_c = C.parameter((1, cell_dim)) * prev_state_c
    it = C.sigmoid((it_w + it_b + it_h + it_c), name='it')

    # applied to tanh of input
    bit_w = C.times(x, C.parameter((input_dim, cell_dim)))
    bit_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    bit_b = C.parameter((1, cell_dim))
    bit = it * C.tanh(bit_w + (bit_h + bit_b))

    # forget-me-not gate (t)
    ft_w = C.times(x, C.parameter((input_dim, cell_dim)))
    ft_b = C.parameter((1, cell_dim))
    ft_h = C.times(prev_state_h, C.parameter((output_dim, cell_dim)))
    ft_c = C.parameter((1, cell_dim)) * prev_state_c
    ft = C.sigmoid((ft_w + ft_b + ft_h + ft_c), name='ft')

    # applied to cell(t-1)
    bft = ft * prev_state_c

    # c(t) = sum of both
    ct = bft + bit