import torch
import torch.nn.functional as F

# V (the Variable wrapper), to_gpu, assert_dims and repeat_cell_state are helper
# utilities provided by the surrounding library and are not shown in these snippets.


def _topk_forward(self, inputs, hidden, num_beams, constraints=None):
    sl, bs = inputs.size()
    # initial logprobs should be zero (the probability of the start token is 1)
    logprobs = torch.zeros_like(inputs[:1]).view(1, bs, 1).float()  # shape will be [sl, bs, 1]
    inputs = inputs[:1].repeat(1, num_beams)  # inputs should be only the first token initially [1, bs x num_beams]
    finished = to_gpu(torch.zeros(bs * num_beams).byte())
    iteration = 0
    final_outputs = []
    self.beam_outputs = inputs.clone()
    hidden = repeat_cell_state(hidden, num_beams)
    while not finished.all() and iteration < self.max_iterations:
        # output should be List[[sl, bs * num_beams, layer_dim], ...]; sl should be one
        output = self.forward(inputs, hidden=hidden, num_beams=0, constraints=constraints)
        hidden = self.decoder_layer.hidden
        final_outputs.append(output)
        # take the output of the last layer with dims [1, bs, output_dim]
        # and get the indices of the top k for every bs
        new_logprobs = F.log_softmax(output, dim=-1)  # [1, bs x num_beams, nt]
        num_tokens = new_logprobs.size(2)
        new_logprobs = new_logprobs.view(1, bs, num_beams, num_tokens) + logprobs.unsqueeze(-1)  # [1, bs, nb, nt]
        # mask logprobs accordingly
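# repeat_cell_state is not defined in this snippet. A minimal sketch of what it is
# assumed to do: tile every layer's hidden state num_beams times along the batch
# dimension so the decoder scores all beams in a single forward pass. The handling
# of tuple (LSTM-style) states below is an assumption, not the library's own code.
def repeat_cell_state(hidden, num_beams):
    results = []
    for row in hidden:
        if row is None:
            results.append(None)
        elif isinstance(row, (list, tuple)):
            # LSTM-style state: repeat both the hidden and the cell tensors
            results.append(tuple(tensor.repeat(1, num_beams, 1) for tensor in row))
        else:
            results.append(row.repeat(1, num_beams, 1))
    return results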
# Three to_model variants (methods of different model-data classes), each wiring a
# different model into an EncoderDecoderLearner:

def to_model(self, m, opt_fn):
    model = S2SModel(to_gpu(m))
    return EncoderDecoderLearner(self, model, opt_fn=opt_fn)


def to_model(self, m, opt_fn):
    model = CVAEModel(to_gpu(m))
    learner = EncoderDecoderLearner(self, model, opt_fn=opt_fn)
    learner.crit = get_cvae_loss(pad_idx=learner.data.pad_idx)
    return learner


def to_model(self, m, opt_fn):
    model = HREDAttentionModel(to_gpu(m))
    learner = EncoderDecoderLearner(self, model, opt_fn=opt_fn)
    return learner
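# A hedged usage sketch: the data object, module m, and optimizer settings below are
# hypothetical, shown only to illustrate how a to_model variant is typically invoked.
#
#   opt_fn = partial(optim.Adam, betas=(0.7, 0.99))
#   learner = model_data.to_model(m, opt_fn=opt_fn)
#   learner.fit(lrs=1e-3, n_cycle=1)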
def _greedy_forward(self, inputs, hidden=None, constraints=None):
    dec_inputs = inputs
    max_iterations = min(dec_inputs.size(0), self.MAX_STEPS_ALLOWED) if self.training else self.max_iterations
    inputs = V(inputs[:1].data)  # inputs should be only the first token initially [1, bs]
    sl, bs = inputs.size()
    finished = to_gpu(torch.zeros(bs).byte())
    iteration = 0
    self.beam_outputs = inputs.clone()
    final_outputs = []
    while not finished.all() and iteration < max_iterations:
        # output should be List[[sl, bs, layer_dim], ...]; sl should be one
        if 0 < iteration and self.training and 0. < self.random() < self.pr_force:
            # teacher forcing: feed the ground-truth token with probability pr_force
            inputs = dec_inputs[iteration].unsqueeze(0)
        output = self.forward(inputs, hidden=hidden, num_beams=0, constraints=constraints)
        hidden = self.decoder_layer.hidden
        final_outputs.append(output)  # dim should be [sl=1, bs, nt]
        # inputs are the argmax indices with dims [1, bs]; repackage the var to avoid backprop through it
        inputs = assert_dims(V(output.data.max(dim=-1)[1]), [1, bs])
        iteration += 1
        self.beam_outputs = assert_dims(torch.cat([self.beam_outputs, inputs], dim=0), [iteration + 1, bs])
        new_finished = inputs.data == self.eos_token
        finished = finished | new_finished
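# assert_dims is used above as a shape guard. A minimal sketch of the assumed
# behaviour: check each dimension (None meaning "any size") and return the tensor
# unchanged, so the call can be used inline. This sketch is an assumption, not the
# library's implementation.
def assert_dims(tensor, dims):
    actual = list(tensor.size())
    assert len(actual) == len(dims), f"expected {len(dims)} dims, got {actual}"
    for index, (size, expected) in enumerate(zip(actual, dims)):
        assert expected is None or size == expected, f"dim {index}: expected {expected}, got {size}"
    return tensor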
# Note: cvae_loss_sigmoid uses `nonlocal STEP`, so it is defined inside an enclosing
# loss factory (e.g. get_cvae_loss above), which also provides pad_idx.
def cvae_loss_sigmoid(input, target, step=0, max_kld_step=None, **kwargs):
    predictions, recog_mu, recog_log_var, prior_mu, prior_log_var, bow_logits = input
    vocab = predictions.size(-1)
    # dims are [sq-1, bs, vocab]
    dec_input = predictions[:target.size(0)].view(-1, vocab).contiguous()
    bow_targets = torch.zeros_like(bow_logits).scatter(1, target.transpose(1, 0), 1)
    # mask the pad token
    weights = to_gpu(V(torch.ones(bow_logits.size(-1)).unsqueeze_(0)))
    weights[0, pad_idx] = 0
    bow_loss = F.binary_cross_entropy_with_logits(bow_logits, bow_targets, weight=weights)
    # targets are [sq-1, bs] (one label for every word)
    kld_loss = gaussian_kld(recog_mu, recog_log_var, prior_mu, prior_log_var)
    target = target.view(-1).contiguous()
    decoder_loss = F.cross_entropy(input=dec_input,
                                   target=target,
                                   ignore_index=pad_idx,
                                   )
    # anneal the KL weight from 0 to 1 over max_kld_step steps
    kld_weight = 1.0 if max_kld_step is None else min((step + 1) / max_kld_step, 1)
    nonlocal STEP
    if step > STEP:
        if step == 0:
            STEP = 0
        print(f"losses: decoder {decoder_loss}, bow: {bow_loss}, kld x weight: {kld_loss} x {kld_weight}")
        STEP += 1
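# gaussian_kld is referenced above but not shown. A sketch of the standard KL
# divergence between two diagonal Gaussians, KL(recognition || prior); the
# reduction (sum over the latent dimension, mean over the batch) is an assumption.
def gaussian_kld(recog_mu, recog_log_var, prior_mu, prior_log_var):
    kld = 0.5 * torch.sum(
        prior_log_var - recog_log_var
        + (recog_log_var.exp() + (recog_mu - prior_mu) ** 2) / prior_log_var.exp()
        - 1.0,
        dim=-1,
    )
    return kld.mean()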
def _topk_forward(self, inputs, hidden, num_beams, constraints=None):
    sl, bs = inputs.size()
    # initial logprobs should be zero (the probability of the start token is 1)
    logprobs = torch.zeros_like(inputs[:1]).view(1, bs, 1).float()  # shape will be [sl, bs, 1]
    inputs = inputs[:1].repeat(1, num_beams)  # inputs should be only the first token initially [1, bs x num_beams]
    finished = to_gpu(torch.zeros(bs * num_beams).byte())
    iteration = 0
    self.beam_outputs = inputs.clone().cpu()
    hidden = repeat_cell_state(hidden, num_beams)
    final_outputs = []
    while not finished.all() and iteration < self.max_iterations:
        # output should be List[[sl, bs * num_beams, layer_dim], ...]; sl should be one
        output = self.forward(inputs, hidden=hidden, num_beams=0)
        step_prediction = output[-1:]  # [sl, bs * num_beams, ntokens]
        final_outputs.append(step_prediction.cpu())
        # take the output of the last layer with dims [1, bs, output_dim]
        # and get the indices of the top k for every bs
        new_logprobs = F.log_softmax(step_prediction, dim=-1)  # [1, bs x num_beams, nt]
        num_tokens = new_logprobs.size(2)
        new_logprobs = new_logprobs.view(1, bs, num_beams, num_tokens) + logprobs.unsqueeze(-1)  # [1, bs, nb, nt]
        # mask logprobs if they are finished or it's the first iteration
        new_logprobs = self.mask_logprobs(bs, finished, iteration, logprobs, new_logprobs, num_beams, num_tokens)
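# self.mask_logprobs is called above but is not part of this snippet. A hedged sketch
# of the assumed behaviour, written as a decoder method: on the first step only beam 0
# may expand (all beams start identical), and finished beams keep their score on the
# eos token only, so they are never extended further. The -1e32 fill value and the
# eos_token attribute are assumptions.
def mask_logprobs(self, bs, finished, iteration, logprobs, new_logprobs, num_beams, num_tokens):
    if iteration == 0:
        # every beam proposes the same tokens on the first step; keep only beam 0
        new_logprobs[:, :, 1:, :] = -1e32
    else:
        finished_mask = finished.view(1, bs, num_beams, 1).float()
        frozen = torch.zeros_like(new_logprobs).fill_(-1e32)
        # a finished beam may only repeat eos, carrying its accumulated score unchanged
        frozen[:, :, :, self.eos_token] = logprobs
        new_logprobs = new_logprobs * (1 - finished_mask) + frozen * finished_mask
    return new_logprobs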