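# Imports that the fairseq excerpts below rely on, gathered here as a hedged
# convenience since each fragment originally lived in its own module.
import copy
from typing import Dict, List

import torch
from torch.serialization import default_restore_location

from fairseq import utils
from fairseq.data import Dictionary, encoders
from fairseq.tasks.translation import TranslationTask
# DynamicLossScaler has moved between fairseq versions; this path is an
# assumption that matches older releases.
from fairseq.optim.fp16_optimizer import DynamicLossScaler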
def add_args(parser):
"""Add model-specific arguments to the parser."""
parser.add_argument('--encoder-layers', type=int, metavar='L',
help='num encoder layers')
parser.add_argument('--encoder-embed-dim', type=int, metavar='H',
help='encoder embedding dimension')
parser.add_argument('--encoder-ffn-embed-dim', type=int, metavar='F',
help='encoder embedding dimension for FFN')
parser.add_argument('--encoder-attention-heads', type=int, metavar='A',
help='num encoder attention heads')
parser.add_argument('--activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use')
parser.add_argument('--pooler-activation-fn',
choices=utils.get_available_activation_fns(),
help='activation function to use for pooler layer')
parser.add_argument('--encoder-normalize-before', action='store_true',
help='apply layernorm before each encoder block')
parser.add_argument('--dropout', type=float, metavar='D',
help='dropout probability')
parser.add_argument('--attention-dropout', type=float, metavar='D',
help='dropout probability for attention weights')
parser.add_argument('--activation-dropout', type=float, metavar='D',
help='dropout probability after activation in FFN')
parser.add_argument('--pooler-dropout', type=float, metavar='D',
help='dropout probability in the masked_lm pooler layers')
parser.add_argument('--max-positions', type=int,
help='number of positional embeddings to learn')
parser.add_argument('--load-checkpoint-heads', action='store_true',
help='(re-)register and load heads when loading checkpoints')
# args for "Reducing Transformer Depth on Demand with Structured Dropout" (Fan et al., 2019)
def __init__(self, args, task, model):
super().__init__()
self.args = args
self.task = task
self.model = model
self.bpe = encoders.build_bpe(args)
self.max_positions = min(utils.resolve_max_positions(
self.task.max_positions(),
self.model.max_positions(),
))
# this is useful for determining the device
self.register_buffer('_float_tensor', torch.tensor([0], dtype=torch.float))
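# Hedged sketch, mirroring the usual fairseq hub-interface pattern: because
# _float_tensor is a registered buffer, it follows the module across .to() /
# .cuda() calls, so the current device can be read straight from it (the
# generate() fragment below relies on self.device in exactly this way).
@property
def device(self):
    return self._float_tensor.device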
def _set_input_buffer(self, incremental_state, new_buffer, name):
return utils.set_incremental_state(self, incremental_state, name, new_buffer)
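# Hedged counterpart sketch: the setter above is usually paired with a getter
# that reads the same incremental-state slot back via utils.get_incremental_state.
def _get_input_buffer(self, incremental_state, name):
    return utils.get_incremental_state(self, incremental_state, name)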
# Fragment of a criterion's forward(model, sample, reduce=True): run the model
# on the sample's net_input and compute the log-likelihood loss.
net_output = model(**sample["net_input"])
final_loss = self.log_likelihood_loss(model, sample, net_output)
# Sum over batch elements
if reduce:
# B -> 1
final_loss = final_loss.sum()
if getattr(self.args, "sentence_avg", False):
sample_size = sample["target"].size(0)
else:
sample_size = sample["ntokens"]
logging_output = {
"loss": utils.item(final_loss.data) if reduce else final_loss.data,
"ntokens": sample["ntokens"],
"nsentences": sample["target"].size(0),
"sample_size": sample_size,
}
return final_loss, sample_size, logging_output
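# Hedged sketch (older fairseq criterion API): how logging_output dicts like
# the one returned above are typically reduced across workers. Dividing by
# sample_size yields a per-sentence or per-token loss depending on --sentence-avg.
@staticmethod
def aggregate_logging_outputs(logging_outputs):
    loss_sum = sum(log.get('loss', 0) for log in logging_outputs)
    ntokens = sum(log.get('ntokens', 0) for log in logging_outputs)
    nsentences = sum(log.get('nsentences', 0) for log in logging_outputs)
    sample_size = sum(log.get('sample_size', 0) for log in logging_outputs)
    return {
        'loss': loss_sum / sample_size if sample_size > 0 else 0.0,
        'ntokens': ntokens,
        'nsentences': nsentences,
        'sample_size': sample_size,
    }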
def generate(
    self,
    tokenized_sentences: List[torch.LongTensor],
    beam: int = 5,
    verbose: bool = False,
    skip_invalid_size_inputs: bool = False,
    **kwargs
) -> List[List[Dict[str, torch.Tensor]]]:
if torch.is_tensor(tokenized_sentences) and tokenized_sentences.dim() == 1:
return self.generate(
tokenized_sentences.unsqueeze(0), beam=beam, verbose=verbose, **kwargs
)[0]
# build generator using current args as well as any kwargs
gen_args = copy.copy(self.args)
gen_args.beam = beam
for k, v in kwargs.items():
setattr(gen_args, k, v)
generator = self.task.build_generator(gen_args)
results = []
for batch in self._build_batches(tokenized_sentences, skip_invalid_size_inputs):
batch = utils.apply_to_sample(lambda t: t.to(self.device), batch)
translations = self.task.inference_step(generator, self.models, batch)
for id, hypos in zip(batch["id"].tolist(), translations):
results.append((id, hypos))
# sort output to match input order
outputs = [hypos for _, hypos in sorted(results, key=lambda x: x[0])]
if verbose:
def getarg(name, default):
return getattr(gen_args, name, getattr(self.args, name, default))
for source_tokens, target_hypotheses in zip(tokenized_sentences, outputs):
src_str_with_unk = self.string(source_tokens)
print('S\t{}'.format(src_str_with_unk))
for hypo in target_hypotheses:
    hypo_str = self.decode(hypo['tokens'])
    print('H\t{}\t{}'.format(hypo['score'], hypo_str))
return outputs
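# Hedged usage sketch: driving generate() through a torch.hub interface. The
# hub entry is a real fairseq model name, but downloading it and the example
# sentence are illustrative; encode()/decode() are the standard hub helpers
# (requires the sacremoses and fastBPE extras).
def _demo_translate():
    en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de.single_model',
                           tokenizer='moses', bpe='fastbpe')
    tokens = en2de.encode('Machine learning is great!')
    hypos = en2de.generate(tokens, beam=5, verbose=True)  # 1-D input -> hypotheses for one sentence
    return en2de.decode(hypos[0]['tokens'])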
def load_pretrained_model(path, src_dict_path, tgt_dict_path):
    """Load a pre-trained MT model and its source/target dictionaries from a checkpoint."""
state = torch.load(path, map_location=lambda s, l: default_restore_location(s, 'cpu'))
state = utils._upgrade_state_dict(state)
args = state['args']
state_dict = state['model']
src_dict = Dictionary.load(src_dict_path)
tgt_dict = Dictionary.load(tgt_dict_path)
assert src_dict.pad() == tgt_dict.pad()
assert src_dict.eos() == tgt_dict.eos()
assert src_dict.unk() == tgt_dict.unk()
task = TranslationTask(args, src_dict, tgt_dict)
model = task.build_model(args, mode="MT")
model.upgrade_state_dict(state_dict)
model.load_state_dict(state_dict, strict=True)
print("Load pre-trained MT model from {}".format(path))
return model
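# Hedged usage sketch: the checkpoint and dictionary paths below are placeholders.
def _demo_load_mt():
    mt_model = load_pretrained_model(
        'checkpoints/mt/checkpoint_best.pt',
        'data-bin/dict.src.txt',
        'data-bin/dict.tgt.txt',
    )
    return mt_model.eval()  # inference only; no gradients needed for the loaded model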
def _set_input_buffer(self, incremental_state, new_buffer):
return utils.set_incremental_state(self, incremental_state, 'input_buffer', new_buffer)
def clip_grad_norm(self, max_norm):
"""Clips gradient norm and updates dynamic loss scaler."""
self._sync_fp16_grads_to_fp32()
grad_norm = utils.clip_grad_norm_(self.fp32_params.grad.data, max_norm)
# detect overflow and adjust loss scale
overflow = DynamicLossScaler.has_overflow(grad_norm)
self.scaler.update_scale(overflow)
if overflow:
if self.scaler.loss_scale <= self.min_loss_scale:
# Use FloatingPointError as an uncommon error that parent
# functions can safely catch to stop training.
raise FloatingPointError((
'Minimum loss scale reached ({}). Your loss is probably exploding. '
'Try lowering the learning rate, using gradient clipping or '
'increasing the batch size.'
).format(self.min_loss_scale))
raise OverflowError('setting loss scale to: ' + str(self.scaler.loss_scale))
return grad_norm
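# Hedged sketch of the calling side: the trainer typically catches the
# OverflowError raised above, skips the parameter update, and lets the reduced
# loss scale take effect on the next step. The optimizer argument is assumed to
# expose the clip_grad_norm()/step()/zero_grad() API used above.
import logging

def fp16_update_step(optimizer, max_norm=0.0):
    logger = logging.getLogger(__name__)
    try:
        grad_norm = optimizer.clip_grad_norm(max_norm)
        optimizer.step()
        return grad_norm
    except OverflowError as exc:
        logger.info('gradient overflow detected, skipping update (%s)', exc)
        optimizer.zero_grad()
        return None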
# Fragment of a criterion forward() combining sequence-level (RL), word-level,
# and NLL loss terms; target_logprobs, seq_loss and word_loss are computed in
# the earlier, omitted part of the function.
nll_loss = -1.0 * target_logprobs.sum()
# 6) Gather losses
loss = (
self.args.rl_weight * seq_loss
+ self.args.word_weight * word_loss
+ nll_loss
)
# Logging
sample_size = (
sample["target"].size(0) if self.args.sentence_avg else sample["ntokens"]
)
logging_output = {
"loss": utils.item(loss.data) if reduce else loss.data,
"nll_loss": utils.item(nll_loss.data) if reduce else nll_loss.data,
"ntokens": sample["ntokens"],
"nsentences": sample["target"].size(0),
"sample_size": sample_size,
}
return loss, sample_size, logging_output
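# Hedged sketch: the weights read above (args.rl_weight, args.word_weight)
# would normally be registered by the criterion itself; the flag names and
# defaults here are assumptions that simply mirror the attribute names, not
# the original file's definitions.
@staticmethod
def add_args(parser):
    parser.add_argument('--rl-weight', type=float, default=1.0, metavar='W',
                        help='weight on the sequence-level (RL) loss term')
    parser.add_argument('--word-weight', type=float, default=1.0, metavar='W',
                        help='weight on the word-level loss term')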