def log_params(self):
    """
    Logs parameters to the generic logger MlLogger
    """
    params = {
        "lm_type": self.language_model.__class__.__name__,
        "lm_name": self.language_model.name,
        "prediction_heads": ",".join(
            [head.__class__.__name__ for head in self.prediction_heads]
        ),
        "lm_output_types": ",".join(self.lm_output_types),
    }
    try:
        MlLogger.log_params(params)
    except Exception as e:
        logger.warning(f"ML logging didn't work: {e}")
def log_params(self):
    # Log the core training settings to the generic logger MlLogger
    params = {"epochs": self.epochs, "n_gpu": self.n_gpu, "device": self.device}
    MlLogger.log_params(params)
def _log_params(self):
    # Log processor and tokenizer settings to the generic logger MlLogger
    params = {
        "processor": self.__class__.__name__,
        "tokenizer": self.tokenizer.__class__.__name__,
    }
    names = ["max_seq_len", "dev_split"]
    for name in names:
        value = getattr(self, name)
        params.update({name: str(value)})
    try:
        MlLogger.log_params(params)
    except Exception as e:
        logger.warning(f"ML logging didn't work: {e}")
    if schedule_name in scheduler_translations.keys():
        schedule_name = scheduler_translations[schedule_name]
    # in contrast to torch, we actually get here a method and not a class
    sched_constructor = getattr(import_module('transformers.optimization'), schedule_name)
except AttributeError:
    raise AttributeError(f"Scheduler '{schedule_name}' not found in 'torch' or 'transformers'")

logger.info(f"Using scheduler '{schedule_name}'")

# get supported args of constructor
allowed_args = inspect.signature(sched_constructor).parameters.keys()

# convert from warmup proportion to steps if required
if 'num_warmup_steps' in allowed_args and 'num_warmup_steps' not in opts and 'warmup_proportion' in opts:
    opts['num_warmup_steps'] = int(opts["warmup_proportion"] * opts["num_training_steps"])
    MlLogger.log_params({"warmup_proportion": opts["warmup_proportion"]})

# only pass args that are supported by the constructor
opts = {k: v for k, v in opts.items() if k in allowed_args}

# Logging
logger.info(f"Loading schedule `{schedule_name}`: '{opts}'")
MlLogger.log_params(opts)
MlLogger.log_params({"schedule_name": schedule_name})
return sched_constructor(optimizer, **opts)
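# Worked example of the warmup conversion above (hypothetical values, not taken
# from the snippet): with 1000 training steps and warmup_proportion = 0.1 the
# scheduler receives num_warmup_steps = int(0.1 * 1000) = 100.
opts_example = {"warmup_proportion": 0.1, "num_training_steps": 1000}
opts_example["num_warmup_steps"] = int(opts_example["warmup_proportion"] * opts_example["num_training_steps"])
assert opts_example["num_warmup_steps"] == 100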
self.ave_len = np.mean(seq_lens)

logger.info("Examples in train: {}".format(self.counts["train"]))
logger.info("Examples in dev : {}".format(self.counts["dev"]))
logger.info("Examples in test : {}".format(self.counts["test"]))
logger.info("")
logger.info("Max sequence length: {}".format(max(seq_lens)))
logger.info("Average sequence length after clipping: {}".format(self.ave_len))
logger.info("Proportion clipped: {}".format(self.clipped))
if self.clipped > 0.5:
    logger.info("[Farmer's Tip] {}% of your samples got cut down to {} tokens. "
                "Consider increasing max_seq_len. "
                "This will lead to higher memory consumption but is likely to "
                "improve your model performance".format(round(self.clipped * 100, 1), max_seq_len))

MlLogger.log_params(
    {
        "n_samples_train": self.counts["train"],
        "n_samples_dev": self.counts["dev"],
        "n_samples_test": self.counts["test"],
        "batch_size": self.batch_size,
        "ave_seq_len": self.ave_len,
        "clipped": self.clipped,
    }
)
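# Hedged sketch (illustration only, not FARM's exact implementation) of how such
# statistics could be derived from raw token counts per sample:
import numpy as np
seq_lens = [12, 48, 130, 256, 300]        # hypothetical sequence lengths
max_seq_len = 256
ave_len = np.mean(seq_lens)               # 149.2
clipped = np.mean([length > max_seq_len for length in seq_lens])  # 0.2 -> 20% of samples truncated
# Since clipped <= 0.5 here, the "[Farmer's Tip]" hint above would not be logged.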
"O2" (Almost FP16)
"O3" (Pure FP16).
See details on: https://nvidia.github.io/apex/amp.html
:return: model, optimizer, scheduler
"""
if use_amp and not AMP_AVAILABLE:
    raise ImportError(f'Got use_amp = {use_amp}, but cannot find apex. '
                      'Please install Apex if you want to make use of automatic mixed precision. '
                      'https://github.com/NVIDIA/apex')
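# Hedged note: when apex is available, use_amp is typically passed on as the apex
# opt_level, i.e. something like
#     model, optimizer = amp.initialize(model, optimizer, opt_level=use_amp)
# with opt_level one of "O0"-"O3" as described at https://nvidia.github.io/apex/amp.html
# (that call is not part of this snippet).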
num_train_optimization_steps = calculate_optimization_steps(
    n_batches, grad_acc_steps, n_epochs, local_rank
)

# Log params
MlLogger.log_params({
    "use_amp": use_amp,
    "num_train_optimization_steps": num_train_optimization_steps,
})

# Use some defaults to simplify life of inexperienced users
if optimizer_opts is None:
    optimizer_opts = {"name": "TransformersAdamW", "correct_bias": False, "weight_decay": 0.01}
optimizer_opts["lr"] = learning_rate

if schedule_opts is None:
    # Default schedule: Linear Warmup with 10% warmup
    schedule_opts = {"name": "LinearWarmup",
                     "num_warmup_steps": 0.1 * num_train_optimization_steps,
                     "num_training_steps": num_train_optimization_steps}
    # schedule_opts = {"name": "OneCycleLR", "max_lr":learning_rate, "pct_start": 0.1,
logger.info(f"Using scheduler '{schedule_name}'")
# get supported args of constructor
allowed_args = inspect.signature(sched_constructor).parameters.keys()
# convert from warmup proporation to steps if required
if 'num_warmup_steps' in allowed_args and 'num_warmup_steps' not in opts and 'warmup_proportion' in opts:
opts['num_warmup_steps'] = int(opts["warmup_proportion"] * opts["num_training_steps"])
MlLogger.log_params({"warmup_proportion": opts["warmup_proportion"]})
# only pass args that are supported by the constructor
opts = {k: v for k, v in opts.items() if k in allowed_args}
# Logging
logger.info(f"Loading schedule `{schedule_name}`: '{opts}'")
MlLogger.log_params(opts)
MlLogger.log_params({"schedule_name": schedule_name})
return sched_constructor(optimizer, **opts)
def _get_optim(model, opts):
    """ Get the optimizer based on a dictionary of options. Options are passed to the optimizer constructor.

    :param model: model to optimize
    :param opts: config dictionary that will be passed to the optimizer together with the params
        (e.g. lr, weight_decay, correct_bias ...). 'no_decay' can be given - parameters whose names contain
        any of those strings will have weight_decay set to 0.
    :return: created optimizer
    """
    optimizer_name = opts.pop('name', None)

    # Logging
    logger.info(f"Loading optimizer `{optimizer_name}`: '{opts}'")
    MlLogger.log_params(opts)
    MlLogger.log_params({"optimizer_name": optimizer_name})

    weight_decay = opts.pop('weight_decay', None)
    no_decay = opts.pop('no_decay', None)

    if no_decay:
        optimizable_parameters = [
            # parameters that do get weight decay
            {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay) and p.requires_grad],
             'weight_decay': weight_decay,
             **opts},
            # parameters matching a 'no_decay' substring are excluded from weight decay
            {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay) and p.requires_grad],
             'weight_decay': 0.0,
             **opts}
        ]
    else:
        optimizable_parameters = [{'params': [p for p in model.parameters() if p.requires_grad], **opts}]
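# Hedged usage sketch (not FARM's code) of the same grouping applied to a plain
# torch model, assuming no_decay = ["bias"] so bias parameters are excluded from
# weight decay while everything else keeps the configured value:
import torch
from torch import nn

tiny_model = nn.Linear(4, 2)
no_decay = ["bias"]
param_groups = [
    {"params": [p for n, p in tiny_model.named_parameters()
                if not any(nd in n for nd in no_decay)],
     "lr": 2e-5, "weight_decay": 0.01},
    {"params": [p for n, p in tiny_model.named_parameters()
                if any(nd in n for nd in no_decay)],
     "lr": 2e-5, "weight_decay": 0.0},
]
optimizer = torch.optim.AdamW(param_groups)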