Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def validate(args, trainer, task, epoch_itr, subsets):
"""Evaluate the model on the validation set(s) and return the losses."""
valid_losses = []
for subset in subsets:
# Initialize data iterator
itr = task.get_batch_iterator(
dataset=task.dataset(subset),
max_tokens=args.max_tokens_valid,
max_sentences=args.max_sentences_valid,
max_positions=utils.resolve_max_positions(
task.max_positions(),
trainer.get_model().max_positions(),
),
ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
required_batch_size_multiple=args.required_batch_size_multiple,
seed=args.seed,
num_shards=args.distributed_world_size,
shard_id=args.distributed_rank,
num_workers=args.num_workers,
).next_epoch_itr(shuffle=False)
progress = progress_bar.build_progress_bar(
args, itr, epoch_itr.epoch,
prefix='valid on \'{}\' subset'.format(subset),
no_progress_bar='simple'
)
beamable_mm_beam_size=None if args.no_beamable_mm else args.beam,
need_attn=args.print_alignment,
)
if args.fp16:
model.half()
# Load alignment dictionary for unknown word replacement
# (None if no unknown word replacement, empty if no path to align dictionary)
align_dict = utils.load_align_dict(args.replace_unk)
# Load dataset (possibly sharded)
itr = task.get_batch_iterator(
dataset=task.dataset(args.gen_subset),
max_tokens=args.max_tokens,
max_sentences=args.max_sentences,
max_positions=utils.resolve_max_positions(
task.max_positions(),
*[model.max_positions() for model in models]
),
ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
required_batch_size_multiple=8,
num_shards=args.num_shards,
shard_id=args.shard_id,
).next_epoch_itr(shuffle=False)
# Initialize generator
gen_timer = StopwatchMeter()
if args.score_reference:
translator = SequenceScorer(models, task.target_dictionary)
else:
translator = SequenceGenerator(
models, task.target_dictionary, beam_size=args.beam, minlen=args.min_len,
)
# Optimize ensemble for generation
for model in models:
# model.make_generation_fast_()
if args.fp16:
model.half()
if use_cuda:
model.cuda()
# Load dataset (possibly sharded)
itr = task.get_batch_iterator(
dataset=task.dataset(args.gen_subset),
max_tokens=args.max_tokens,
max_sentences=args.max_sentences,
max_positions=utils.resolve_max_positions(
task.max_positions(),
*[model.max_positions() for model in models]
),
ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
required_batch_size_multiple=args.required_batch_size_multiple,
num_shards=args.num_shards,
shard_id=args.shard_id,
num_workers=args.num_workers,
).next_epoch_itr(shuffle=False)
# Initialize generator
gen_timer = StopwatchMeter()
generator = task.build_generator(args)
# Generate and compute score
coco = task.dataset(args.gen_subset).coco
models = [fasterrcnn.model]
# Optimize ensemble for generation
for model in models:
# model.make_generation_fast_()
if args.fp16:
model.half()
if use_cuda:
model.cuda()
# Load dataset (possibly sharded)
itr = task.get_batch_iterator(
dataset=task.dataset(args.valid_subset),
max_tokens=args.max_tokens,
max_sentences=args.max_sentences,
max_positions=utils.resolve_max_positions(
task.max_positions(),
*[model.max_positions() for model in models]
),
ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
required_batch_size_multiple=args.required_batch_size_multiple,
num_shards=args.distributed_world_size,
shard_id=args.distributed_rank,
num_workers=args.num_workers,
).next_epoch_itr(shuffle=False)
# Initialize generator
gen_timer = StopwatchMeter()
generator = task.build_generator(args)
# Generate and compute score
coco = task.dataset(args.valid_subset).coco
def validate(args, trainer, task, epoch_itr, subsets):
"""Evaluate the model on the validation set(s) and return the losses."""
if args.fixed_validation_seed is not None:
# set fixed seed for every validation
utils.set_torch_seed(args.fixed_validation_seed)
valid_losses = []
for subset in subsets:
# Initialize data iterator
itr = task.get_batch_iterator(
dataset=task.dataset(subset),
max_tokens=args.max_tokens_valid,
max_sentences=args.max_sentences_valid,
max_positions=utils.resolve_max_positions(
task.max_positions(),
trainer.get_model().max_positions(),
),
ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
required_batch_size_multiple=args.required_batch_size_multiple,
seed=args.seed,
num_shards=args.distributed_world_size,
shard_id=args.distributed_rank,
num_workers=args.num_workers,
).next_epoch_itr(shuffle=False)
progress = progress_bar.build_progress_bar(
args, itr, epoch_itr.epoch,
prefix='valid on \'{}\' subset'.format(subset),
no_progress_bar='simple'
)
def __init__(self, checkpoints, device=None, beam_size=5, use_fp16=False, tuning_ops=None):
torch.manual_seed(checkpoints.args.seed)
self._checkpoints = checkpoints
self._device = device
self._model = self._create_model(checkpoints, device=device, beam_size=beam_size, use_fp16=use_fp16)
self._translator = self._create_translator(checkpoints, beam_size)
self._tuner = self._create_tuner(checkpoints, self._model, tuning_ops, device)
self._max_positions = fairseq.utils.resolve_max_positions(
checkpoints.task.max_positions(),
self._model.max_positions(),
)
self._logger = logging.getLogger('Transformer')
self._nn_needs_reset = True
self._checkpoint = None
for model in models:
model.make_generation_fast_()
if args.fp16:
model.half()
if use_cuda:
model.cuda()
assert len(models) > 0
print('num. model params: {}'.format(sum(p.numel() for p in models[0].parameters())))
itr = task.get_batch_iterator(
dataset=dataset,
max_tokens=args.max_tokens or 36000,
max_sentences=args.max_sentences,
max_positions=utils.resolve_max_positions(*[
model.max_positions() for model in models
]),
ignore_invalid_inputs=True,
num_shards=args.num_shards,
shard_id=args.shard_id,
num_workers=args.num_workers,
).next_epoch_itr(shuffle=False)
gen_timer = StopwatchMeter()
scorer = SequenceScorer(task.target_dictionary, args.softmax_batch)
score_sum = 0.
count = 0
if args.remove_bpe is not None:
if args.remove_bpe == 'sentencepiece':
def get_train_iterator(self, epoch, combine=True):
"""Return an EpochBatchIterator over the training set for a given epoch."""
print('| loading train data for epoch {}'.format(epoch))
self.task.load_dataset(self.args.train_subset, epoch=epoch, combine=combine)
return self.task.get_batch_iterator(
dataset=self.task.dataset(self.args.train_subset),
max_tokens=self.args.max_tokens,
max_sentences=self.args.max_sentences,
max_positions=utils.resolve_max_positions(
self.task.max_positions(),
self.model.max_positions(),
),
ignore_invalid_inputs=True,
required_batch_size_multiple=self.args.required_batch_size_multiple,
seed=self.args.seed,
num_shards=self.args.distributed_world_size,
shard_id=self.args.distributed_rank,
num_workers=self.args.num_workers,
epoch=epoch,
)
def validate(args, trainer, task, epoch_itr, subsets):
"""Evaluate the model on the validation set(s) and return the losses."""
valid_losses = []
for subset in subsets:
# Initialize data iterator
itr = task.get_batch_iterator(
dataset=task.dataset(subset),
max_tokens=args.max_tokens_valid,
max_sentences=args.max_sentences_valid,
max_positions=utils.resolve_max_positions(
task.max_positions(),
trainer.get_model().max_positions(),
),
ignore_invalid_inputs=args.skip_invalid_size_inputs_valid_test,
required_batch_size_multiple=args.required_batch_size_multiple,
seed=args.seed,
num_shards=args.distributed_world_size,
shard_id=args.distributed_rank,
num_workers=args.num_workers,
).next_epoch_itr(shuffle=False)
progress = progress_bar.build_progress_bar(
args, itr, epoch_itr.epoch,
prefix='valid on \'{}\' subset'.format(subset),
no_progress_bar='simple'
)