# Function signature below is reconstructed from the docstring; parameter defaults are assumptions.
def get_preds(learn, dl, with_loss=False, n_batch=None, pbar=None):
    """
    Args:
        learn: Learner object that will be used for prediction
        dl: DataLoader (or DatasetType) the model will use to load samples
        with_loss: If True, also return the loss for each prediction
        n_batch: Number of batches to predict. If not specified, predictions are run for n batches,
            where n = sample size // BATCH_SIZE
        pbar: ProgressBar object
    """
    # Note: In fastai, for DatasetType.Train only the output of complete minibatches is computed,
    # i.e. if one has 101 images and uses a minibatch size of 16, then len(feats) is 96 and not 101.
    # For DatasetType.Valid this is not the case, and len(feats) is 101 as expected. A way around
    # this is to use DatasetType.Fix instead when referring to the training set (see the usage
    # sketch after this function). See e.g. this issue:
    # https://forums.fast.ai/t/get-preds-returning-less-results-than-length-of-original-dataset/34148
    if dl == DatasetType.Train:
        dl = DatasetType.Fix
    lf = learn.loss_func if with_loss else None
    return fastai.basic_train.get_preds(
        learn.model,
        dl,
        cb_handler=CallbackHandler(learn.callbacks),
        activ=_loss_func2activ(learn.loss_func),
        loss_func=lf,
        n_batch=n_batch,
        pbar=pbar,
    )
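
# Usage sketch of the DatasetType.Fix workaround noted above (assumes a fastai v1 `learn` whose
# training set size is not a multiple of the batch size): Fix iterates the training data in order,
# without shuffling and without dropping the last incomplete batch, so the number of predictions
# matches the dataset length.
train_preds, train_targets = learn.get_preds(ds_type=DatasetType.Fix)
assert len(train_preds) == len(learn.data.train_ds)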
    partial(SaveModelCallback, every='improvement', mode='min',
            monitor='group_mean_log_mae', name=model_str)
]
learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns,
                wd=args.wd, loss_func=contribs_rmse_loss)
if args.start_epoch > 0:
    learn.load(model_str)
torch.cuda.empty_cache()
if distributed_train: learn = learn.to_distributed(args.local_rank)

learn.fit_one_cycle(args.epochs, max_lr=args.lr, start_epoch=args.start_epoch)

# make predictions
val_contrib_preds = learn.get_preds(DatasetType.Valid)
test_contrib_preds = learn.get_preds(DatasetType.Test)
val_preds = val_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN
test_preds = test_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN

# store results
store_submit(test_preds, model_str, print_head=True)
store_oof(val_preds, model_str, print_head=True)
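
# Hedged clarification of the indexing used above: fastai's `get_preds` returns a
# (predictions, targets) tuple, hence the `[0]`; the last prediction column is assumed to hold the
# total contribution, which is then un-standardized with the C.SC_STD / C.SC_MEAN constants.
preds, _targets = learn.get_preds(DatasetType.Valid)
val_total_contrib = preds[:,-1].numpy() * C.SC_STD + C.SC_MEAN  # same values as val_preds above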

else: learn.load(model_str)
torch.cuda.empty_cache()
if distributed_train: learn = learn.to_distributed(args.local_rank)

learn.fit(args.epochs)

# make predictions
n_val = len(train_df[train_df['molecule_id'].isin(val_mol_ids)])
val_preds = np.zeros((n_val, args.epochs))
test_preds = np.zeros((len(test_df), args.epochs))
for m in range(args.epochs):
    print(f'Predicting for model {m}')
    learn.load(model_se_str+f'_{m}')
    val_contrib_preds = learn.get_preds(DatasetType.Valid)
    test_contrib_preds = learn.get_preds(DatasetType.Test)
    val_preds[:,m] = val_contrib_preds[0][:,-1].detach().numpy()
    test_preds[:,m] = test_contrib_preds[0][:,-1].detach().numpy()
val_preds = val_preds * C.SC_STD + C.SC_MEAN
test_preds = test_preds * C.SC_STD + C.SC_MEAN

# store results
store_submit(pd.DataFrame(test_preds), snapshots_str, print_head=True)
store_oof(pd.DataFrame(val_preds), snapshots_str, print_head=True)
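
# Not part of the original script: a hedged sketch of how the per-snapshot prediction columns
# stored above could be collapsed into a single ensemble prediction by simple averaging.
ens_val_preds = val_preds.mean(axis=1)
ens_test_preds = test_preds.mean(axis=1)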

    partial(GradientClipping, clip=10), GroupMeanLogMAE,
    partial(SaveModelCallback, every='improvement', mode='min',
            monitor='group_mean_log_mae', name=model_str)
]
learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns,
                wd=wd, loss_func=contribs_rmse_loss)
learn.load(model_str)

# check if validation metrics are correct
print(learn.validate())

# make predictions
val_contrib_preds = learn.get_preds(DatasetType.Valid)
test_contrib_preds = learn.get_preds(DatasetType.Test)
val_preds = val_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN
test_preds = test_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN

# store results
store_submit(test_preds, model_str, print_head=True)
store_oof(val_preds, model_str, print_head=True)
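
# Hedged sketch of reading the `learn.validate()` output printed above: fastai v1 returns the
# validation loss followed by the metric values in registration order (whether callback-computed
# metrics such as group_mean_log_mae are included depends on the fastai version).
val_loss, val_rmse, val_mae, *other_metrics = learn.validate()
print(f'val loss {val_loss:.4f}, rmse {val_rmse:.4f}, mae {val_mae:.4f}')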

def _update_batches_if_needed(self)->None:
    "one_batch function is extremely slow with large datasets. This is caching the result as an optimization."
    if self.learn.data.valid_dl is None: return  # Running learning rate finder, so return
    update_batches = self.data is not self.learn.data
    if not update_batches: return
    self.data = self.learn.data
    self.trn_batch = self._get_new_batch(ds_type=DatasetType.Train)
    self.val_batch = self._get_new_batch(ds_type=DatasetType.Valid)
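
# For reference, a minimal sketch of what `_get_new_batch` presumably wraps, based on the older
# inline version further below; the exact helper in the callback may differ.
def _get_new_batch(self, ds_type: DatasetType) -> Tuple:
    "Fetch a raw (non-denormalized) batch once so it can be cached and reused."
    return self.learn.data.one_batch(ds_type=ds_type, detach=True, denorm=False, cpu=False)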

def _tta_only(learn:Learner, ds_type:DatasetType=DatasetType.Valid, activ:nn.Module=None, scale:float=1.35) -> Iterator[List[Tensor]]:
    "Computes the outputs for several augmented inputs for TTA"
    dl = learn.dl(ds_type)
    ds = dl.dataset
    old = ds.tfms
    activ = ifnone(activ, _loss_func2activ(learn.loss_func))
    augm_tfm = [o for o in learn.data.train_ds.tfms if o.tfm not in
                (crop_pad, flip_lr, dihedral, zoom)]
    try:
        pbar = master_bar(range(8))
        for i in pbar:
            row = 1 if i&1 else 0
            col = 1 if i&2 else 0
            flip = i&4
            d = {'row_pct':row, 'col_pct':col, 'is_random':False}
            tfm = [*augm_tfm, zoom(scale=scale, **d), crop_pad(**d)]
            if flip: tfm.append(flip_lr(p=1.))
            # apply this augmentation variant and yield the corresponding predictions
            ds.tfms = tfm
            yield get_preds(learn.model, dl, pbar=pbar, activ=activ)[0]
    finally:
        # always restore the original transforms
        ds.tfms = old
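
# Usage sketch (assumes a fastai v1 vision `learn`): `Learner.TTA` combines the regular predictions
# with the augmented ones generated by `_tta_only`.
preds, targets = learn.TTA(ds_type=DatasetType.Valid, scale=1.35)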

def write(self, learn:Learner, trn_batch:Tuple, val_batch:Tuple, iteration:int, tbwriter:SummaryWriter):
    # Write visualizations for both the validation and the training batch to Tensorboard.
    self._write_for_dstype(learn=learn, batch=val_batch, iteration=iteration,
                           tbwriter=tbwriter, ds_type=DatasetType.Valid)
    self._write_for_dstype(learn=learn, batch=trn_batch, iteration=iteration,
                           tbwriter=tbwriter, ds_type=DatasetType.Train)
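
# `_write_for_dstype` is not shown in this snippet; a minimal illustrative sketch (an assumption,
# not the fastai implementation) could simply log the batch inputs under a split-specific tag:
def _write_for_dstype(self, learn:Learner, batch:Tuple, iteration:int,
                      tbwriter:SummaryWriter, ds_type:DatasetType) -> None:
    x, _ = batch
    tbwriter.add_images(f'{ds_type.name}/inputs', x, iteration)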

def _update_batches_if_needed(self):
    # The one_batch function is extremely slow with large datasets, so cache its result as an
    # optimization. Note also that we always want to show the same batches, so that changes are
    # visible in Tensorboard.
    update_batches = self.data is not self.learn.data
    if update_batches:
        self.data = self.learn.data
        self.trn_batch = self.learn.data.one_batch(
            ds_type=DatasetType.Train, detach=True, denorm=False, cpu=False)
        self.val_batch = self.learn.data.one_batch(
            ds_type=DatasetType.Valid, detach=True, denorm=False, cpu=False)