How to use the fastai.basic_train.Learner class in fastai

To help you get started, we’ve selected a few fastai examples based on popular ways the Learner class is used in public projects.

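All of the examples below follow the same basic pattern: wrap a DataBunch and a plain PyTorch model in a Learner, then call one of its fit methods. The following is a minimal, self-contained sketch of that pattern using the fastai v1 API; the toy tensor dataset and tiny linear model are placeholders, not taken from any of the projects shown below.

import torch
from torch import nn
from torch.utils.data import TensorDataset
from fastai.basic_data import DataBunch
from fastai.basic_train import Learner
from fastai.metrics import accuracy

# Placeholder data: 100 samples with 20 features and binary labels.
x, y = torch.randn(100, 20), torch.randint(0, 2, (100,))
data = DataBunch.create(TensorDataset(x, y), TensorDataset(x[:20], y[:20]), bs=16)

model = nn.Sequential(nn.Linear(20, 2))
learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=[accuracy])
learn.fit(1)  # train for one epoch with the default optimizer and learning rate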

github optuna / optuna / tests / integration_tests / test_fastai.py
def objective(trial):
    # type: (optuna.trial.Trial) -> float

    model = nn.Sequential(nn.Linear(20, 1), nn.Sigmoid())
    learn = Learner(data_bunch, model, metrics=[accuracy], callback_fns=[
        partial(FastAIPruningCallback, trial=trial, monitor='valid_loss')
    ])

    learn.fit(1)

    return 1.0
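The test's objective just returns a constant, but it shows where the pruning hook goes: FastAIPruningCallback is passed through callback_fns so it can report 'valid_loss' back to the trial after each epoch. Below is a sketch of how such an objective would typically be driven by an Optuna study; the pruner choice and trial count are illustrative assumptions, not part of the test.

import optuna

study = optuna.create_study(
    direction='minimize',                  # 'valid_loss' should be minimized
    pruner=optuna.pruners.MedianPruner(),  # the callback reports epoch values to this pruner
)
study.optimize(objective, n_trials=20)
print(study.best_value)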
github robinniesert / kaggle-champs / snapshot_train.py
# initialize distributed
if distributed_train:
    torch.cuda.set_device(args.local_rank)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')


# train model
callback_fns = [
    partial(GradientClipping, clip=10),
    partial(GroupMeanLogMAE, snapshot_ensemble=True),
    partial(SaveModelCallback, every='epoch', mode='min',
            monitor='group_mean_log_mae', name=model_se_str),
    partial(WarmRestartsLRScheduler, n_cycles=args.epochs,
            lr=(args.lr, args.lr/args.lr_div), mom=(args.mom, 0.95),
            cycle_len=args.cycle_len, cycle_mult=args.cycle_mult,
            start_epoch=args.start_epoch)
]
learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns,
                wd=args.wd, loss_func=contribs_rmse_loss)
if args.start_epoch > 0: learn.load(model_se_str+f'_{args.start_epoch-1}')
else: learn.load(model_str)
torch.cuda.empty_cache()
if distributed_train: learn = learn.to_distributed(args.local_rank)

learn.fit(args.epochs)


# make predictions
n_val = len(train_df[train_df['molecule_id'].isin(val_mol_ids)])
val_preds = np.zeros((n_val, args.epochs))
test_preds = np.zeros((len(test_df), args.epochs))
for m in range(args.epochs):
    print(f'Predicting for model {m}')
    learn.load(model_se_str+f'_{m}')
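The excerpt stops inside the prediction loop, right after each snapshot is loaded. A plausible continuation is sketched here, modeled on the get_preds calls in the train.py example further down; the rescaling constants C.SC_STD/C.SC_MEAN and the final averaging step are borrowed or assumed, not taken verbatim from snapshot_train.py.

    # Hypothetical continuation of the loop above (not part of the excerpt):
    val_contrib_preds = learn.get_preds(DatasetType.Valid)
    test_contrib_preds = learn.get_preds(DatasetType.Test)
    val_preds[:, m] = val_contrib_preds[0][:, -1].detach().numpy() * C.SC_STD + C.SC_MEAN
    test_preds[:, m] = test_contrib_preds[0][:, -1].detach().numpy() * C.SC_STD + C.SC_MEAN

# Snapshot-ensemble by averaging the per-epoch columns (also a sketch).
val_ens, test_ens = val_preds.mean(axis=1), test_preds.mean(axis=1)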
github fastai / fastai / fastai / distributed.py
        stats = np.array([[v] + m for v,m in zip(recorder.val_losses,recorder.metrics)])
        np.save(cache_path/f'metrics_{self.cuda_id}', stats)

def _learner_parallel(learn:Learner):
    "Use nn.DataParallel when training and remove when done"
    if not torch.cuda.is_available(): warnings.warn('CUDA is not available, check your drivers - training will continue on CPU', ResourceWarning) 
    learn.callbacks.append(ParallelTrainer(learn))
    return learn

def _learner_distributed(learn:Learner, cuda_id:int, cache_dir:PathOrStr='tmp'):
    "Put `learn` on distributed training with `cuda_id`."
    learn.callbacks.append(DistributedTrainer(learn, cuda_id))
    learn.callbacks.append(DistributedRecorder(learn, cuda_id, cache_dir))
    return learn

Learner.to_distributed = _learner_distributed
Learner.to_parallel = _learner_parallel

def read_metrics(cache_path:PathOrStr, n_gpus:int, reduce:bool=True):
    losses,metrics = [],[]
    for i in range(n_gpus):
        losses.append(np.load(cache_path/f'losses_{i}.npy')[None])
        metrics.append(np.load(cache_path/f'metrics_{i}.npy')[None])
    if reduce:
        losses,metrics = np.concatenate(losses,0),np.concatenate(metrics,0)
        return losses.mean(0),metrics.mean(0)
    return losses,metrics

def setup_distrib(gpu:Any=None):
    if gpu is None: return gpu
    gpu = int(gpu)
    torch.cuda.set_device(int(gpu))
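Because to_distributed and to_parallel are patched directly onto Learner, any training script can opt in with a single call after building its learner. Below is a sketch of how this is typically wired up, assuming the script is started with one process per GPU (for example via python -m torch.distributed.launch, which passes --local_rank to each process); the function and argument names here are illustrative, not from the excerpt.

import argparse
from fastai.basic_train import Learner
from fastai.distributed import setup_distrib  # importing this module patches Learner

def train_distributed(learn: Learner, local_rank: int, epochs: int = 5):
    "Sketch: run an already-built Learner across GPUs, one process per GPU."
    setup_distrib(local_rank)                 # pin this process to its GPU
    learn = learn.to_distributed(local_rank)  # adds DistributedTrainer/DistributedRecorder
    learn.fit(epochs)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--local_rank', type=int, default=0)  # supplied by the launcher
    args = parser.parse_args()
    # train_distributed(my_learner, args.local_rank)  # my_learner: built as in the examples above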
github robinniesert / kaggle-champs / train.py
)


# initialize distributed
if distributed_train:
    torch.cuda.set_device(args.local_rank)
    torch.distributed.init_process_group(backend='nccl', init_method='env://')


# train model
callback_fns = [
    partial(GradientClipping, clip=10), GroupMeanLogMAE,
    partial(SaveModelCallback, every='improvement', mode='min',
            monitor='group_mean_log_mae', name=model_str)
]
learn = Learner(db, model, metrics=[rmse, mae], callback_fns=callback_fns,
                wd=args.wd, loss_func=contribs_rmse_loss)
if args.start_epoch > 0:
    learn.load(model_str)
    torch.cuda.empty_cache()
if distributed_train: learn = learn.to_distributed(args.local_rank)

learn.fit_one_cycle(args.epochs, max_lr=args.lr, start_epoch=args.start_epoch)


# make predictions
val_contrib_preds = learn.get_preds(DatasetType.Valid)
test_contrib_preds = learn.get_preds(DatasetType.Test)
val_preds = val_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN
test_preds = test_contrib_preds[0][:,-1].detach().numpy() * C.SC_STD + C.SC_MEAN
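The raw model outputs are on a standardized scale, so both prediction arrays are mapped back to the original target scale with the dataset's mean and standard deviation. As a quick sanity check on the validation split, one could compute the plain (ungrouped) log MAE; val_targets below is a hypothetical array of ground-truth values aligned with val_preds, not something defined in the excerpt.

import numpy as np

# val_targets is hypothetical: ground-truth values aligned with val_preds.
val_log_mae = np.log(np.mean(np.abs(val_preds - val_targets)))
print(f'validation log MAE: {val_log_mae:.4f}')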