def fit(self, epochs, lr, validate=True, schedule_type="warmup_linear"):
    num_train_steps = int(len(self.data.train_dl) / self.grad_accumulation_steps * epochs)
    if self.optimizer is None:
        self.optimizer, self.schedule = self.get_optimizer(lr, num_train_steps)
    t_total = num_train_steps
    if not self.multi_gpu:
        t_total = t_total // torch.distributed.get_world_size()
    global_step = 0
    pbar = master_bar(range(epochs))  # outer bar over epochs
    for epoch in pbar:
        self.model.train()
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0
        # inner bar over batches, attached to the epoch bar via parent=
        for step, batch in enumerate(progress_bar(self.data.train_dl, parent=pbar)):
            batch = tuple(t.to(self.device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            if self.is_fp16 and self.multi_label:
                label_ids = label_ids.half()
            loss = self.model(input_ids, segment_ids, input_mask, label_ids)
            if self.multi_gpu:
                loss = loss.mean()  # mean() to average on multi-gpu.
            if self.grad_accumulation_steps > 1:
                loss = loss / self.grad_accumulation_steps
            if self.is_fp16:
                self.optimizer.backward(loss)  # FP16 optimizer scales the loss internally
            else:
                loss.backward()
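# The fit loop above uses fastprogress's nested-bar pattern: a master_bar over
# epochs with a progress_bar over batches attached via `parent=`. A minimal,
# self-contained sketch of that pattern (names and timings here are
# illustrative, not taken from the snippet above):
from fastprogress.fastprogress import master_bar, progress_bar
import time

def nested_bar_demo(epochs=3, batches=10):
    mb = master_bar(range(epochs))
    for epoch in mb:
        for batch in progress_bar(range(batches), parent=mb):
            time.sleep(0.01)  # stand-in for one training step
        mb.write(f"finished epoch {epoch}")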
visual_activity_concept = visual_activity_concept.cpu().detach().numpy()
# convert matrix to vector: (1, outdim) --> (outdim,)
visual_feature = visual_feature.reshape(visual_feature.shape[1],)
visual_activity_concept = visual_activity_concept.reshape(visual_activity_concept.shape[1],)
# save features as .npy files
np.save(os.path.join(visual_feature_out_dir, name), visual_feature)
np.save(os.path.join(activity_concept_out_dir, activity_name(name)), visual_activity_concept)
if args.verbose:
    print('save {}.npy'.format(name))

# get movie names (root_dir is assumed to be an iterable of per-movie paths)
movie_names = []
for movie in root_dir:
    movie_names.append(movie.split('/')[-1])

# pool the per-clip activity concepts for each movie
for movie_name in progress_bar(movie_names):
    pool_feature(movie_name, activity_concept_out_dir, pooled_activity_concept_out_dir)
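# pool_feature is not defined in this snippet. A plausible sketch, assuming
# each movie's per-clip activity-concept vectors were saved above as
# '<movie_name>*.npy' files (the real naming scheme is not shown here):
import glob

def pool_feature(movie_name, in_dir, out_dir):
    # Average the per-clip vectors for one movie into a single pooled vector.
    paths = sorted(glob.glob(os.path.join(in_dir, '{}*.npy'.format(movie_name))))
    feats = np.stack([np.load(p) for p in paths])  # (n_clips, outdim)
    np.save(os.path.join(out_dir, movie_name), feats.mean(axis=0))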
if verbose: print("Optimising RF parameters")
rfp, rf = get_opt_rf_params(train_df[train_feats], train_df[targ_name], val_df[train_feats], val_df[targ_name],
objective, w_trn=w_trn, w_val=w_val, n_estimators=n_estimators, params=rf_params, verbose=False)
else:
rfp = rf_params
rfp['n_estimators'] = n_estimators
m = RandomForestClassifier if 'class' in objective.lower() else RandomForestRegressor
rf = m(**rfp)
rf.fit(X=train_df[train_feats], y=train_df[targ_name], sample_weight=w_trn)
if verbose: print("Evalualting importances")
fi = get_rf_feat_importance(rf, train_df[train_feats], train_df[targ_name], w_trn)
orig_score = [rf.score(X=val_df[train_feats], y=val_df[targ_name], sample_weight=w_val)]
if n_rfs > 1:
m = RandomForestClassifier if 'class' in objective.lower() else RandomForestRegressor
for _ in progress_bar(range(n_rfs-1)):
rf = m(**rfp)
rf.fit(X=train_df[train_feats], y=train_df[targ_name], sample_weight=w_trn)
fi = pd.merge(fi, get_rf_feat_importance(rf, train_df[train_feats], train_df[targ_name], w_trn), on='Feature', how='left')
orig_score.append(rf.score(X=val_df[train_feats], y=val_df[targ_name], sample_weight=w_val))
fi['Importance'] = np.mean(fi[[f for f in fi.columns if 'Importance' in f]].values, axis=1)
fi['Uncertainty'] = np.std(fi[[f for f in fi.columns if 'Importance' in f]].values, ddof=1, axis=1)/np.sqrt(n_rfs)
fi.sort_values(by='Importance', ascending=False, inplace=True)
orig_score = uncert_round(np.mean(orig_score), np.std(orig_score, ddof=1))
if verbose: print("Top ten most important features:\n", fi[['Feature', 'Importance']][:min(len(fi), 10)])
if plot_results: plot_importance(fi[:min(len(fi), n_max_display)], threshold=importance_cut, savename=savename, settings=plot_settings)
top_feats = list(fi[fi.Importance >= importance_cut].Feature)
if verbose: print(f"\n{len(top_feats)} features found with importance greater than {importance_cut}:\n", top_feats, '\n')
if len(top_feats) == 0:
if verbose: print(f"Model score: :\t{orig_score[0]}±{orig_score[1]}")
print('No features found to be important, returning all training features. Good luck.')
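# uncert_round is not defined in this snippet. A plausible sketch, assuming it
# rounds a value and its uncertainty to the uncertainty's first significant
# figure and returns them as a pair (matching the orig_score[0]/[1] usage):
import math

def uncert_round(value, uncert):
    if uncert == 0 or not math.isfinite(uncert):
        return value, uncert
    dp = -int(math.floor(math.log10(abs(uncert))))  # decimal places to keep
    return round(value, dp), round(uncert, dp)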
from concurrent.futures import ProcessPoolExecutor, as_completed

def parallel(func, arr, max_workers=None):
    "Run `func(item, index)` over `arr` in separate processes, with a progress bar."
    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        futures = [ex.submit(func, o, i) for i, o in enumerate(arr)]
        results = []
        # NB: as_completed yields futures in completion order, not submission order
        for f in progress_bar(as_completed(futures), total=len(arr)):
            results.append(f.result())
    return results
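# Example usage: the worker must be picklable (module-level) and accept
# (item, index), since parallel submits func(o, i) for each element. The
# square helper below is hypothetical, purely for illustration:
def square(x, i):
    return i, x * x

if __name__ == '__main__':
    out = parallel(square, [1, 2, 3, 4])
    # Results arrive in completion order; returning the index with each
    # result lets the caller restore submission order.
    out.sort(key=lambda p: p[0])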
w_trn: training weights
w_val: validation weights
params: ordered dictionary mapping parameters to optimise to list of values to consider
n_estimators: number of trees to use in each forest
verbose: Print extra information and show a live plot of model performance
Returns:
params: dictionary mapping parameters to their optimised values
rf: best performing Random Forest
'''
if params is None: params = OrderedDict({'min_samples_leaf': [1,3,5,10,25,50,100], 'max_features': [0.3,0.5,0.7,0.9]})
rf = RandomForestClassifier if 'class' in objective.lower() else RandomForestRegressor
best_params = {'n_estimators': n_estimators, 'n_jobs': -1, 'max_features': 'sqrt'}
best_scores = []
scores = []
mb = master_bar(params)
mb.names = ['Best', 'Scores']
if verbose: mb.update_graph([[[], []], [[], []]])
for param in mb:
    pb = progress_bar(params[param], parent=mb)
    pb.comment = f'{param} = {params[param][0]}'
    for i, value in enumerate(pb):
        pb.comment = f'{param} = {params[param][min(i+1, len(params[param])-1)]}'
        m = rf(**{**best_params, param: value})
        m.fit(X=x_trn, y=y_trn, sample_weight=w_trn)
        scores.append(m.score(X=x_val, y=y_val, sample_weight=w_val))
        if len(best_scores) == 0 or scores[-1] > best_scores[-1]:
            best_scores.append(scores[-1])
            best_params[param] = value
            if verbose: print(f'Better score achieved: {param} @ {value} = {best_scores[-1]:.4f}')
            best_m = m
        else:
            best_scores.append(best_scores[-1])  # carry the best score so far forward
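# A hypothetical call, with argument names inferred from the body and
# docstring above (the full signature is not shown in this snippet):
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
x_trn, x_val, y_trn, y_val = train_test_split(X, y, random_state=0)
opt_params, best_rf = get_opt_rf_params(x_trn, y_trn, x_val, y_val,
                                        objective='classification',
                                        n_estimators=40, verbose=False)
print(opt_params)  # optimised values for min_samples_leaf, max_features, etc.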
def begin_fit(self):
    self.mbar = master_bar(range(self.epochs))
    self.mbar.on_iter_begin()
    self.run.logger = partial(self.mbar.write, table=True)
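# With table=True, master_bar.write renders each list it is given as a row of
# an HTML table beneath the bar (in notebooks; plain text in a console). A
# minimal standalone sketch of the same logging pattern used by begin_fit:
from functools import partial
from fastprogress.fastprogress import master_bar

mb = master_bar(range(2))
logger = partial(mb.write, table=True)
logger(['epoch', 'train_loss', 'valid_loss'])  # header row
for epoch in mb:
    logger([str(epoch), '0.123', '0.145'])  # one row per epoch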
def show_install(show_nvidia_smi:bool=False):
    "Print user's setup information: python -c 'import fastai; fastai.show_install()'"
    import platform, fastai.version
    rep = []
    opt_mods = []
    rep.append(["=== Software ===", None])
    rep.append(["python", platform.python_version()])
    rep.append(["fastai", fastai.__version__])
    rep.append(["fastprogress", fastprogress.__version__])
    rep.append(["torch", torch.__version__])

    # nvidia-smi
    cmd = "nvidia-smi"
    have_nvidia_smi = False
    try:
        result = subprocess.run(cmd.split(), shell=False, check=False, stdout=subprocess.PIPE)
    except:
        pass
    else:
        if result.returncode == 0 and result.stdout:
            have_nvidia_smi = True
    # XXX: if nvidia-smi is not available, another check could be:
    # /proc/driver/nvidia/version on most systems, since it's the
    # currently active version
    if have_nvidia_smi:
        smi = result.stdout.decode('utf-8')
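# The XXX note above suggests /proc/driver/nvidia/version as a fallback when
# nvidia-smi is unavailable; a minimal sketch of that check (an assumption,
# not part of the original function):
import os

def nvidia_driver_version():
    # On most Linux systems this file reports the currently loaded driver.
    path = '/proc/driver/nvidia/version'
    if os.path.exists(path):
        with open(path) as f:
            return f.read().strip()
    return None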
def fit(self, epochs: int, lr: float,
        params_opt_dict: Optional[Dict] = None):
    "Main training loop"
    # Print the config at the start of the training loop
    self.logger.info(self.cfg)
    # Initialize the progress bar
    mb = master_bar(range(epochs))
    # Initialize optimizer
    # prepare_optimizer may need to be re-written as per use
    self.optimizer = self.prepare_optimizer(params_opt_dict)
    # Initialize scheduler
    # prepare_scheduler may need to be re-written as per use
    self.lr_scheduler = self.prepare_scheduler(self.optimizer)
    # Write the top row display
    # mb.write(self.log_keys, table=True)
    self.master_bar_write(mb, line=self.log_keys, table=True)
    exception = False
    met_to_use = None
    # Keep record of time until exit
    st_time = time.time()
    try:
        # Loop over epochs