Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
w_trn: training weights
w_val: validation weights
params: ordered dictionary mapping parameters to optimise to list of values to consider
n_estimators: number of trees to use in each forest
verbose: Print extra information and show a live plot of model performance
Returns:
params: dictionary mapping parameters to their optimised values
rf: best performing Random Forest
'''
# NOTE(review): fragment — the enclosing function signature is above this paste,
# and indentation appears to have been stripped. Greedy coordinate scan: each
# hyper-parameter in `params` is tuned in turn while the best values found so
# far stay fixed inside `best_params`.
if params is None: params = OrderedDict({'min_samples_leaf': [1,3,5,10,25,50,100], 'max_features': [0.3,0.5,0.7,0.9]})
# Choose classifier vs regressor based on the `objective` string ('class...' -> classifier).
rf = RandomForestClassifier if 'class' in objective.lower() else RandomForestRegressor
best_params = {'n_estimators': n_estimators, 'n_jobs': -1, 'max_features':'sqrt'}
best_scores = []  # running best validation scores (only appended on improvement)
scores = []       # every evaluated score, in scan order
mb = master_bar(params)  # fastprogress outer bar: one tick per hyper-parameter
mb.names = ['Best', 'Scores']
if verbose: mb.update_graph([[[],[]], [[], []]])
for param in mb:
pb = progress_bar(params[param], parent=mb)
pb.comment = f'{param} = {params[param][0]}'
for i, value in enumerate(pb):
# Show the NEXT value being tried; clamp the index at the last element.
pb.comment = f'{param} = {params[param][min(i+1, len(params[param])-1)]}'
# Rebuild the forest with the best settings so far plus the candidate value.
m = rf(**{**best_params, param: value})
m.fit(X=x_trn, y=y_trn, sample_weight=w_trn)
scores.append(m.score(X=x_val, y=y_val, sample_weight=w_val))
if len(best_scores) == 0 or scores[-1] > best_scores[-1]:
best_scores.append(scores[-1])
best_params[param] = value
# NOTE(review): 'schieved' is a typo in a runtime message; left as-is in this
# comment-only pass since changing it alters program output.
if verbose: print(f'Better score schieved: {param} @ {value} = {best_scores[-1]:.4f}')
best_m = m
# NOTE(review): the body of this else-branch is truncated in this paste.
else:
# NOTE(review): body indentation appears stripped — these three lines should be
# indented under the def. Presumably a fastai-style callback method; confirm
# against the enclosing class.
def begin_fit(self):
"""Create the epoch-level master_bar and route run logging into its table output."""
self.mbar = master_bar(range(self.epochs))  # outer bar: one tick per epoch
self.mbar.on_iter_begin()
self.run.logger = partial(self.mbar.write, table=True)  # log lines render as table rows in the bar
.. warning::
The user should never try to change this function in subclass. It is too delicate and
changing affects every other function present in this ``Trainer`` class.
This function controls the execution of all the components of the ``Trainer``. It controls the
``logger``, ``train_iter``, ``save_model``, ``eval_ops`` and ``optim_ops``.
Args:
data_loader (torch.utils.data.DataLoader): A DataLoader for the trainer to iterate over
and train the models.
"""
# NOTE(review): fragment with stripped indentation — the def line is above this
# paste and the loop body is cut off mid-`elif` at the end.
# Clear any stale gradients on every registered optimizer before training starts.
for name in self.optimizer_names:
getattr(self, name).zero_grad()
# Resume-aware epoch bar: starts at self.start_epoch, not 0.
master_bar_iter = master_bar(range(self.start_epoch, self.epochs))
for epoch in master_bar_iter:
start_time = time.time()
master_bar_iter.first_bar.comment = f"Training Progress"
# Put every registered model into training mode for this epoch.
for model in self.model_names:
getattr(self, model).train()
for data in progress_bar(data_loader, parent=master_bar_iter):
master_bar_iter.child.comment = f"Epoch {epoch+1} Progress"
# (inputs, labels) pair vs a bare tensor batch are handled differently.
if type(data) is tuple or type(data) is list:
self.real_inputs = data[0].to(self.device)
self.labels = data[1].to(self.device)
# NOTE(review): the body of this branch is truncated in this paste.
elif type(data) is torch.Tensor:
# NOTE(review): fragment — these two lines are the tail of a fastai-script
# function signature whose def line is above this paste; `Param` is fastai's
# CLI-annotation helper.
only: Param('whitelist subfolders to include', str, nargs='+') = None,
skip: Param("subfolders to skip", str, nargs='+') = None):
"generate combo dataset"
# `skip` and `only` are mutually exclusive filters over source subfolders.
if skip and only:
print('you can skip subfolder or whitelist them but not both')
return 1
src_dirs = []
for src in sources:
sub_fldrs = subfolders(src)
if skip: src_dirs += [fldr for fldr in sub_fldrs if fldr.stem not in skip]
elif only: src_dirs += [fldr for fldr in sub_fldrs if fldr.stem in only]
else: src_dirs += sub_fldrs
# Process every selected source directory, logging progress through the bar.
mbar = master_bar(src_dirs)
tif_srcs = []
for src in mbar:
mbar.write(f'process {src.stem}')
tif_srcs += build_tifs(src, mbar=mbar)
# Persist the collected per-tif metadata with a fixed column order.
tif_src_df = pd.DataFrame(tif_srcs)
tif_src_df[['category','dsplit','multi','ftype','uint8','mean','sd','all_rmax','all_mi','all_ma','mi','ma','rmax','nc','nz','nt','c','z','t','x','y','fn']].to_csv(out, header=True, index=False)
Pandas DataFrame containing mean importance and associated uncertainty for each feature
Examples::
>>> fi = get_ensemble_feat_importance(ensemble, train_fy)
>>>
>>> fi = get_ensemble_feat_importance(ensemble, train_fy,
... savename='feat_import')
>>>
>>> fi = get_ensemble_feat_importance(ensemble, train_fy,
... eval_metric=AMS(n_total=100000))
'''
# NOTE(review): fragment — the def line and docstring head are above this paste.
mean_fi = []  # per-model importance vectors
std_fi = []   # per-model uncertainty vectors
feats = fy.cont_feats + fy.cat_feats
model_bar = master_bar(ensemble.models)
for m, model in enumerate(model_bar): # Average over models per fold
fi = get_nn_feat_importance(model, fy, eval_metric=eval_metric, plot=False, pb_parent=model_bar)
mean_fi.append(fi.Importance.values)
std_fi.append(fi.Uncertainty.values)
mean_fi = np.array(mean_fi)
std_fi = np.array(std_fi)
# Bootstrap (100 resamples) across models to get a robust per-feature mean of
# both the importances and their uncertainties; mp_run fans the jobs out.
bs_mean = mp_run([{'data': mean_fi[:,i], 'mean': True, 'name': i, 'n':100} for i in range(len(feats))], bootstrap_stats)
bs_std = mp_run([{'data': std_fi[:,i], 'mean': True, 'name': i, 'n':100} for i in range(len(feats))], bootstrap_stats)
fi = pd.DataFrame({
'Feature':feats,
'Importance': [np.mean(bs_mean[f'{i}_mean']) for i in range(len(feats))],
'Uncertainty': [np.mean(bs_std[f'{i}_mean']) for i in range(len(feats))]}).sort_values('Importance', ascending=False).reset_index(drop=True)
print("Top ten most important features:\n", fi[:min(len(fi), 10)])
# NOTE(review): fragment with stripped indentation — lines up to the first
# `return` belong to an inner `step_fn`; the surrounding `train_step` and the
# epoch loop below are separate scopes in the original file.
# Average the two span losses (presumably QA-style start/end logits — confirm).
total_loss = (start_loss + end_loss) / 2
# Scale the summed loss by the global batch size, as tf.distribute requires.
loss = tf.reduce_sum(total_loss) * (1.0 / train_batch_size)
grads = tape.gradient(loss, model.trainable_variables)
gradient_accumulator(grads)  # accumulate instead of applying immediately
return total_loss
# Run step_fn on every replica, then reduce the per-example losses to a scalar.
per_example_losses = strategy.experimental_run_v2(step_fn, args=(train_features, train_labels))
mean_loss = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_example_losses, axis=0)
return mean_loss
current_time = datetime.datetime.now()
train_iterator = master_bar(range(args.num_train_epochs))
global_step = 0
logging_loss = 0.0
for epoch in train_iterator:
# Inner bar only rendered on multi-device runs (display=args.n_device > 1).
epoch_iterator = progress_bar(
train_dataset, total=num_train_steps, parent=train_iterator, display=args.n_device > 1
)
step = 1
with strategy.scope():
for train_features, train_labels in epoch_iterator:
loss = train_step(train_features, train_labels)
# Apply the accumulated gradients only every `gradient_accumulation_steps` batches.
if step % args.gradient_accumulation_steps == 0:
strategy.experimental_run_v2(apply_gradients)
# NOTE(review): fragment — tail of an __init__ for what looks like a video-clip
# iterator; the signature and earlier attribute setup are above this paste.
# calculate overlap frames
# Stride between successive clips: clip length minus the overlapping portion.
self._next = int(self.n_clip - self.n_clip*self.overlap)
self._i = 0                  # current frame index within the stream
self._end_flag = True        # presumably signals "no clip in progress" — confirm
self._start_frames = None
self._frame = None
self._file_name = None
self._time_depth = None      # per-video metadata, filled lazily when a file is opened
self._frame_height = None
self._frame_width = None
self._video_data = None
# Progress bar over the dataset root; consumed manually through an iterator.
self.mb = master_bar(self.root_dir)
self.mb_iter = iter(self.mb)
# NOTE(review): fragment with stripped indentation — the opening `if task ==`
# branch is above this paste; the batch-loop body is cut off at the end.
model = ClassificationPointNet(num_classes=train_dataset.NUM_CLASSIFICATION_CLASSES,
point_dimension=train_dataset.POINT_DIMENSION)
elif task == 'segmentation':
model = SegmentationPointNet(num_classes=train_dataset.NUM_SEGMENTATION_CLASSES,
point_dimension=train_dataset.POINT_DIMENSION)
else:
raise Exception('Unknown task !')
if torch.cuda.is_available():
model.cuda()
# Optionally resume from a saved state dict.
if model_checkpoint:
model.load_state_dict(torch.load(model_checkpoint))
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
mb = master_bar(range(epochs))  # outer bar: one tick per epoch
if not os.path.isdir(output_folder):
os.mkdir(output_folder)
# CSV training log, header written once up front.
with open(os.path.join(output_folder, 'training_log.csv'), 'w+') as fid:
fid.write('train_loss,test_loss,train_accuracy,test_accuracy\n')
train_loss = []
test_loss = []
train_acc = []
test_acc = []
for epoch in mb:
epoch_train_loss = []
epoch_train_acc = []
batch_number = 0
# NOTE(review): the batch-loop body is truncated in this paste.
for data in progress_bar(train_dataloader, parent=mb):
# NOTE(review): incomplete — this function continues past the end of the paste,
# and its body indentation has been stripped.
def evaluate(args, strategy, model, tokenizer, labels, pad_token_label_id, mode):
"""Run distributed evaluation of a token-classification model over the `mode` split."""
# Global eval batch size = per-device size times device count.
eval_batch_size = args['per_device_eval_batch_size'] * args['n_device']
eval_dataset, size = load_and_cache_examples(args, tokenizer, labels, pad_token_label_id, eval_batch_size, mode=mode)
eval_dataset = strategy.experimental_distribute_dataset(eval_dataset)
preds = None
num_eval_steps = math.ceil(size / eval_batch_size)
# Single-tick master bar exists only to parent the per-batch progress bar.
master = master_bar(range(1))
eval_iterator = progress_bar(eval_dataset, total=num_eval_steps, parent=master, display=args['n_device'] > 1)
# Reduction.NONE keeps per-example losses so reduction can be done manually.
loss_fct = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
loss = 0.0
logging.info("***** Running evaluation *****")
logging.info(" Num examples = %d", size)
logging.info(" Batch size = %d", eval_batch_size)
for eval_features, eval_labels in eval_iterator:
inputs = {'attention_mask': eval_features['input_mask'], 'training': False}
# DistilBERT takes no token_type_ids; only BERT/XLNet use real segment ids.
if args['model_type'] != "distilbert":
inputs["token_type_ids"] = eval_features['segment_ids'] if args['model_type'] in ["bert", "xlnet"] else None
with strategy.scope():
logits = model(eval_features['input_ids'], **inputs)[0]