# Fit one random forest on all tasks jointly; single-task data is flattened because
# sklearn expects a flat target vector in that case.
if args.dataset_type == 'regression':
    model = RandomForestRegressor(n_estimators=args.num_trees, n_jobs=-1)
elif args.dataset_type == 'classification':
    model = RandomForestClassifier(n_estimators=args.num_trees, n_jobs=-1)
else:
    raise ValueError(f'dataset_type "{args.dataset_type}" not supported.')

train_targets = train_data.targets()
if train_data.num_tasks() == 1:
    train_targets = [targets[0] for targets in train_targets]

model.fit(train_data.features(), train_targets)

test_preds = model.predict(test_data.features())
if train_data.num_tasks() == 1:
    test_preds = [[pred] for pred in test_preds]  # restore the per-task nesting expected by evaluate_predictions

scores = evaluate_predictions(
    preds=test_preds,
    targets=test_data.targets(),
    metric_func=metric_func,
    dataset_type=args.dataset_type
)

return scores
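# A minimal sketch (an assumption, not the project's actual implementation) of what the
# evaluate_predictions call above is expected to do: preds and targets are per-molecule
# lists with one entry per task, and the metric is computed column-wise, skipping
# molecules whose target for that task is missing.
from typing import Callable, List, Optional

def evaluate_predictions_sketch(preds: List[List[float]],
                                targets: List[List[Optional[float]]],
                                metric_func: Callable[[List[float], List[float]], float],
                                dataset_type: str) -> List[float]:
    num_tasks = len(preds[0])
    scores = []
    for task in range(num_tasks):
        task_preds, task_targets = [], []
        for pred_row, target_row in zip(preds, targets):
            if target_row[task] is not None:  # skip molecules without a label for this task
                task_preds.append(pred_row[task])
                task_targets.append(target_row[task])
        scores.append(metric_func(task_targets, task_preds))  # metric_func(y_true, y_pred), sklearn-style
    return scores  # one score per task; dataset_type is kept only to mirror the call above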
# Per-task variant: this block is intended to run once per task, on the molecules that
# have a label for that task (train_features / train_targets, test_features / test_targets);
# see the wrapper sketch below.
if args.dataset_type == 'regression':
    model = RandomForestRegressor(n_estimators=args.num_trees, n_jobs=-1)
elif args.dataset_type == 'classification':
    model = RandomForestClassifier(class_weight=args.class_weight, n_estimators=args.num_trees, n_jobs=-1)
else:
    raise ValueError(f'dataset_type "{args.dataset_type}" not supported.')

model.fit(train_features, train_targets)

test_preds = model.predict(test_features)
test_preds = [[pred] for pred in test_preds]          # wrap as single-task columns
test_targets = [[target] for target in test_targets]

score = evaluate_predictions(
    preds=test_preds,
    targets=test_targets,
    metric_func=metric_func,
    dataset_type=args.dataset_type
)
scores.append(score[0])  # one score per task

return scores
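# Hypothetical wrapper for the per-task block above (names and the exact filtering are
# assumptions, not taken from the original file): one forest is fit per task, using only
# the molecules that have a label for that task. Regression only, for brevity.
from typing import Callable, List, Optional
from sklearn.ensemble import RandomForestRegressor

def single_task_rf_sketch(train_features: List[List[float]],
                          train_targets: List[List[Optional[float]]],
                          test_features: List[List[float]],
                          test_targets: List[List[Optional[float]]],
                          metric_func: Callable[[List[float], List[float]], float],
                          num_trees: int = 500) -> List[float]:
    scores = []
    for task in range(len(train_targets[0])):
        # drop molecules without a label for this task
        train_pairs = [(f, t[task]) for f, t in zip(train_features, train_targets) if t[task] is not None]
        test_pairs = [(f, t[task]) for f, t in zip(test_features, test_targets) if t[task] is not None]
        train_x, train_y = map(list, zip(*train_pairs))
        test_x, test_y = map(list, zip(*test_pairs))
        model = RandomForestRegressor(n_estimators=num_trees, n_jobs=-1)
        model.fit(train_x, train_y)
        scores.append(metric_func(test_y, list(model.predict(test_x))))
    return scores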
info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)
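# These two logging lines presumably sit inside a per-task loop over the model's test
# scores; a plausible surrounding loop (an assumption about the original file, reusing the
# same names) would be:
for task_name, test_score in zip(args.task_names, test_scores):
    info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')
    writer.add_scalar(f'test_{task_name}_{args.metric}', test_score, n_iter)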
# Evaluate ensemble on test set
if args.dataset_type == 'bert_pretraining':
    avg_test_preds = {
        'features': (sum_test_preds['features'] / args.ensemble_size).tolist() if sum_test_preds['features'] is not None else None,
        'vocab': (sum_test_preds['vocab'] / args.ensemble_size).tolist()
    }
else:
    avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

if len(test_data) == 0:  # just return some garbage when we didn't want test data
    ensemble_scores = test_scores
else:
    ensemble_scores = evaluate_predictions(
        preds=avg_test_preds,
        targets=test_targets,
        metric_func=metric_func,
        dataset_type=args.dataset_type,
        args=args,
        logger=logger
    )

# Average ensemble score
if args.dataset_type == 'bert_pretraining':
    if ensemble_scores['features'] is not None:
        info(f'Ensemble test features rmse = {ensemble_scores["features"]:.6f}')
        writer.add_scalar('ensemble_test_features_rmse', ensemble_scores['features'], 0)
    ensemble_scores = [ensemble_scores['vocab']]

avg_ensemble_test_score = np.nanmean(ensemble_scores)
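# Toy illustration of the ensembling arithmetic above (dummy numbers, not project code):
# per-model predictions are summed, divided by the ensemble size, and the per-task
# ensemble scores are averaged with np.nanmean so that NaN-scored tasks are ignored.
import numpy as np

model_preds = [np.array([[0.2], [0.8]]),   # model 0 predictions (2 molecules, 1 task)
               np.array([[0.4], [0.6]])]   # model 1 predictions
sum_test_preds = np.zeros_like(model_preds[0])
for preds in model_preds:
    sum_test_preds += preds
avg_test_preds = (sum_test_preds / len(model_preds)).tolist()   # [[0.3], [0.7]]

ensemble_scores = [0.91, float('nan')]                          # e.g. one task had no valid score
avg_ensemble_test_score = np.nanmean(ensemble_scores)           # 0.91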
# Split the test set into molecules whose SMILES appear in overlap_smiles and those that
# don't, then evaluate the model separately on each subset.
test_data_intersect, test_data_nonintersect = [], []
for d in test_data.data:
    if d.smiles in overlap_smiles:
        test_data_intersect.append(d)
    else:
        test_data_nonintersect.append(d)
test_data_intersect, test_data_nonintersect = MoleculeDataset(test_data_intersect), MoleculeDataset(test_data_nonintersect)

for name, td in [('Intersect', test_data_intersect), ('Nonintersect', test_data_nonintersect)]:
    test_preds = predict(
        model=model,
        data=td,
        args=args,
        scaler=scaler,
        logger=logger
    )
    test_scores = evaluate_predictions(
        preds=test_preds,
        targets=td.targets(),
        metric_func=metric_func,
        dataset_type=args.dataset_type,
        args=args,
        logger=logger
    )
    avg_test_score = np.nanmean(test_scores)
    info(f'Model {model_idx} test {args.metric} for {name} = {avg_test_score:.6f}')
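# overlap_smiles is not defined in this excerpt; a plausible construction (an assumption,
# not taken from the original file) is the set of SMILES shared between a reference split
# and the test set, so performance can be compared on seen vs. unseen molecules.
from typing import Iterable, Set

def compute_overlap_smiles(reference_smiles: Iterable[str], test_smiles: Iterable[str]) -> Set[str]:
    # d.smiles above is checked for membership in the returned set
    return set(reference_smiles) & set(test_smiles)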
if len(test_data) == 0:  # just get some garbage results without crashing; in this case we didn't care anyway
    test_preds, test_scores = sum_test_preds, [0 for _ in range(len(args.task_names))]
else:
    test_preds = predict(
        model=model,
        data=test_data,
        args=args,
        scaler=scaler,
        logger=logger
    )
    test_scores = evaluate_predictions(
        preds=test_preds,
        targets=test_targets,
        metric_func=metric_func,
        dataset_type=args.dataset_type,
        args=args,
        logger=logger
    )
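# Minimal sketch of the predict() helper assumed above (hypothetical; the project's real
# implementation also handles featurization and batching of the dataset). Assumes model
# is a PyTorch module that maps a batch to one row of predictions per molecule, and that
# scaler, if given, exposes inverse_transform as sklearn-style scalers do.
import torch

def predict_sketch(model: torch.nn.Module, batches, scaler=None):
    model.eval()
    preds = []
    with torch.no_grad():
        for batch in batches:
            batch_preds = model(batch).cpu().numpy()
            if scaler is not None:
                # undo the target normalization applied during training
                batch_preds = scaler.inverse_transform(batch_preds)
            preds.extend(batch_preds.tolist())
    return preds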
if args.maml:
    if sum_test_preds is None:
        sum_test_preds = np.zeros(np.array(test_preds).shape)

if args.dataset_type == 'bert_pretraining':
    if test_preds['features'] is not None:
        sum_test_preds['features'] += np.array(test_preds['features'])
    sum_test_preds['vocab'] += np.array(test_preds['vocab'])
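# Toy illustration (dummy shapes and values) of the dict-shaped accumulation used for
# 'bert_pretraining' above: predictions carry separate 'features' and 'vocab' arrays,
# and each model's outputs are added into a running sum before ensemble averaging.
import numpy as np

sum_test_preds = {'features': np.zeros((2, 4)), 'vocab': np.zeros((2, 10))}
test_preds = {'features': np.random.rand(2, 4), 'vocab': np.random.rand(2, 10)}
if test_preds['features'] is not None:
    sum_test_preds['features'] += np.array(test_preds['features'])
sum_test_preds['vocab'] += np.array(test_preds['vocab'])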