# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# --- Fragment: CLI options and preprocessing pipeline for a chronological
# train/val re-split followed by hyperparameter optimization. ---
# NOTE(review): `parser` is constructed above this chunk (not visible here), and
# `args.save_dir` / `args.quiet` used below are presumably added by an earlier
# add_train_args()-style call — confirm against the full file.
parser.add_argument('--train_path', type=str, required=True,
help='Path to CSV file containing training data in chronological order')
parser.add_argument('--val_path', type=str, required=True,
help='Path to CSV file containing val data in chronological order')
parser.add_argument('--train_save', type=str, required=True,
help='Path to CSV file for new train data')
parser.add_argument('--val_save', type=str, required=True,
help='Path to CSV file for new val data')
parser.add_argument('--val_frac', type=float, default=0.2,
help='frac of data to use for validation')
parser.add_argument('--train_val_save', type=str, required=True,
help='Path to CSV file for combined train and val data')
args = parser.parse_args()
set_logger(logger, args.save_dir, args.quiet)
modify_train_args(args)
modify_hyper_opt_args(args)
# Preprocess train and validation data
resplit(args)
merge_train_val(args)
# Deduplicate each written CSV in place: data_path and save_path point at the
# same file, so average_duplicates overwrites it with its cleaned output.
for path in [args.train_save, args.val_save, args.train_val_save]:
args.data_path = path
args.save_path = path
average_duplicates(args)
# Optimize hyperparameters
# Train on the new train split; the held-out val CSV serves as the separate
# evaluation set during the search.
args.data_path = args.train_save
args.separate_test_set = args.val_save
optimize_hyperparameters(args)
# Determine best hyperparameters, update args, and train
# --- Fragment: tail of a web-form validation handler ---
# NOTE(review): as pasted, the `return` below makes the following `if`
# unreachable; lines were almost certainly dropped between the two returns in
# the original file — verify upstream before relying on this control flow.
return render_train(warnings=warnings, errors=errors)
# Guard: a "regression" dataset whose target values are only 0/1 is almost
# certainly a classification task; report an error instead of training.
if dataset_type == 'regression' and unique_targets <= {0, 1}:
errors.append('Selected regression dataset but all labels are 0 or 1. Select classification instead.')
return render_train(warnings=warnings, errors=errors)
# --- Fragment: interior of a web training handler ---
# GPU selection from the request: the literal string 'None' means "CPU only";
# any other value is parsed as an integer device index.
if gpu is not None:
if gpu == 'None':
args.no_cuda = True
else:
args.gpu = int(gpu)
# Train inside a temporary directory so checkpoints/artifacts are cleaned up
# automatically when the with-block exits.
with TemporaryDirectory() as temp_dir:
args.save_dir = temp_dir
modify_train_args(args)
# Dedicated 'train' logger; propagate=False keeps its records out of the
# root logger's handlers.
logger = logging.getLogger('train')
logger.setLevel(logging.DEBUG)
logger.propagate = False
set_logger(logger, args.save_dir, args.quiet)
# The progress bar runs in a separate process so the UI can poll `progress`
# while this process is busy training.
process = mp.Process(target=progress_bar, args=(args, progress))
process.start()
training = 1
# Run training
task_scores = run_training(args, logger)
process.join()
# Reset globals
# NOTE(review): `training` looks like a module-level flag read elsewhere;
# without a `global training` declaration in the enclosing scope this may
# only rebind a local — confirm against the full function.
training = 0
# --- Fragment: persist the winning hyperparameter configuration ---
# Save best hyperparameter settings as JSON config file
# NOTE(review): `best_result` is produced above this chunk (not visible here).
# sort_keys + indent make the written config deterministic and diff-friendly.
with open(args.config_save_path, 'w') as f:
json.dump(best_result['hyperparams'], f, indent=4, sort_keys=True)
if __name__ == '__main__':
    # Command line for the hyperparameter-search script: the standard training
    # options plus the search-specific settings below.
    arg_parser = ArgumentParser()
    add_train_args(arg_parser)
    arg_parser.add_argument('--num_iters', type=int, default=20,
                            help='Number of hyperparameter choices to try')
    arg_parser.add_argument('--config_save_path', type=str, required=True,
                            help='Path to .json file where best hyperparameter settings will be written')
    arg_parser.add_argument('--log_path', type=str,
                            help='(Optional) Path to .log file where all results of the hyperparameter optimization will be written')

    # Parse, normalize, and kick off the grid search.
    cli_args = arg_parser.parse_args()
    modify_train_args(cli_args)
    grid_search(cli_args)
logger: logging.Logger,
features_dir: str = None):
for dataset_name in experiment_args.datasets:
dataset_type, dataset_path, num_folds, metric = DATASETS[dataset_name]
logger.info(dataset_name)
# Set up args
args = deepcopy(experiment_args)
args.data_path = dataset_path
args.dataset_type = dataset_type
args.save_dir = os.path.join(args.save_dir, dataset_name)
args.num_folds = num_folds
args.metric = metric
if features_dir is not None:
args.features_path = [os.path.join(features_dir, dataset_name + '.pckl')]
modify_train_args(args)
# Set up logging for training
os.makedirs(args.save_dir, exist_ok=True)
fh = logging.FileHandler(os.path.join(args.save_dir, args.log_name))
fh.setLevel(logging.DEBUG)
# Cross validate
TRAIN_LOGGER.addHandler(fh)
mean_score, std_score = cross_validate(args, TRAIN_LOGGER)
TRAIN_LOGGER.removeHandler(fh)
# Record results
logger.info(f'{mean_score} +/- {std_score} {metric}')
temp_model = build_model(args)
logger.info(f'num params: {param_count(temp_model):,}')