import mxnet as mx
from tqdm import tqdm

# Method of an RL searcher class (note the `self` references to the controller).
def _run_sync(self):
    decay = self.ema_baseline_decay
    for i in tqdm(range(self.num_trials // self.controller_batch_size + 1)):
        with mx.autograd.record():
            # sample controller_batch_size number of configurations;
            # the final iteration gets the remainder of num_trials
            # (the original read `self.num_trials % self.num_trials`,
            # which is always 0 and skipped the remainder batch)
            batch_size = self.num_trials % self.controller_batch_size \
                if i == self.num_trials // self.controller_batch_size \
                else self.controller_batch_size
            if batch_size == 0:
                continue
            configs, log_probs, entropies = self.controller.sample(
                batch_size, with_details=True)
            # schedule the training tasks and gather the rewards
            rewards = self.sync_schedule_tasks(configs)
            # subtract baseline
            if self.baseline is None:
                self.baseline = rewards[0]
            avg_rewards = mx.nd.array([reward - self.baseline for reward in rewards],
                                      ctx=self.controller.context)
            # EMA baseline: track the observed rewards by exponential decay
            for reward in rewards:
                self.baseline = decay * self.baseline + (1 - decay) * reward
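# Standalone sketch of the baseline arithmetic above, with made-up rewards
# (the values here are hypothetical, not from any benchmark run): advantages
# are measured against the current baseline before the baseline itself is
# updated as an exponential moving average of the observed rewards.
import numpy as np

decay = 0.95
baseline = None
rewards = [0.61, 0.58, 0.66, 0.63]  # assumed validation accuracies

if baseline is None:
    baseline = rewards[0]
advantages = np.array([r - baseline for r in rewards])  # mirrors avg_rewards
for r in rewards:
    baseline = decay * baseline + (1 - decay) * r
print(advantages, round(baseline, 4))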
import os
import shutil
import warnings

import numpy as np
import mxnet as mx
# Import paths below follow the legacy AutoGluon 0.0.x layout implied by
# task.fit/task.load in this fragment (an assumption about the version used):
from autogluon import TabularPrediction as task
from autogluon.utils.tabular.ml.constants import BINARY, MULTICLASS, REGRESSION

for idx in range(len(datasets)):  # enclosing loop assumed from the idx references below
    np.random.seed(seed_val)
    mx.random.seed(seed_val)
    dataset = datasets[idx]
    print("Evaluating Benchmark Dataset %s (%d of %d)" % (dataset['name'], idx+1, len(datasets)))
    directory = dataset['name'] + "/"
    train_file_path = directory + train_file
    test_file_path = directory + test_file
    if (not os.path.exists(train_file_path)) or (not os.path.exists(test_file_path)):
        # fetch files from S3:
        print("%s data not found locally, so fetching from %s" % (dataset['name'], dataset['url']))
        os.system("wget " + dataset['url'] + " -O temp.zip && unzip -o temp.zip && rm temp.zip")
    savedir = directory + 'AutogluonOutput/'
    shutil.rmtree(savedir, ignore_errors=True)  # delete AutoGluon output directory to ensure previous runs' information has been removed
    label_column = dataset['label_column']
    train_data = task.Dataset(file_path=train_file_path)
    test_data = task.Dataset(file_path=test_file_path)
    y_test = test_data[label_column]
    test_data = test_data.drop(labels=[label_column], axis=1)
    if fast_benchmark:
        train_data = train_data.head(subsample_size)  # subsample for fast_benchmark
    predictor = None  # reset from last dataset
    if fast_benchmark:
        predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                             hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                             time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
    else:
        predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                             hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
    results = predictor.fit_summary(verbosity=0)
    if predictor.problem_type != dataset['problem_type']:
        warnings.warn("For dataset %s: AutoGluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
    predictor = None  # reset predictor here to test loading the previously-trained predictor from file
    predictor = task.load(savedir)
    y_pred = predictor.predict(test_data)
    perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
    if dataset['problem_type'] != REGRESSION:
        perf = 1.0 - perf_dict['accuracy_score']  # convert accuracy to error rate
    else:
        perf = 1.0 - perf_dict['r2_score']  # unexplained variance score
    performance_vals[idx] = perf
    print("Performance on dataset %s: %s (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
    if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
        warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
                      (dataset['name'], performance_vals[idx] / (EPS + dataset['performance_val'])))
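# gbm_options and nn_options are referenced just below but not defined in this
# fragment. A plausible definition, modeled on the AutoGluon tabular
# hyperparameter-tuning tutorial (the exact ranges here are assumptions):
import autogluon as ag  # legacy AutoGluon API, matching task.fit above

nn_options = {
    'num_epochs': 10,
    'learning_rate': ag.space.Real(1e-4, 1e-2, default=5e-4, log=True),
    'dropout_prob': ag.space.Real(0.0, 0.5, default=0.1),
}
gbm_options = {
    'num_boost_round': 100,
    'num_leaves': ag.space.Int(lower=26, upper=66, default=36),
}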
hyperparameters = {'GBM': gbm_options, 'NN': nn_options}
num_trials = 3
time_limits = 30
###################################################################
# Each train/test dataset must be located in a single directory under the file names given below.
train_file = 'train_data.csv'
test_file = 'test_data.csv'
seed_val = 0 # random seed
EPS = 1e-10
# Information about each benchmark dataset is stored in a dict.
# performance_val = expected performance on this dataset (lower = better); update it based on previously-run benchmarks.
binary_dataset = {'url': 'https://autogluon.s3-us-west-2.amazonaws.com/datasets/AdultIncomeBinaryClassification.zip',
'name': 'AdultIncomeBinaryClassification',
'problem_type': BINARY,
'label_column': 'class',
'performance_val': 0.129} # Mixed types of features.
multi_dataset = {'url': 'https://autogluon.s3-us-west-2.amazonaws.com/datasets/CoverTypeMulticlassClassification.zip',
'name': 'CoverTypeMulticlassClassification',
'problem_type': MULTICLASS,
'label_column': 'Cover_Type',
'performance_val': 0.032} # big dataset with 7 classes, all features are numeric. Runs SLOW.
regression_dataset = {'url': 'https://autogluon.s3-us-west-2.amazonaws.com/datasets/AmesHousingPriceRegression.zip',
'name': 'AmesHousingPriceRegression',
'problem_type': REGRESSION,
'label_column': 'SalePrice',
'performance_val': 0.076} # Regression with mixed feature-types, skewed Y-values.
toyregres_dataset = {'url': 'https://autogluon.s3-us-west-2.amazonaws.com/datasets/toyRegression.zip',
'name': 'toyRegression',
'problem_type': REGRESSION,
'label_column': 'y',
'performance_val': 0.183}
# 1-D toy deterministic regression task with: heavy label+feature missingness, extra distraction column in test data
toyclassif_dataset = {'url': 'https://autogluon.s3-us-west-2.amazonaws.com/datasets/toyClassification.zip',
'name': 'toyClassification',
'problem_type': MULTICLASS,
'label_column': 'y',
'performance_val': 0.436}
# 2-D toy noisy, imbalanced 4-class classification task with: feature missingness, out-of-vocabulary feature categories in test data, out-of-vocabulary labels in test data, training column missing from test data, extra distraction columns in test data
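# Hypothetical driver settings wiring the pieces above together. The names
# match what the benchmark loop expects; the specific values are assumptions.
fast_benchmark = True        # if True, subsample data and cap training time
subsample_size = 1000        # rows kept per training set under fast_benchmark
hyperparameter_tune = True
verbosity = 2
perf_threshold = 1.1         # warn when error exceeds 1.1x the recorded value

datasets = [binary_dataset, multi_dataset, regression_dataset,
            toyregres_dataset, toyclassif_dataset]
performance_vals = [None] * len(datasets)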