# NOTE(review): removed a non-code scraping artifact (advertisement banner) that was pasted here; it was not part of the original source.
# Benchmark a single dataset: fit a predictor on the (optionally subsampled)
# training split, reload it from disk, score it on the held-out test split, and
# warn if performance regressed versus the previously recorded value.
# NOTE(review): indentation was lost in this fragment; the if/else bodies below
# were reconstructed from the statement order — confirm against upstream.
savedir = directory + 'AutogluonOutput/'
shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
label_column = dataset['label_column']
train_data = task.Dataset(file_path=train_file_path)
test_data = task.Dataset(file_path=test_file_path)
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column], axis=1)  # hide labels at inference time
if fast_benchmark:
    train_data = train_data.head(subsample_size)  # subsample for fast_benchmark
predictor = None  # reset from last Dataset
if fast_benchmark:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                         time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
else:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
results = predictor.fit_summary(verbosity=0)
if predictor.problem_type != dataset['problem_type']:
    warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
predictor = None  # We delete predictor here to test loading previously-trained predictor from file
predictor = task.load(savedir)
y_pred = predictor.predict(test_data)
perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
if dataset['problem_type'] != REGRESSION:
    perf = 1.0 - perf_dict['accuracy_score']  # convert accuracy to error-rate
else:
    perf = 1.0 - perf_dict['r2_score']  # unexplained variance score.
performance_vals[idx] = perf
print("Performance on dataset %s: %s (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
    # Original statement was truncated mid-call; completed with the ratio of
    # current to previous error — TODO confirm against upstream source.
    warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
                  (dataset['name'], performance_vals[idx] / dataset['performance_val']))
# Benchmark a single dataset: fit a predictor on the (optionally subsampled)
# training split, reload it from disk, score it on the held-out test split, and
# warn if performance regressed versus the previously recorded value.
# NOTE(review): indentation was lost in this fragment; the if/else bodies below
# were reconstructed from the statement order — confirm against upstream.
savedir = directory + 'AutogluonOutput/'
shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
label_column = dataset['label_column']
train_data = task.Dataset(file_path=train_file_path)
test_data = task.Dataset(file_path=test_file_path)
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column], axis=1)  # hide labels at inference time
if fast_benchmark:
    train_data = train_data.head(subsample_size)  # subsample for fast_benchmark
predictor = None  # reset from last Dataset
if fast_benchmark:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                         time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
else:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
results = predictor.fit_summary(verbosity=0)
if predictor.problem_type != dataset['problem_type']:
    warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
predictor = None  # We delete predictor here to test loading previously-trained predictor from file
predictor = task.load(savedir)
y_pred = predictor.predict(test_data)
perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
if dataset['problem_type'] != REGRESSION:
    perf = 1.0 - perf_dict['accuracy_score']  # convert accuracy to error-rate
else:
    perf = 1.0 - perf_dict['r2_score']  # unexplained variance score.
performance_vals[idx] = perf
print("Performance on dataset %s: %s (previous perf=%s)" % (dataset['name'], performance_vals[idx], dataset['performance_val']))
if (not fast_benchmark) and (performance_vals[idx] > dataset['performance_val'] * perf_threshold):
    # Original statement was truncated mid-call; completed with the ratio of
    # current to previous error — TODO confirm against upstream source.
    warnings.warn("Performance on dataset %s is %s times worse than previous performance." %
                  (dataset['name'], performance_vals[idx] / dataset['performance_val']))
import subprocess

# Fetch the dataset archive from s3 (if not present locally), then benchmark it:
# fit, reload from disk, and score on the held-out test split.
# NOTE(review): indentation was lost in this fragment; the if/else bodies below
# were reconstructed from the statement order — confirm against upstream.
print("%s data not found locally, so fetching from %s" % (dataset['name'], dataset['url']))
# Security: the original built a shell command by string concatenation with
# dataset['url'] via os.system(), which is shell-injection prone. Use list-form
# subprocess calls instead; check=True surfaces download/extract failures early.
subprocess.run(["wget", dataset['url'], "-O", "temp.zip"], check=True)
subprocess.run(["unzip", "-o", "temp.zip"], check=True)
os.remove("temp.zip")
savedir = directory + 'AutogluonOutput/'
shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
label_column = dataset['label_column']
train_data = task.Dataset(file_path=train_file_path)
test_data = task.Dataset(file_path=test_file_path)
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column], axis=1)  # hide labels at inference time
if fast_benchmark:
    train_data = train_data.head(subsample_size)  # subsample for fast_benchmark
predictor = None  # reset from last Dataset
if fast_benchmark:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                         time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
else:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
results = predictor.fit_summary(verbosity=0)
if predictor.problem_type != dataset['problem_type']:
    warnings.warn("For dataset %s: Autogluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
predictor = None  # We delete predictor here to test loading previously-trained predictor from file
predictor = task.load(savedir)
y_pred = predictor.predict(test_data)
perf_dict = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
if dataset['problem_type'] != REGRESSION:
    perf = 1.0 - perf_dict['accuracy_score']  # convert accuracy to error-rate
else:
    perf = 1.0 - perf_dict['r2_score']  # unexplained variance score.
"""
import autogluon as ag
from autogluon import TabularPrediction as task
# Training time:
train_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(100) # subsample for faster demo
print(train_data.head())
label_column = 'class' # specifies which column do we want to predict
savedir = 'ag_hpo_models/' # where to save trained models
hyperparams = {'NN': {'num_epochs': 10, 'activation': 'relu', 'dropout_prob': ag.Real(0.0,0.5)},
'GBM': {'num_boost_round': 1000, 'learning_rate': ag.Real(0.01,0.1,log=True)} }
predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
hyperparameter_tune=True, hyperparameters=hyperparams,
num_trials=5, time_limits=1*60, num_bagging_folds=0, stack_ensemble_levels=0) # since tuning_data = None, automatically determines train/validation split
results = predictor.fit_summary() # display detailed summary of fit() process
# Inference time:
test_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame
print(test_data.head())
perf = predictor.evaluate(test_data) # shorthand way to evaluate our predictor if test-labels available
# Otherwise we make predictions and can evaluate them later:
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column],axis=1) # Delete labels from test data since we wouldn't have them in practice
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)
""" Example script for predicting columns of tables, demonstrating simple use-case """
from autogluon import TabularPrediction as task
# Training time:
train_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(500) # subsample for faster demo
print(train_data.head())
label_column = 'class' # specifies which column do we want to predict
savedir = 'ag_models/' # where to save trained models
predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir) # since tuning_data = None, automatically determines train/validation split
results = predictor.fit_summary() # display summary of models trained during fit()
# Inference time:
test_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column],axis=1) # delete labels from test data since we wouldn't have them in practice
print(test_data.head())
predictor = task.load(savedir) # Unnecessary, we reload predictor just to demonstrate how to load previously-trained predictor from file
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)