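# The snippet below is the body of a per-dataset benchmark loop and relies on imports
# and configuration variables defined elsewhere in the surrounding script. The
# definitions sketched here are illustrative placeholders only (not the original
# benchmark settings), provided so the snippet can be read in isolation.
import os
import shutil
import warnings
import numpy as np
import mxnet as mx
from autogluon import TabularPrediction as task

seed_val = 0                    # random seed used below for reproducibility
train_file = 'train_data.csv'   # assumed file names inside each dataset's directory
test_file = 'test_data.csv'
fast_benchmark = True           # if True, subsample the data and restrict training
subsample_size = 1000
hyperparameter_tune = False
hyperparameters = None
time_limits = 60
num_trials = 3
verbosity = 2
datasets = [
    {'name': 'ExampleDataset',                      # hypothetical entry describing one benchmark dataset
     'url': 'https://<bucket>/ExampleDataset.zip',  # placeholder URL of the zipped train/test CSVs
     'label_column': 'class',
     'problem_type': 'binary'},
]
idx = 0  # in the real benchmark this index comes from an enclosing loop over `datasets`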
np.random.seed(seed_val)
mx.random.seed(seed_val)
dataset = datasets[idx]
print("Evaluating Benchmark Dataset %s (%d of %d)" % (dataset['name'], idx+1, len(datasets)))
directory = dataset['name'] + "/"
train_file_path = directory + train_file
test_file_path = directory + test_file
if (not os.path.exists(train_file_path)) or (not os.path.exists(test_file_path)):
    # fetch files from s3:
    print("%s data not found locally, so fetching from %s" % (dataset['name'], dataset['url']))
    os.system("wget " + dataset['url'] + " -O temp.zip && unzip -o temp.zip && rm temp.zip")
savedir = directory + 'AutogluonOutput/'
shutil.rmtree(savedir, ignore_errors=True)  # Delete AutoGluon output directory to ensure previous runs' information has been removed.
label_column = dataset['label_column']
train_data = task.Dataset(file_path=train_file_path)
test_data = task.Dataset(file_path=test_file_path)
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column], axis=1)  # hold out the labels for evaluation
if fast_benchmark:
    train_data = train_data.head(subsample_size)  # subsample for fast_benchmark
predictor = None  # reset from last Dataset
if fast_benchmark:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, hyperparameters=hyperparameters,
                         time_limits=time_limits, num_trials=num_trials, verbosity=verbosity)
else:
    predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                         hyperparameter_tune=hyperparameter_tune, verbosity=verbosity)
results = predictor.fit_summary(verbosity=0)
if predictor.problem_type != dataset['problem_type']:
    warnings.warn("For dataset %s: AutoGluon inferred problem_type = %s, but should = %s" % (dataset['name'], predictor.problem_type, dataset['problem_type']))
predictor = None  # We delete the predictor here to test loading a previously-trained predictor from file
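# A minimal sketch of how the benchmark can continue from this point: reload the
# predictor that fit() saved to `savedir` and score it on the held-out labels.
# The variable name `perf` is illustrative; task.load(), predict() and
# evaluate_predictions() are the same calls used in the example scripts below.
predictor = task.load(savedir)
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)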
""" Example script for predicting columns of tables, demonstrating simple use-case """
from autogluon import TabularPrediction as task
# Training time:
train_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(500) # subsample for faster demo
print(train_data.head())
label_column = 'class' # specifies which column we want to predict
savedir = 'ag_models/' # where to save trained models
predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir) # since tuning_data = None, automatically determines train/validation split
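# If a separate validation set is already available, it could instead be passed
# explicitly via the tuning_data argument (val_data below is hypothetical):
# predictor = task.fit(train_data=train_data, tuning_data=val_data, label=label_column, output_directory=savedir)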
results = predictor.fit_summary() # display summary of models trained during fit()
# Inference time:
test_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column],axis=1) # delete labels from test data since we wouldn't have them in practice
print(test_data.head())
predictor = task.load(savedir) # unnecessary here; we reload the predictor just to demonstrate how to load a previously-trained predictor from file
y_pred = predictor.predict(test_data)
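# Since the true labels were kept in y_test above, the predictions can be scored
# directly (the same call is used in the advanced example below):
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)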
""" Example script for predicting columns of tables, demonstrating more advanced usage of fit().
Note that all settings demonstrated here are just chosen for demonstration purposes (to minimize runtime),
and do not represent wise choices to use in practice.
"""
import autogluon as ag
from autogluon import TabularPrediction as task
# Training time:
train_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/train.csv') # can be local CSV file as well, returns Pandas DataFrame
train_data = train_data.head(100) # subsample for faster demo
print(train_data.head())
label_column = 'class' # specifies which column we want to predict
savedir = 'ag_hpo_models/' # where to save trained models
hyperparams = {'NN': {'num_epochs': 10, 'activation': 'relu', 'dropout_prob': ag.Real(0.0, 0.5)},
               'GBM': {'num_boost_round': 1000, 'learning_rate': ag.Real(0.01, 0.1, log=True)}}
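# ag.Real(lower, upper) declares a continuous search range for a hyperparameter,
# and log=True samples the learning rate on a log scale; plain values such as
# num_epochs=10 are kept fixed rather than searched over.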
predictor = task.fit(train_data=train_data, label=label_column, output_directory=savedir,
                     hyperparameter_tune=True, hyperparameters=hyperparams,
                     num_trials=5, time_limits=1*60, num_bagging_folds=0, stack_ensemble_levels=0) # since tuning_data = None, automatically determines train/validation split
results = predictor.fit_summary() # display detailed summary of fit() process
# Inference time:
test_data = task.Dataset(file_path='https://autogluon.s3-us-west-2.amazonaws.com/datasets/Inc/test.csv') # another Pandas DataFrame
print(test_data.head())
perf = predictor.evaluate(test_data) # shorthand way to evaluate our predictor if test-labels available
# Otherwise we make predictions and can evaluate them later:
y_test = test_data[label_column]
test_data = test_data.drop(labels=[label_column],axis=1) # Delete labels from test data since we wouldn't have them in practice
y_pred = predictor.predict(test_data)
perf = predictor.evaluate_predictions(y_true=y_test, y_pred=y_pred, auxiliary_metrics=True)