def _test_dist_operations(sagemaker_session, image_uri, instance_type, dist_backend, train_instance_count=3):
    with timeout(minutes=DEFAULT_TIMEOUT):
        pytorch = PyTorch(entry_point=dist_operations_path,
                          role='SageMakerRole',
                          train_instance_count=train_instance_count,
                          train_instance_type=instance_type,
                          sagemaker_session=sagemaker_session,
                          image_name=image_uri,
                          hyperparameters={'backend': dist_backend})

        pytorch.sagemaker_session.default_bucket()
        fake_input = pytorch.sagemaker_session.upload_data(path=dist_operations_path,
                                                           key_prefix='pytorch/distributed_operations')

        job_name = utils.unique_name_from_base('test-pytorch-dist-ops')
        pytorch.fit({'required_argument': fake_input}, job_name=job_name)
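
# A minimal sketch of how the helper above might be invoked, assuming the pytest
# fixtures used throughout these tests; 'gloo' is one of the standard PyTorch
# distributed backends selected by the 'backend' hyperparameter (the other common
# choice on GPU instances is 'nccl').
def test_dist_operations_gloo(sagemaker_session, ecr_image, instance_type):
    _test_dist_operations(sagemaker_session, ecr_image, instance_type, 'gloo')
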
def test_training_smdebug(sagemaker_session, ecr_image, instance_type):
    hyperparameters = {'random_seed': True, 'num_steps': 50, 'smdebug_path': '/opt/ml/output/tensors', 'epochs': 1,
                       'data_dir': training_dir}
    with timeout(minutes=DEFAULT_TIMEOUT):
        pytorch = PyTorch(entry_point=smdebug_mnist_script,
                          role='SageMakerRole',
                          train_instance_count=1,
                          train_instance_type=instance_type,
                          sagemaker_session=sagemaker_session,
                          image_name=ecr_image,
                          hyperparameters=hyperparameters)

        training_input = pytorch.sagemaker_session.upload_data(path=training_dir,
                                                               key_prefix='pytorch/mnist')
        job_name = utils.unique_name_from_base('test-pytorch-smdebug')
        pytorch.fit({'training': training_input}, job_name=job_name)
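
# Hypothetical follow-up, not part of the original test: once training has written
# tensors to 'smdebug_path', the smdebug client library can load and inspect them.
# create_trial() is smdebug's public entry point; the path below mirrors the
# hyperparameter above and would normally point at the container output directory
# or an S3 location.
from smdebug.trials import create_trial

trial = create_trial('/opt/ml/output/tensors')
print(trial.tensor_names())
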
train_records = FileSystemRecordSet(
    file_system_id=file_system_fsx_id,
    file_system_type="FSxLustre",
    directory_path=FSX_DIR_PATH,
    num_records=NUM_RECORDS,
    feature_dim=FEATURE_DIM,
)  # channel defaults to "train"
test_records = FileSystemRecordSet(
    file_system_id=file_system_fsx_id,
    file_system_type="FSxLustre",
    directory_path=FSX_DIR_PATH,
    num_records=NUM_RECORDS,
    feature_dim=FEATURE_DIM,
    channel="test",
)

job_name = unique_name_from_base("tune-kmeans-fsx")
tuner.fit([train_records, test_records], job_name=job_name)
tuner.wait()

best_training_job = tuner.best_training_job()
assert best_training_job
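
# For context, a hedged sketch of the tuner this fragment assumes, built around
# SageMaker's built-in KMeans estimator; the instance type, objective metric, and
# hyperparameter range are illustrative assumptions, not values from the original.
from sagemaker import KMeans
from sagemaker.tuner import HyperparameterTuner, IntegerParameter

kmeans = KMeans(
    role="SageMakerRole",
    train_instance_count=1,
    train_instance_type="ml.c4.xlarge",
    k=10,
    sagemaker_session=sagemaker_session,
)
tuner = HyperparameterTuner(
    kmeans,
    objective_metric_name="test:msd",
    hyperparameter_ranges={"extra_center_factor": IntegerParameter(1, 10)},
    objective_type="Minimize",
    max_jobs=2,
    max_parallel_jobs=2,
)
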
estimator = TensorFlow(
    entry_point=SCRIPT,
    role=ROLE,
    train_instance_count=2,
    train_instance_type=instance_type,
    sagemaker_session=sagemaker_session,
    py_version=tests.integ.PYTHON_VERSION,
    script_mode=True,
    framework_version=tf_full_version,
    distributions=PARAMETER_SERVER_DISTRIBUTION,
)
inputs = estimator.sagemaker_session.upload_data(
    path=os.path.join(MNIST_RESOURCE_PATH, "data"), key_prefix="scriptmode/distributed_mnist"
)

with tests.integ.timeout.timeout(minutes=tests.integ.TRAINING_DEFAULT_TIMEOUT_MINUTES):
    estimator.fit(inputs=inputs, job_name=unique_name_from_base("test-tf-sm-distributed"))

assert_s3_files_exist(
    sagemaker_session,
    estimator.model_dir,
    ["graph.pbtxt", "model.ckpt-0.index", "model.ckpt-0.meta"],
)
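
# The distributions constant used above follows SageMaker's script-mode format for
# enabling the parameter-server strategy; its typical definition (an assumption
# here, since the original defines it elsewhere) is:
PARAMETER_SERVER_DISTRIBUTION = {"parameter_server": {"enabled": True}}
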
def test_dgl_training(sagemaker_session, ecr_image, instance_type):
    dgl = MXNet(entry_point=DGL_SCRIPT_PATH,
                role='SageMakerRole',
                train_instance_count=1,
                train_instance_type=instance_type,
                sagemaker_session=sagemaker_session,
                image_name=ecr_image)

    with timeout(minutes=15):
        job_name = utils.unique_name_from_base('test-dgl-image')
        dgl.fit(job_name=job_name)
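
# Note that fit() is called without input channels here; that is valid when the
# entry point script downloads or generates its own training data, as the DGL
# sample scripts do.
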
tuner = HyperparameterTuner(
    estimator,
    objective_metric_name,
    hyperparameter_ranges,
    metric_definitions,
    max_jobs=2,
    max_parallel_jobs=2,
)

with timeout(minutes=TUNING_DEFAULT_TIMEOUT_MINUTES):
    inputs = estimator.sagemaker_session.upload_data(
        path=os.path.join(resource_path, "data"), key_prefix="scriptmode/mnist"
    )

    tuning_job_name = unique_name_from_base("tune-tf-script-mode", max_length=32)
    tuner.fit(inputs, job_name=tuning_job_name)

    print("Started hyperparameter tuning job with name: " + tuning_job_name)

    time.sleep(15)
    tuner.wait()
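
# For a script-mode estimator like this one, the tuner's positional arguments take
# roughly the shape below; the metric name, regex, and range are illustrative
# assumptions rather than the values used by the original test.
from sagemaker.tuner import ContinuousParameter

objective_metric_name = "accuracy"
metric_definitions = [{"Name": "accuracy", "Regex": "accuracy = ([0-9\\.]+)"}]
hyperparameter_ranges = {"learning_rate": ContinuousParameter(0.01, 0.2)}
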
def test_tuning_kmeans_identical_dataset_algorithm_tuner(
    sagemaker_session, kmeans_train_set, kmeans_estimator, hyperparameter_ranges
):
    """Test the identical-dataset-and-algorithm use case: one parent job and one
    child job launched with .identical_dataset_and_algorithm_tuner()."""
    parent_tuning_job_name = unique_name_from_base("km-iden1-parent", max_length=32)
    child_tuning_job_name = unique_name_from_base("km-iden1-child", max_length=32)

    parent_tuner = _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=parent_tuning_job_name,
        hyperparameter_ranges=hyperparameter_ranges,
    )

    child_tuner = parent_tuner.identical_dataset_and_algorithm_tuner()
    _tune(
        kmeans_estimator,
        kmeans_train_set,
        job_name=child_tuning_job_name,
        tuner=child_tuner,
        max_parallel_jobs=1,
        max_jobs=1,
    )
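
# A related warm-start entry point on HyperparameterTuner is
# transfer_learning_tuner(), which also seeds a child tuner from a parent but
# allows the dataset to differ, e.g. (illustrative, not from the original test):
# child_tuner = parent_tuner.transfer_learning_tuner()
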
def _test_mnist_deploy(sagemaker_session, instance_type):
    model_path = 'test/resources/mnist/model.tar.gz'
    script_path = 'test/resources/mnist/mnist.py'

    endpoint_name = sagemaker.utils.unique_name_from_base('sagemaker-chainer-test')
    model_data = sagemaker_session.upload_data(
        path=model_path,
        key_prefix='sagemaker-chainer/models',
    )

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session, minutes=30):
        chainer = ChainerModel(
            model_data=model_data,
            role='SageMakerRole',
            entry_point=script_path,
            sagemaker_session=sagemaker_session,
        )
        predictor = chainer.deploy(initial_instance_count=1, instance_type=instance_type)

        batch_size = 100
        data = np.zeros(shape=(batch_size, 1, 28, 28), dtype='float32')
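        # The snippet ends here; a natural continuation (an assumption based on the
        # zero-filled MNIST-shaped batch above) would exercise the endpoint:
        # output = predictor.predict(data)
        # assert len(output) == batch_size
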
def test_lda(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("lda")

    with timeout(minutes=TRAINING_DEFAULT_TIMEOUT_MINUTES):
        data_path = os.path.join(DATA_DIR, "lda")
        data_filename = "nips-train_1.pbr"

        with open(os.path.join(data_path, data_filename), "rb") as f:
            all_records = read_records(f)

        # All records must share the same feature dimension.
        feature_num = int(all_records[0].features["values"].float32_tensor.shape[0])

        lda = LDA(
            role="SageMakerRole",
            train_instance_type=cpu_instance_type,
            num_topics=10,
            sagemaker_session=sagemaker_session,
        )
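        # A hedged sketch of how training would proceed from here, modeled on
        # SageMaker's own LDA tests; prepare_record_set_from_local_files is a test
        # helper and the mini-batch size is an illustrative assumption.
        # record_set = prepare_record_set_from_local_files(
        #     data_path, lda.data_location, len(all_records), feature_num, sagemaker_session
        # )
        # lda.fit(records=record_set, mini_batch_size=100, job_name=job_name)
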
def test_async_knn_classifier(sagemaker_session, cpu_instance_type):
    job_name = unique_name_from_base("knn")

    with timeout(minutes=5):
        data_path = os.path.join(DATA_DIR, "one_p_mnist", "mnist.pkl.gz")
        pickle_args = {} if sys.version_info.major == 2 else {"encoding": "latin1"}

        # Load the data into memory as numpy arrays.
        with gzip.open(data_path, "rb") as f:
            train_set, _, _ = pickle.load(f, **pickle_args)

        knn = KNN(
            role="SageMakerRole",
            train_instance_count=1,
            train_instance_type=cpu_instance_type,
            k=10,
            predictor_type="classifier",
            sample_size=500,
            sagemaker_session=sagemaker_session,
        )
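        # A minimal sketch of the asynchronous fit implied by the test's name, using
        # the record_set() helper KNN inherits from the Amazon estimator base class;
        # the slice size is an illustrative assumption.
        # record_set = knn.record_set(train_set[0][:200], train_set[1][:200].astype("float32"))
        # knn.fit(record_set, job_name=job_name, wait=False)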