def test_training(sagemaker_session, ecr_image, instance_type, instance_count):
    hyperparameters = {'random_seed': True,
                       'num_steps': 50,
                       'smdebug_path': '/opt/ml/output/tensors',
                       'epochs': 1}

    mx = MXNet(entry_point=SCRIPT_PATH,
               role='SageMakerRole',
               train_instance_count=instance_count,
               train_instance_type=instance_type,
               sagemaker_session=sagemaker_session,
               image_name=ecr_image,
               hyperparameters=hyperparameters)

    with timeout(minutes=15):
        prefix = 'mxnet_mnist_gluon_basic_hook_demo/{}'.format(utils.sagemaker_timestamp())
        train_input = mx.sagemaker_session.upload_data(path=os.path.join(DATA_PATH, 'train'),
                                                       key_prefix=prefix + '/train')
        test_input = mx.sagemaker_session.upload_data(path=os.path.join(DATA_PATH, 'test'),
                                                      key_prefix=prefix + '/test')

        job_name = utils.unique_name_from_base('test-mxnet-image')
        mx.fit({'train': train_input, 'test': test_input}, job_name=job_name)
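
# The test above assumes a `timeout` context manager from the shared test
# utilities. A minimal illustrative sketch (an assumption, not the actual
# helper) could use a signal-based alarm; note this only works on Unix in the
# main thread.
import signal
from contextlib import contextmanager


@contextmanager
def timeout(minutes=15):
    def _handler(signum, frame):
        raise TimeoutError('timed out after {} minutes'.format(minutes))

    signal.signal(signal.SIGALRM, _handler)
    signal.alarm(int(minutes * 60))
    try:
        yield
    finally:
        signal.alarm(0)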
def test_deploy_model(sklearn_training_job, sagemaker_session, cpu_instance_type):
    endpoint_name = "test-sklearn-deploy-model-{}".format(sagemaker_timestamp())
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=sklearn_training_job
        )
        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
        script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py")
        model = SKLearnModel(
            model_data,
            "SageMakerRole",
            entry_point=script_path,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        _predict_and_assert(predictor)
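
# `_predict_and_assert` is a helper defined elsewhere in the test module. A
# hypothetical sketch of what it might do: send a small placeholder batch to
# the endpoint and check that one prediction comes back per input row. The
# 64-feature width is an illustrative assumption, not taken from the source.
import numpy as np


def _predict_and_assert(predictor, batch_size=5):
    data = np.zeros((batch_size, 64), dtype=np.float32)
    output = predictor.predict(data)
    assert len(output) == batch_size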
# eval data... repeat so there's enough to cover multicpu/gpu contexts
eval_data = np.array([[7, 2], [6, 10], [12, 2]]).repeat(32, 0)
eval_label = np.array([11, 26, 16]).repeat(32, 0)

# save training data
for path in ['training', 'evaluation']:
    os.makedirs(os.path.join(opt_ml, 'input', 'data', path))

np.savetxt(os.path.join(opt_ml, 'input/data/training/train_data.txt'), train_data)
np.savetxt(os.path.join(opt_ml, 'input/data/training/train_label.txt'), train_label)
np.savetxt(os.path.join(opt_ml, 'input/data/evaluation/eval_data.txt'), eval_data)
np.savetxt(os.path.join(opt_ml, 'input/data/evaluation/eval_label.txt'), eval_label)

s3_source_archive = fw_utils.tar_and_upload_dir(session=sagemaker_session.boto_session,
                                                bucket=sagemaker_session.default_bucket(),
                                                s3_key_prefix=sagemaker_timestamp(),
                                                script='linear_regression.py',
                                                directory=resource_path)

utils.create_config_files('linear_regression.py', s3_source_archive.s3_prefix, opt_ml)
os.makedirs(os.path.join(opt_ml, 'model'))

docker_utils.train(docker_image, opt_ml, processor)

for f in ['output/success', 'model/model-symbol.json', 'model/model-0000.params',
          'model/model-shapes.json']:
    assert os.path.exists(os.path.join(opt_ml, f)), 'expected file not found: {}'.format(f)
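
# For reference, the snippet above builds a local stand-in for the container's
# /opt/ml directory. After a successful run the assertions expect roughly:
#
#   opt_ml/
#       input/data/training/train_data.txt, train_label.txt
#       input/data/evaluation/eval_data.txt, eval_label.txt
#       model/model-symbol.json, model-0000.params, model-shapes.json
#       output/success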
additional_env_vars, additional_hps=None, cluster_size=1, source_dir=None,
entrypoint=None, use_gpu=False):
    additional_hps = additional_hps or {}

    session = boto3.Session()
    tmpdir = os.path.abspath(optml)

    hosts = create_host_names(cluster_size)
    print('creating hosts: {}'.format(hosts))

    config = create_input_data_config(data_dir)
    hyperparameters = read_hyperparameters(additional_hps)

    if customer_script:
        timestamp = utils.sagemaker_timestamp()
        s3_script_path = fw_utils.tar_and_upload_dir(session=session,
                                                     bucket=default_bucket(session),
                                                     s3_key_prefix='test-{}'.format(timestamp),
                                                     script=customer_script,
                                                     directory=source_dir)[0]
        hyperparameters.update({
            'sagemaker_submit_directory': s3_script_path,
            'sagemaker_program': os.path.basename(customer_script)
        })

    for host in hosts:
        for d in ['input', 'input/config', 'output', 'model']:
            os.makedirs(os.path.join(tmpdir, host, d))

        write_hyperparameters(tmpdir, host, hyperparameters)
        write_resource_config(tmpdir, hosts, host)
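
# `write_resource_config` is another test helper assumed above. SageMaker
# containers read /opt/ml/input/config/resourceconfig.json to discover the
# cluster layout, so an illustrative sketch (an assumption, not the real
# helper) could look like this:
import json
import os


def write_resource_config(tmpdir, hosts, current_host):
    config = {'current_host': current_host, 'hosts': hosts}
    config_dir = os.path.join(tmpdir, current_host, 'input', 'config')
    with open(os.path.join(config_dir, 'resourceconfig.json'), 'w') as f:
        json.dump(config, f)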
def create_docker_services(command, tmpdir, hosts, image, additional_volumes, additional_env_vars,
                           customer_script, source_dir, entrypoint, use_gpu=False):
    environment = []
    session = boto3.Session()

    optml_dirs = set()
    if command == 'train':
        optml_dirs = {'output', 'input'}

    elif command == 'serve':
        environment.extend(DEFAULT_HOSTING_ENV)

        if customer_script:
            timestamp = utils.sagemaker_timestamp()
            s3_script_path = fw_utils.tar_and_upload_dir(session=session,
                                                         bucket=default_bucket(session),
                                                         s3_key_prefix='test-{}'.format(timestamp),
                                                         script=customer_script,
                                                         directory=source_dir)[0]

            environment.extend([
                'SAGEMAKER_PROGRAM={}'.format(os.path.basename(customer_script)),
                'SAGEMAKER_SUBMIT_DIRECTORY={}'.format(s3_script_path)
            ])

    else:
        raise ValueError('Unexpected command: {}'.format(command))

    environment.extend(credentials_to_env(session))
    environment.extend(additional_env_vars)
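
# `credentials_to_env` is assumed to translate the boto3 session's credentials
# into docker-compose style environment entries. A hypothetical sketch:
def credentials_to_env(session):
    creds = session.get_credentials()
    env = [
        'AWS_ACCESS_KEY_ID={}'.format(creds.access_key),
        'AWS_SECRET_ACCESS_KEY={}'.format(creds.secret_key),
    ]
    if creds.token:
        env.append('AWS_SESSION_TOKEN={}'.format(creds.token))
    return env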
def test_mnist_with_checkpoint_config(sagemaker_session, instance_type, tf_full_version):
    checkpoint_s3_uri = "s3://{}/checkpoints/tf-{}".format(
        sagemaker_session.default_bucket(), sagemaker_timestamp()
    )
    checkpoint_local_path = "/test/checkpoint/path"
    estimator = TensorFlow(
        entry_point=SCRIPT,
        role="SageMakerRole",
        train_instance_count=1,
        train_instance_type=instance_type,
        sagemaker_session=sagemaker_session,
        script_mode=True,
        framework_version=tf_full_version,
        py_version=tests.integ.PYTHON_VERSION,
        metric_definitions=[{"Name": "train:global_steps", "Regex": r"global_step\/sec:\s(.*)"}],
        checkpoint_s3_uri=checkpoint_s3_uri,
        checkpoint_local_path=checkpoint_local_path,
    )
    inputs = estimator.sagemaker_session.upload_data(
def test_deploy_model(pytorch_training_job, sagemaker_session, cpu_instance_type):
    endpoint_name = "test-pytorch-deploy-model-{}".format(sagemaker_timestamp())

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        desc = sagemaker_session.sagemaker_client.describe_training_job(
            TrainingJobName=pytorch_training_job
        )
        model_data = desc["ModelArtifacts"]["S3ModelArtifacts"]
        model = PyTorchModel(
            model_data,
            "SageMakerRole",
            entry_point=MNIST_SCRIPT,
            sagemaker_session=sagemaker_session,
        )
        predictor = model.deploy(1, cpu_instance_type, endpoint_name=endpoint_name)

        batch_size = 100
        data = numpy.random.rand(batch_size, 1, 28, 28).astype(numpy.float32)
def test_multi_data_model_deploy_train_model_from_amazon_first_party_estimator(
    container_image, sagemaker_session, cpu_instance_type
):
    timestamp = sagemaker_timestamp()
    endpoint_name = "test-multimodel-endpoint-{}".format(timestamp)
    model_name = "test-multimodel-{}".format(timestamp)

    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        rcf_model_v1 = __rcf_training_job(
            sagemaker_session, container_image, cpu_instance_type, 50, 20
        )

        model_data_prefix = os.path.join(
            "s3://", sagemaker_session.default_bucket(), "multimodel-{}/".format(timestamp)
        )
        multi_data_model = MultiDataModel(
            name=model_name,
            model_data_prefix=model_data_prefix,
            model=rcf_model_v1,
            sagemaker_session=sagemaker_session,