How to use the smdebug.pytorch.SaveConfig class in smdebug

To help you get started, we've selected a few smdebug examples based on popular ways SaveConfig is used in public projects.

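As a quick orientation before the project snippets, here is a minimal sketch of the two most common ways to construct a SaveConfig and attach it to a hook (the model and output directory are placeholders):

import torch.nn as nn
import smdebug.pytorch as smd

# Either save at explicit steps, or at a fixed interval; passing both
# would save at the union of the two schedules.
save_config = smd.SaveConfig(save_steps=[0, 1, 2, 3])
# save_config = smd.SaveConfig(save_interval=100)

model = nn.Linear(10, 2)  # toy model
hook = smd.Hook(out_dir="/tmp/smdebug_demo", save_config=save_config)
hook.register_module(model)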

github awslabs / sagemaker-debugger / tests / pytorch / test_collection.py
def test_collection_add(hook=None, out_dir=None):
    hook_created = False
    if hook is None:
        run_id = "trial_" + datetime.now().strftime("%Y%m%d-%H%M%S%f")
        out_dir = "/tmp/" + run_id
        hook = t_hook(  # t_hook is smdebug.pytorch's Hook, aliased in this test's imports
            out_dir=out_dir,
            save_config=SaveConfig(save_steps=[0, 1, 2, 3]),
            include_collections=["relu_activations"],
        )
        hook_created = True

    model = Net().to(torch.device("cpu"))
    hook.register_module(model)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    train(model, hook, torch.device("cpu"), optimizer, num_steps=10)
    tr = create_trial(out_dir)
    assert tr
    assert len(tr.tensor_names(collection="relu_activations")) > 0
    assert tr.tensor(tr.tensor_names(collection="relu_activations")[0]).value(0) is not None

    if hook_created:
        shutil.rmtree(out_dir)
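
Because the hook above uses SaveConfig(save_steps=[0, 1, 2, 3]), only those four steps should be present when the trial is read back. A small sketch of checking that, reusing the trial API from the test:

tr = create_trial(out_dir)
name = tr.tensor_names(collection="relu_activations")[0]
# steps() lists the steps at which this tensor was saved; with the
# save_steps above it should be exactly [0, 1, 2, 3].
assert tr.tensor(name).steps() == [0, 1, 2, 3]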

github awslabs / sagemaker-debugger / tests / pytorch / test_loss.py
def create_net_and_train(out_dir, n_steps, use_loss_module=False, use_loss_functional=False):
    assert (
        use_loss_module != use_loss_functional
    ), "Exactly one of `use_loss_module` and `use_loss_functional` must be true."

    net = Net()
    optimizer = optim.SGD(net.parameters(), lr=0.05, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    hook = smd.Hook(out_dir=out_dir, save_config=smd.SaveConfig(save_interval=1))
    hook.register_module(net)
    if use_loss_module:
        hook.register_loss(criterion)

    batch_size = 1
    # Use the same data at each step to test loss decreasing
    inputs, labels = torch.rand(batch_size, 3, 32, 32), torch.zeros(batch_size).long()
    for _ in range(n_steps):
        optimizer.zero_grad()
        outputs = net(inputs)
        if use_loss_module:
            loss = criterion(outputs, labels)
        if use_loss_functional:
            loss = F.cross_entropy(outputs, labels)
            hook.record_tensor_value("nll_loss", tensor_value=loss)
        loss.backward()
        optimizer.step()  # advance the optimizer so the loss on the fixed batch decreases
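
After create_net_and_train runs, the saved loss values can be read back to confirm the loss decreases. A hedged sketch: the exact stored tensor name for record_tensor_value("nll_loss", ...) is an assumption, so it is matched by prefix rather than hard-coded:

from smdebug.trials import create_trial

out_dir = "/tmp/loss_demo"  # placeholder; same directory passed to create_net_and_train
create_net_and_train(out_dir, n_steps=10, use_loss_functional=True)

tr = create_trial(out_dir)
# Search by prefix rather than assuming the stored name verbatim.
loss_name = [n for n in tr.tensor_names() if n.startswith("nll_loss")][0]
steps = tr.tensor(loss_name).steps()
values = [tr.tensor(loss_name).value(s) for s in steps]
assert values[-1] < values[0]  # same batch every step, so loss should fall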

github awslabs / sagemaker-debugger / examples / pytorch / scripts / simple.py
def create_hook(output_dir, module=None, hook_type="saveall", save_steps=None):
    # Create a hook that logs weights, biases, gradients and inputs/outputs of the model
    if hook_type == "saveall":
        hook = Hook(
            out_dir=output_dir,
            save_config=SaveConfig(save_steps=save_steps),
            save_all=True,
            export_tensorboard=True,
        )
    elif hook_type == "module-input-output":
        # The names of input and output tensors of a module are in the following format:
        # Inputs:  _input_, and
        # Output:  _output
        # To log the inputs and outputs of a module, we create a collection as follows:
        assert module is not None

        # Create a hook that logs weights, biases, gradients and inputs/outputs of the model
        hook = Hook(
            out_dir=output_dir,
            save_config=SaveConfig(save_steps=save_steps),
            include_collections=["weights", "gradients", "biases", "l_mod"],
            export_tensorboard=True,
        )
        hook.get_collection("l_mod").add_module_tensors(module, inputs=True, outputs=True)

github awslabs / sagemaker-debugger / examples / pytorch / scripts / torch_resnet.py
def create_hook(output_dir, module, trial_id="trial-resnet", save_interval=100):
    # With the following SaveConfig, tensors are saved every `save_interval`
    # steps (100 by default). Note: if save_steps were also given, the
    # resulting schedule would be the union of save_steps and save_interval.
    save_config = SaveConfig(save_interval=save_interval)

    # The names of input and output tensors of a block are in the following format:
    # Inputs:  _input_, and
    # Output:  _output

    # Create a hook that logs weights, biases, and gradients of the model while
    # training, using the save_config defined above.
    hook = Hook(out_dir=output_dir, save_config=save_config)
    return hook
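
The union behavior the comment mentions can be expressed directly by passing both parameters; a minimal sketch:

from smdebug.pytorch import SaveConfig

# Saves at steps 1, 2, 3 and additionally every 100000 steps; the effective
# schedule is the union of the two, per the comment above.
save_config = SaveConfig(save_steps=[1, 2, 3], save_interval=100000)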

github awslabs / sagemaker-debugger / examples / pytorch / scripts / pytorch_hook_demos.py
def create_hook(output_dir, module=None, hook_type="saveall"):
    # Create a hook that logs weights, biases, gradients and inputs/outputs of the model every 10 steps (steps 0-190) while training.
    if hook_type == "saveall":
        hook = Hook(
            out_dir=output_dir,
            save_config=SaveConfig(save_steps=[i * 10 for i in range(20)]),
            save_all=True,
        )
    elif hook_type == "module-input-output":
        # The names of input and output tensors of a module are in the following format:
        # Inputs:  _input_, and
        # Output:  _output
        # To log the inputs and outputs of a module, we create a collection as follows:
        assert module is not None

        # Create a hook that logs weights, biases, gradients and inputs/outputs of the model every 5 steps (steps 0-95) while training.
        hook = Hook(
            out_dir=output_dir,
            save_config=SaveConfig(save_steps=[i * 5 for i in range(20)]),
            include_collections=["weights", "gradients", "biases", "l_mod"],
        )
        hook.get_collection("l_mod").add_module_tensors(module, inputs=True, outputs=True)

github awslabs / sagemaker-debugger / examples / pytorch / scripts / simple.py
        # The names of input and output tensors of a module are in the following format:
        # Inputs:  _input_, and
        # Output:  _output
        # To log the inputs and outputs of a module, we create a collection as follows:
        assert module is not None

        # Create a hook that logs weights, biases, gradients and inputs/outputs of the model
        hook = Hook(
            out_dir=output_dir,
            save_config=SaveConfig(save_steps=save_steps),
            include_collections=["weights", "gradients", "biases", "l_mod"],
            export_tensorboard=True,
        )
        hook.get_collection("l_mod").add_module_tensors(module, inputs=True, outputs=True)
    elif hook_type == "weights-bias-gradients":
        save_config = SaveConfig(save_steps=save_steps)
        # Create a hook that logs ONLY weights, biases, and gradients
        hook = Hook(out_dir=output_dir, save_config=save_config, export_tensorboard=True)
    return hook
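
One possible way to drive this helper (the model and output directory are placeholders):

import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 10), nn.ReLU(), nn.Linear(10, 2))
hook = create_hook(
    "/tmp/simple_demo",  # placeholder output directory
    module=model,
    hook_type="module-input-output",
    save_steps=[0, 25, 50],
)
hook.register_module(model)  # attach to the model before training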

github awslabs / sagemaker-debugger / examples / pytorch / scripts / pytorch_hook_demos.py
    elif hook_type == "module-input-output":
        # The names of input and output tensors of a module are in the following format:
        # Inputs:  _input_, and
        # Output:  _output
        # To log the inputs and outputs of a module, we create a collection as follows:
        assert module is not None

        # Create a hook that logs weights, biases, gradients and inputs/outputs of the model every 5 steps (steps 0-95) while training.
        hook = Hook(
            out_dir=output_dir,
            save_config=SaveConfig(save_steps=[i * 5 for i in range(20)]),
            include_collections=["weights", "gradients", "biases", "l_mod"],
        )
        hook.get_collection("l_mod").add_module_tensors(module, inputs=True, outputs=True)
    elif hook_type == "weights-bias-gradients":
        save_config = SaveConfig(save_steps=[i * 5 for i in range(20)])
        # Create a hook that logs ONLY weights, biases, and gradients every 5 steps (steps 0-95) while training the model.
        hook = Hook(out_dir=output_dir, save_config=save_config)
    return hook