How to use the mlagents.trainers.models.LearningModel class in mlagents

To help you get started, we’ve selected a few mlagents examples based on popular ways LearningModel is used in public projects.


github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / sac / models.py View on Github
        with tf.variable_scope(scope):
            mu = tf.layers.dense(
                hidden_policy,
                self.act_size[0],
                activation=None,
                name="mu",
                kernel_initializer=LearningModel.scaled_init(0.01),
            )

            # Policy-dependent log_sigma_sq
            log_sigma_sq = tf.layers.dense(
                hidden_policy,
                self.act_size[0],
                activation=None,
                name="log_std",
                kernel_initializer=LearningModel.scaled_init(0.01),
            )

            self.log_sigma_sq = tf.clip_by_value(log_sigma_sq, LOG_STD_MIN, LOG_STD_MAX)

            sigma_sq = tf.exp(self.log_sigma_sq)

            # Do the reparameterization trick
            policy_ = mu + tf.random_normal(tf.shape(mu)) * sigma_sq

            _gauss_pre = -0.5 * (
                ((policy_ - mu) / (tf.exp(self.log_sigma_sq) + EPSILON)) ** 2
                + 2 * self.log_sigma_sq
                + np.log(2 * np.pi)
            )

            all_probs = tf.reduce_sum(_gauss_pre, axis=1, keepdims=True)
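
The head-construction pattern above transfers directly to other models. Below is a minimal sketch of calling LearningModel.scaled_init yourself, assuming ml-agents is installed; the placeholder and action size are illustrative stand-ins, not part of the snippet.

from mlagents.tf_utils import tf
from mlagents.trainers.models import LearningModel

# Hypothetical stand-ins: a 128-unit encoded observation batch and 4 continuous actions.
hidden_policy = tf.placeholder(tf.float32, [None, 128], name="hidden_policy")
act_size = 4

# scaled_init(0.01) gives a small-variance kernel initializer, keeping the initial
# policy mean near zero just as in the SAC snippet above.
mu = tf.layers.dense(
    hidden_policy,
    act_size,
    activation=None,
    name="mu",
    kernel_initializer=LearningModel.scaled_init(0.01),
)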
github StepNeverStop / RLs / mlagents / trainers / ppo / models.py View on Github
                self.memory_in[:, _half_point:],
                self.sequence_length,
                name="lstm_value",
            )
            self.memory_out = tf.concat(
                [memory_policy_out, memory_value_out], axis=1, name="recurrent_out"
            )
        else:
            hidden_policy = hidden_streams[0]
            hidden_value = hidden_streams[1]

        mu = tf.layers.dense(
            hidden_policy,
            self.act_size[0],
            activation=None,
            kernel_initializer=LearningModel.scaled_init(0.01),
            reuse=tf.AUTO_REUSE,
        )

        self.log_sigma_sq = tf.get_variable(
            "log_sigma_squared",
            [self.act_size[0]],
            dtype=tf.float32,
            initializer=tf.zeros_initializer(),
        )

        sigma_sq = tf.exp(self.log_sigma_sq)

        self.epsilon = tf.placeholder(
            shape=[None, self.act_size[0]], dtype=tf.float32, name="epsilon"
        )
        # Clip and scale output to ensure actions are always within [-1, 1] range.
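
The snippet cuts off right after this comment. Going by the continuous-action PPO policy in ml-agents (an assumption here, since the exact lines are truncated), the clip-and-scale step looks roughly like this:

        # Hedged sketch of the truncated continuation: sample with the external
        # epsilon noise, then squash the action into [-1, 1].
        self.output_pre = mu + tf.sqrt(sigma_sq) * self.epsilon
        output_post = tf.clip_by_value(self.output_pre, -3, 3) / 3
        self.output = tf.identity(output_post, name="action")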
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / sac / models.py View on Github
from mlagents.trainers.models import LearningModel, LearningRateSchedule, EncoderType

LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6  # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

LOGGER = logging.getLogger("mlagents.trainers")

POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"


class SACNetwork(LearningModel):
    """
    Base class for an SAC network. Implements methods for creating the actor and critic heads.
    """

    def __init__(
        self,
        brain,
        m_size=None,
        h_size=128,
        normalize=False,
        use_recurrent=False,
        num_layers=2,
        stream_names=None,
        seed=0,
        vis_encode_type=EncoderType.SIMPLE,
    ):
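
Every example on this page follows the same initialization pattern: subclass LearningModel, call its __init__ with the memory and normalization settings plus the brain, then build the network with the helper methods it provides. A minimal, hypothetical subclass (the class and attribute names are illustrative):

from mlagents.trainers.models import LearningModel


class MinimalModel(LearningModel):
    """Hypothetical subclass showing the common initialization pattern."""

    def __init__(self, brain, h_size=128, num_layers=2, m_size=None,
                 normalize=False, use_recurrent=False, seed=0):
        # Same argument order the BC and PPO models use in the snippets on this page.
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
        # One observation stream is enough for a single policy head.
        hidden_streams = self.create_observation_streams(1, h_size, num_layers)
        self.hidden = hidden_streams[0]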
github StepNeverStop / RLs / mlagents / trainers / ppo / models.py View on Github
import logging
from typing import Optional

import numpy as np
from mlagents.tf_utils import tf
from mlagents.trainers.models import LearningModel, EncoderType, LearningRateSchedule

logger = logging.getLogger("mlagents.trainers")


class PPOModel(LearningModel):
    def __init__(
        self,
        brain,
        lr=1e-4,
        lr_schedule=LearningRateSchedule.LINEAR,
        h_size=128,
        epsilon=0.2,
        beta=1e-3,
        max_step=5e6,
        normalize=False,
        use_recurrent=False,
        num_layers=2,
        m_size=None,
        seed=0,
        stream_names=None,
        vis_encode_type=EncoderType.SIMPLE,
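
As the signature shows, PPOModel needs only the brain description plus the usual trainer hyperparameters. Here is a hypothetical helper, assuming brain is the BrainParameters object mlagents passes to its trainers and that the import path matches the file shown above:

from mlagents.trainers.models import EncoderType, LearningRateSchedule
from mlagents.trainers.ppo.models import PPOModel


def build_ppo_model(brain):
    # brain: assumed to be a BrainParameters instance describing the
    # observation and action spaces of the environment.
    return PPOModel(
        brain,
        lr=3e-4,
        lr_schedule=LearningRateSchedule.LINEAR,
        h_size=256,
        num_layers=2,
        use_recurrent=False,
        vis_encode_type=EncoderType.SIMPLE,
    )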
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / components / reward_signals / gail / model.py View on Github
        if self.policy_model.vis_obs_size > 0:
            self.expert_visual_in: List[tf.Tensor] = []
            visual_policy_encoders = []
            visual_expert_encoders = []
            for i in range(self.policy_model.vis_obs_size):
                # Create input ops for next (t+1) visual observations.
                visual_input = self.policy_model.create_visual_input(
                    self.policy_model.brain.camera_resolutions[i],
                    name="gail_visual_observation_" + str(i),
                )
                self.expert_visual_in.append(visual_input)

                encoded_policy_visual = self.policy_model.create_visual_observation_encoder(
                    self.policy_model.visual_in[i],
                    self.encoding_size,
                    LearningModel.swish,
                    1,
                    "gail_stream_{}_visual_obs_encoder".format(i),
                    False,
                )

                encoded_expert_visual = self.policy_model.create_visual_observation_encoder(
                    self.expert_visual_in[i],
                    self.encoding_size,
                    LearningModel.swish,
                    1,
                    "gail_stream_{}_visual_obs_encoder".format(i),
                    True,
                )
                visual_policy_encoders.append(encoded_policy_visual)
                visual_expert_encoders.append(encoded_expert_visual)
            hidden_policy_visual = tf.concat(visual_policy_encoders, axis=1)
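
LearningModel.swish is passed above as the encoder activation. It is a static method, so it can also be handed straight to tf.layers.dense; a small sketch follows, where the placeholder name and sizes are illustrative and swish is assumed to compute x * sigmoid(x):

from mlagents.tf_utils import tf
from mlagents.trainers.models import LearningModel

# Hypothetical 64-dimensional vector observation batch.
vector_obs = tf.placeholder(tf.float32, [None, 64], name="vector_obs")
# Same self-gated activation the GAIL visual encoders above rely on.
encoded = tf.layers.dense(
    vector_obs, 128, activation=LearningModel.swish, name="obs_encoder"
)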
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / bc / models.py View on Github
    def __init__(
        self,
        brain,
        h_size=128,
        lr=1e-4,
        n_layers=2,
        m_size=128,
        normalize=False,
        use_recurrent=False,
        seed=0,
    ):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
        num_streams = 1
        hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(
            dtype=tf.float32, shape=[], name="dropout_rate"
        )
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            tf.Variable(
                self.m_size, name="memory_size", trainable=False, dtype=tf.int32
            )
            self.memory_in = tf.placeholder(
                shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
            )
            hidden_reg, self.memory_out = self.create_recurrent_encoder(
                hidden_reg, self.memory_in, self.sequence_length
github dtransposed / Reinforcement-Learning-With-Unity-G.E.A.R / ml-agents / trainers / ppo / models.py View on Github
        Takes a Unity environment and model-specific hyper-parameters and returns the
        appropriate PPO agent model for the environment.
        :param brain: BrainInfo used to generate specific network graph.
        :param lr: Learning rate.
        :param h_size: Size of hidden layers
        :param epsilon: Value for policy-divergence threshold.
        :param beta: Strength of entropy regularization.
        :return: a sub-class of PPOAgent tailored to the environment.
        :param max_step: Total number of training steps.
        :param normalize: Whether to normalize vector observation input.
        :param use_recurrent: Whether to use an LSTM layer in the network.
        :param num_layers: Number of hidden layers between encoded input and policy & value layers
        :param m_size: Size of brain memory.
        """
        with tf.variable_scope(scope):
            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
            self.use_curiosity = use_curiosity
            self.with_heuristics = with_heuristics
            if num_layers < 1:
                num_layers = 1
            self.last_reward, self.new_reward, self.update_reward = self.create_reward_encoder()
            if brain.vector_action_space_type == "continuous":
                self.create_cc_actor_critic(h_size, num_layers)
                self.entropy = tf.ones_like(tf.reshape(self.value, [-1])) * self.entropy
            else:
                self.create_dc_actor_critic(h_size, num_layers)
            if self.use_curiosity:
                self.curiosity_enc_size = curiosity_enc_size
                self.curiosity_strength = curiosity_strength
                encoded_state, encoded_next_state = self.create_curiosity_encoders()
                self.create_inverse_model(encoded_state, encoded_next_state)
                self.create_forward_model(encoded_state, encoded_next_state)
github dtransposed / Reinforcement-Learning-With-Unity-G.E.A.R / ml-agents / __backup / trainers / bc / models.py View on Github
import tensorflow as tf
import tensorflow.contrib.layers as c_layers
from mlagents.trainers.models import LearningModel


class BehavioralCloningModel(LearningModel):
    def __init__(self, brain, h_size=128, lr=1e-4, n_layers=2, m_size=128,
                 normalize=False, use_recurrent=False, scope='PPO', seed=0):
        with tf.variable_scope(scope):
            LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
            num_streams = 1
            hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
            hidden = hidden_streams[0]
            self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=[], name="dropout_rate")
            hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
            if self.use_recurrent:
                tf.Variable(self.m_size, name="memory_size", trainable=False, dtype=tf.int32)
                self.memory_in = tf.placeholder(shape=[None, self.m_size], dtype=tf.float32, name='recurrent_in')
                hidden_reg, self.memory_out = self.create_recurrent_encoder(hidden_reg, self.memory_in,
                                                                            self.sequence_length)
                self.memory_out = tf.identity(self.memory_out, name='recurrent_out')
github StepNeverStop / RLs / mlagents / trainers / sac / models.py View on Github
from mlagents.trainers.models import LearningModel, LearningRateSchedule, EncoderType

LOG_STD_MAX = 2
LOG_STD_MIN = -20
EPSILON = 1e-6  # Small value to avoid divide by zero
DISCRETE_TARGET_ENTROPY_SCALE = 0.2  # Roughly equal to e-greedy 0.05
CONTINUOUS_TARGET_ENTROPY_SCALE = 1.0  # TODO: Make these an optional hyperparam.

LOGGER = logging.getLogger("mlagents.trainers")

POLICY_SCOPE = ""
TARGET_SCOPE = "target_network"


class SACNetwork(LearningModel):
    """
    Base class for an SAC network. Implements methods for creating the actor and critic heads.
    """

    def __init__(
        self,
        brain,
        m_size=None,
        h_size=128,
        normalize=False,
        use_recurrent=False,
        num_layers=2,
        stream_names=None,
        seed=0,
        vis_encode_type=EncoderType.SIMPLE,
    ):
github Unity-Technologies / ml-agents / ml-agents / mlagents / trainers / bc / models.py View on Github
    def __init__(
        self,
        brain,
        h_size=128,
        lr=1e-4,
        n_layers=2,
        m_size=128,
        normalize=False,
        use_recurrent=False,
        seed=0,
    ):
        LearningModel.__init__(self, m_size, normalize, use_recurrent, brain, seed)
        num_streams = 1
        hidden_streams = self.create_observation_streams(num_streams, h_size, n_layers)
        hidden = hidden_streams[0]
        self.dropout_rate = tf.placeholder(
            dtype=tf.float32, shape=[], name="dropout_rate"
        )
        hidden_reg = tf.layers.dropout(hidden, self.dropout_rate)
        if self.use_recurrent:
            tf.Variable(
                self.m_size, name="memory_size", trainable=False, dtype=tf.int32
            )
            self.memory_in = tf.placeholder(
                shape=[None, self.m_size], dtype=tf.float32, name="recurrent_in"
            )
            hidden_reg, self.memory_out = self.create_recurrent_encoder(
                hidden_reg, self.memory_in, self.sequence_length
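
This last snippet stops inside create_recurrent_encoder. Going by the equivalent block in the BehavioralCloningModel snippet earlier on this page, the call closes and the memory output is exported under a fixed name, roughly:

            # Hedged sketch of how the truncated block typically continues.
            hidden_reg, self.memory_out = self.create_recurrent_encoder(
                hidden_reg, self.memory_in, self.sequence_length
            )
            self.memory_out = tf.identity(self.memory_out, name="recurrent_out")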