How to use the tensorforce.util.map_tensors function in Tensorforce

To help you get started, we've selected a few Tensorforce examples based on popular ways it is used in public projects. The snippets below all come from tensorforce/models/model.py.

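Before working through the snippets, note what util.map_tensors is for: Tensorforce passes states, internals, and actions around as dicts of named tensors, and map_tensors applies a function to every tensor in such a container while preserving its structure. The real implementation lives in tensorforce/util.py; the following is only a minimal sketch of the behaviour the snippets rely on, with made-up placeholder names (and it assumes TensorFlow 1.x, which all of the snippets target):

import tensorflow as tf


def map_tensors(fn, tensors):
    # Illustrative sketch, not the library source: apply fn to every tensor
    # in a (possibly nested) dict, returning a dict of the same shape.
    if isinstance(tensors, dict):
        return {name: map_tensors(fn=fn, tensors=tensor) for name, tensor in tensors.items()}
    return fn(tensors)


# Hypothetical usage: graph-local copies of a dict of state placeholders.
states_input = {
    'position': tf.placeholder(dtype=tf.float32, shape=(None, 3)),
    'velocity': tf.placeholder(dtype=tf.float32, shape=(None, 3))
}
states = map_tensors(fn=tf.identity, tensors=states_input)

Some call sites below also pass an extra index argument (map_tensors(fn=fn, tensors=self.list_states_buffer, index=index)); judging from those calls it selects one per-worker slot in each buffer before fn is applied, but check tensorforce/util.py in your installed version for the exact signature.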

github tensorforce / tensorforce / tensorforce / models / model.py View on Github
        Args:
            terminal: The 1D tensor (bool) of terminal signals to process (more than one True within that list is ok).
            reward: The 1D tensor (float) of rewards to process.

        Returns: Tf op to fetch when `observe()` is called.
        """
        # Increment episode
        num_episodes = tf.count_nonzero(input_tensor=terminal, dtype=util.tf_dtype('int'))
        increment_episode = tf.assign_add(ref=self.episode, value=tf.to_int64(x=num_episodes))
        increment_global_episode = tf.assign_add(ref=self.global_episode, value=tf.to_int64(x=num_episodes))

        with tf.control_dependencies(control_inputs=(increment_episode, increment_global_episode)):
            # Stop gradients
            fn = (lambda x: tf.stop_gradient(input=x[:self.list_buffer_index[index]]))
            states = util.map_tensors(fn=fn, tensors=self.list_states_buffer, index=index)
            internals = util.map_tensors(fn=fn, tensors=self.list_internals_buffer, index=index)
            actions = util.map_tensors(fn=fn, tensors=self.list_actions_buffer, index=index)
            terminal = tf.stop_gradient(input=terminal)
            reward = tf.stop_gradient(input=reward)

            # Observation
            observation = self.fn_observe_timestep(
                states=states,
                internals=internals,
                actions=actions,
                terminal=terminal,
                reward=reward
            )

        with tf.control_dependencies(control_inputs=(observation,)):
            # Reset buffer index.
            reset_index = tf.assign(ref=self.list_buffer_index[index], value=0)
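In this buffered variant the mapped function does two jobs: it slices each buffer up to the current write position (list_buffer_index[index]) and stops gradients, so only the transitions actually recorded for worker index reach fn_observe_timestep. A hedged sketch of the same slice-and-stop-gradient pattern outside the class, with an invented buffer name and shape:

buffer_index = tf.Variable(initial_value=0, dtype=tf.int32, trainable=False)
states_buffer = {'obs': tf.Variable(initial_value=tf.zeros(shape=(100, 4)), trainable=False)}

fn = (lambda x: tf.stop_gradient(input=x[:buffer_index]))
states = {name: fn(buffer) for name, buffer in states_buffer.items()}  # roughly what map_tensors does per entry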
github tensorforce / tensorforce / tensorforce / models / model.py View on Github
        Args:
            actions (any): One action (usually a value tuple) or dict of states if multiple actions are expected.
            internals (any): Internal list.
            terminal (bool): boolean indicating if the episode terminated after the observation.
            reward (float): scalar reward that resulted from executing the action.

        Returns: Tf op to fetch when `observe()` is called.
        """
        # Increment episode
        num_episodes = tf.count_nonzero(input_tensor=terminal, dtype=util.tf_dtype('int'))
        increment_episode = tf.assign_add(ref=self.episode, value=tf.to_int64(x=num_episodes))
        increment_global_episode = tf.assign_add(ref=self.global_episode, value=tf.to_int64(x=num_episodes))

        with tf.control_dependencies(control_inputs=(increment_episode, increment_global_episode)):
            # Stop gradients
            # Not using buffers here.
            states = util.map_tensors(fn=tf.stop_gradient, tensors=states)
            internals = util.map_tensors(fn=tf.stop_gradient, tensors=internals)
            actions = util.map_tensors(fn=tf.stop_gradient, tensors=actions)
            terminal = tf.stop_gradient(input=terminal)
            reward = tf.stop_gradient(input=reward)

            # Observation
            observation = self.fn_observe_timestep(
                states=states,
                internals=internals,
                actions=actions,
                terminal=terminal,
                reward=reward
            )

        with tf.control_dependencies(control_inputs=(observation,)):
            # Trivial operation to enforce control dependency.
            self.unbuffered_episode_output = self.global_episode + 0
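The unbuffered variant needs no slicing, so tf.stop_gradient is passed directly as fn and map_tensors simply applies it to each entry of the states, internals, and actions dicts. The closing self.global_episode + 0 is deliberate: it creates a fresh tensor inside the control_dependencies block, giving callers something to fetch that is guaranteed to run the observation first.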
github tensorforce / tensorforce / tensorforce / models / model.py View on Github
                #     internals=internals,
                #     update=update,
                #     deterministic=deterministic
                # )
                # self.fn_loss_per_instance(
                #     states=states,
                #     internals=internals,
                #     actions=actions,
                #     terminal=terminal,
                #     reward=reward,
                #     update=update
                # )
                self.fn_initialize()

                # Input tensors
                states = util.map_tensors(fn=tf.identity, tensors=self.states_input)
                internals = util.map_tensors(fn=tf.identity, tensors=self.internals_input)
                actions = util.map_tensors(fn=tf.identity, tensors=self.actions_input)
                terminal = tf.identity(input=self.terminal_input)
                reward = tf.identity(input=self.reward_input)
                # Probably both deterministic and independent should be the same at some point.
                deterministic = tf.identity(input=self.deterministic_input)
                independent = tf.identity(input=self.independent_input)

                states, actions, reward = self.fn_preprocess(states=states, actions=actions, reward=reward)

                self.create_operations(
                    states=states,
                    internals=internals,
                    actions=actions,
                    terminal=terminal,
                    reward=reward,
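Here the same helper is used at graph-construction time with fn=tf.identity: each input placeholder gets a graph-local alias, so everything downstream (preprocessing, create_operations) is wired to those aliases rather than to the raw placeholders. This mirrors the hypothetical usage example near the top of this page.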
github tensorforce / tensorforce / tensorforce / models / model.py View on Github
    def create_observe_operations(self, terminal, reward):
        # Increment episode
        num_episodes = tf.count_nonzero(input_tensor=terminal, dtype=util.tf_dtype('int'))
        increment_episode = tf.assign_add(ref=self.episode, value=num_episodes)
        increment_global_episode = tf.assign_add(ref=self.global_episode, value=num_episodes)

        with tf.control_dependencies(control_inputs=(increment_episode, increment_global_episode)):
            # Stop gradients
            fn = (lambda x: tf.stop_gradient(input=x[:self.buffer_index]))
            states = util.map_tensors(fn=fn, tensors=self.states_buffer)
            internals = util.map_tensors(fn=fn, tensors=self.internals_buffer)
            actions = util.map_tensors(fn=fn, tensors=self.actions_buffer)
            terminal = tf.stop_gradient(input=terminal)
            reward = tf.stop_gradient(input=reward)

            # Observation
            observation = self.fn_observe_timestep(
                states=states,
                internals=internals,
                actions=actions,
                terminal=terminal,
                reward=reward
            )

        with tf.control_dependencies(control_inputs=(observation,)):
            # Reset index
            reset_index = tf.assign(ref=self.buffer_index, value=0)
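This is the single-buffer counterpart of the first snippet: one scalar self.buffer_index instead of a per-worker list, so map_tensors is called without the index argument, and the episode counters are incremented without the tf.to_int64 cast (presumably a different revision of the same file). A hedged sketch of how the returned op might be driven, with invented session and placeholder names:

# Hypothetical driver, assuming observe_op is the op built above and
# terminal_input / reward_input are the model's feed placeholders.
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    session.run(observe_op, feed_dict={terminal_input: [False, True], reward_input: [0.0, 1.0]})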