How to use the tensorforce.execution.Runner class in Tensorforce

To help you get started, we’ve selected a few Tensorforce examples based on popular ways the Runner is used in public projects.

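The Runner couples an agent with an environment and drives the act/observe loop for you. As a minimal end-to-end sketch, assuming a 0.5-style Tensorforce API (the Agent.create/Environment.create calls and the CartPole settings below are illustrative choices, not taken from the projects listed here); the older examples further down instead use the episodes=/episode_finished= signature of earlier Runner versions:

from tensorforce.agents import Agent
from tensorforce.environments import Environment
from tensorforce.execution import Runner

# Illustrative environment and agent; swap in your own specs.
environment = Environment.create(
    environment='gym', level='CartPole-v1', max_episode_timesteps=500
)
agent = Agent.create(agent='ppo', environment=environment, batch_size=10)

# The Runner runs the agent-environment interaction loop.
runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=300)
runner.close()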

github tensorforce / tensorforce / examples / scripts / ale.py View on Github external
)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to directory {}".format(save_dir))

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1
    )

    report_episodes = max(1, args.episodes // 1000)  # avoid modulo-by-zero when fewer than 1000 episodes are requested
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
github krfricke / rl-benchmark / benchmark.py View on Github external
    for i in range(args.experiments):
        config = original_config.copy()

        environment = OpenAIGym(args.gym_id)

        agent = agents[config.agent](states_spec=environment.states,
                                     actions_spec=environment.actions,
                                     network_spec=config.network,
                                     config=config)

        if i == 0 and args.load_model:
            logger.info("Loading model data from file: {}".format(args.load_model))
            agent.load_model(args.load_model)

        runner = Runner(
            agent=agent,
            environment=environment,
            repeat_actions=1,
            history=history_data
            # save_path=args.model,
            # save_episodes=args.save_model
        )

        environment.reset()
        agent.reset()

        logger.info("Starting experiment {}".format(i+1))

        experiment_start_time = int(time.time())
        runner.run(episodes=config.episodes, max_episode_timesteps=config.max_timesteps, episode_finished=episode_finished)
        experiment_end_time = int(time.time())
github tensorforce / tensorforce / examples / scripts / openai_gym_async.py View on Github external
)

    agent = Agent.from_spec(
        spec=agent,
        kwargs=dict(
            states=environment.states,
            actions=environment.actions,
            network=network
        )
    )

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(agent)

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1
    )

    if args.debug:  # TODO: Timestep-based reporting
        report_episodes = 1
    else:
        report_episodes = 100

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            steps_per_second = r.timestep / (time.time() - r.start_time)
            logger.info("Finished episode {} after overall {} timesteps. Steps Per Second {}".format(
                r.agent.episode,
                r.agent.timestep,
                steps_per_second
            ))
        return True
github miroblog / tf_deep_rl_trader / ppo_trader.py View on Github external
# PPOAgent
        step_optimizer=dict(
            type='adam',
            learning_rate=1e-4
        ),
        subsampling_fraction=0.2,  # 0.1
        optimization_steps=10,
        execution=dict(
            type='single',
            session_config=None,
            distributed_spec=None
        )
    )

    train_runner = Runner(agent=agent, environment=environment)
    test_runner = Runner(
        agent=agent,
        environment=test_environment,
    )

    train_runner.run(episodes=100, max_episode_timesteps=16000, episode_finished=episode_finished)
    print("Learning finished. Total episodes: {ep}. Average reward of last 100 episodes: {ar}.".format(
        ep=train_runner.episode,
        ar=np.mean(train_runner.episode_rewards[-100:]))
    )

    test_runner.run(num_episodes=1, deterministic=True, testing=True, episode_finished=print_simple_log)
github tensorforce / tensorforce / examples / quickstart.py View on Github external
# Critic
        critic_network='auto',
        critic_optimizer=dict(optimizer='adam', multi_step=10, learning_rate=1e-3),
        # Preprocessing
        preprocessing=None,
        # Exploration
        exploration=0.0, variable_noise=0.0,
        # Regularization
        l2_regularization=0.0, entropy_regularization=0.0,
        # TensorFlow etc
        name='agent', device=None, parallel_interactions=1, seed=None, execution=None, saver=None,
        summarizer=None, recorder=None
    )

    # Initialize the runner
    runner = Runner(agent=agent, environment=environment)

    # Start the runner
    runner.run(num_episodes=300)
    runner.close()
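
After run() returns, the Runner keeps per-episode statistics that several of the snippets on this page read directly (episode_rewards, episode_timesteps, episode_times). A small sketch of inspecting them, assuming a Runner version that exposes these attributes as the examples above do:

import numpy as np

runner = Runner(agent=agent, environment=environment)
runner.run(num_episodes=300)

# Per-episode statistics recorded by the Runner during the run above.
rewards = runner.episode_rewards
print("Episodes run: {}".format(len(rewards)))
print("Mean reward over the last 100 episodes: {:.2f}".format(np.mean(rewards[-100:])))

runner.close()
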
github MultiAgentLearning / playground / pommerman / cli / train_with_tensorforce.py View on Github external
            env.set_training_agent(agent.agent_id)
            break

    if args.record_pngs_dir:
        assert not os.path.isdir(args.record_pngs_dir)
        os.makedirs(args.record_pngs_dir)
    if args.record_json_dir:
        assert not os.path.isdir(args.record_json_dir)
        os.makedirs(args.record_json_dir)

    # Create a Proximal Policy Optimization agent
    agent = training_agent.initialize(env)

    atexit.register(functools.partial(clean_up_agents, agents))
    wrapped_env = WrappedEnv(env, visualize=args.render)
    runner = Runner(agent=agent, environment=wrapped_env)
    runner.run(episodes=10, max_episode_timesteps=2000)
    print("Stats: ", runner.episode_rewards, runner.episode_timesteps,
          runner.episode_times)

    try:
        runner.close()
    except AttributeError:
        pass
github niksaz / dota2-expert-demo / tensorforce / ppo.py View on Github external
baseline_mode=None,
    baseline=None,
    baseline_optimizer=None,
    gae_lambda=None,
    # PGLRModel
    actions_exploration=dict(
        type='epsilon_decay',
        initial_epsilon=1.0,
        final_epsilon=0.05,
        timesteps=500000,
    ),
    likelihood_ratio_clipping=0.2,
)

# Create the runner
runner = Runner(agent=agent, environment=env)


# Callback function printing episode statistics
def episode_finished(r):
    reward = r.episode_rewards[-1]
    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".format(ep=r.episode,
                                                                                 ts=r.episode_timestep,
                                                                                 reward=reward))
    rewards.append(reward)
    with open('saved_rewards.pkl', 'wb') as output_file:
        pickle.dump(obj=rewards, file=output_file)

    return True


# Start learning
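
The snippet above cuts off at the "# Start learning" comment; with this older Runner API, the call that typically follows looks like the sketch below (the episode and timestep counts are placeholders, not values from the project):

runner.run(episodes=50000, max_episode_timesteps=2000, episode_finished=episode_finished)
runner.close()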