def test_execution(self):
    self.start_tests(name='getting-started-execution')
    runner = Runner(
        agent='test/data/agent.json', environment=dict(environment='gym', level='CartPole'),
        max_episode_timesteps=10
    )
    runner.run(num_episodes=10)
    runner.run(num_episodes=5, evaluation=True)
    runner.close()
    self.finished_test()
# Create agent and environment
environment = Environment.create(
    environment='test/data/environment.json', max_episode_timesteps=10
)
agent = Agent.create(agent='test/data/agent.json', environment=environment)

# Train for 10 episodes
for _ in range(10):
    states = environment.reset()
    terminal = False
    while not terminal:
        actions = agent.act(states=states)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

# Evaluate for 5 episodes (independent acting, no observe/update)
sum_rewards = 0.0
for _ in range(5):
    states = environment.reset()
    internals = agent.initial_internals()
    terminal = False
    while not terminal:
        actions, internals = agent.act(
            states=states, internals=internals, independent=True, deterministic=True
        )
        states, terminal, reward = environment.execute(actions=actions)
        sum_rewards += reward
print('Mean episode reward:', sum_rewards / 5)
def test_agent(self):
    self.start_tests(name='getting-started-agent')

    environment = Environment.create(
        environment='gym', level='CartPole', max_episode_timesteps=50
    )
    self.finished_test()

    agent = Agent.create(
        agent='tensorforce', environment=environment, update=64,
        objective='policy_gradient', reward_estimation=dict(horizon=20)
    )
    self.finished_test()

    agent = Agent.create(
        agent='ppo', environment=environment, batch_size=10, learning_rate=1e-3
    )
    self.finished_test()

    agent = Agent.create(agent='test/data/agent.json', environment=environment)
def __init__(self, environment: 'TradingEnvironment', agent: any, max_episode_timesteps: int,
             agent_kwargs: any = {}, **kwargs):
    """
    Arguments:
        environment: A `TradingEnvironment` instance for the agent to trade within.
        agent: A `Tensorforce` agent or agent specification.
        max_episode_timesteps: The maximum number of timesteps per episode.
        agent_kwargs (optional): Keyword arguments passed through to `Agent.create`.
        save_best_agent (optional): If True, the runner will automatically save the best agent.
        kwargs (optional): Optional keyword arguments to adjust the strategy.

    (A usage sketch follows this constructor.)
    """
    self._max_episode_timesteps = max_episode_timesteps
    self._save_best_agent = kwargs.get('save_best_agent', False)

    self._environment = environment
    self._tensorforce_environment = Environment.create(
        environment='gym', level=environment, max_episode_timesteps=self._max_episode_timesteps
    )
    self._agent = Agent.create(
        agent=agent, environment=self._tensorforce_environment,
        max_episode_timesteps=max_episode_timesteps, **agent_kwargs
    )
    self._runner = Runner(
        agent=self._agent, environment=self._tensorforce_environment,
        save_best_agent=self._save_best_agent
    )
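# A minimal usage sketch for the constructor above, kept as comments because the surrounding
# setup is not shown in this snippet. The enclosing class name `TensorforceTradingStrategy`
# and the `trading_env` instance are assumptions; the agent spec mirrors the 'ppo' example
# earlier on this page.
#
# trading_env = ...  # an existing TradingEnvironment instance
# strategy = TensorforceTradingStrategy(
#     environment=trading_env,
#     agent=dict(agent='ppo', batch_size=10, learning_rate=1e-3),
#     max_episode_timesteps=200,
#     save_best_agent=True
# )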
if args.visualize:
    if args.level is None:
        environment = Environment.create(environment=args.environment, visualize=True)
    else:
        environment = Environment.create(
            environment=args.environment, level=args.level, visualize=True
        )
elif args.visualize_directory:
    if args.level is None:
        environment = Environment.create(
            environment=args.environment, visualize_directory=args.visualize_directory
        )
    else:
        environment = Environment.create(
            environment=args.environment, level=args.level,
            visualize_directory=args.visualize_directory
        )
else:
    if args.level is None:
        environment = Environment.create(
            environment=args.environment, max_episode_timesteps=args.max_episode_timesteps
        )
    else:
        environment = Environment.create(
            environment=args.environment, max_episode_timesteps=args.max_episode_timesteps,
            level=args.level
        )

for _ in range(args.repeat):
    agent_kwargs = dict()
    if args.network is not None:
        agent_kwargs['network'] = args.network
    agent = Agent.create(agent=args.agent, environment=environment, **agent_kwargs)
    runner = Runner(agent=agent, environment=environment)
    runner.run(num_episodes=args.episodes)
assert args.episodes is not None and args.visualize is not None
rewards = [list() for _ in range(args.episodes)]
timesteps = [list() for _ in range(args.episodes)]
seconds = [list() for _ in range(args.episodes)]
agent_seconds = [list() for _ in range(args.episodes)]
def callback(r):
    rewards[r.episodes - 1].append(r.episode_reward)
    timesteps[r.episodes - 1].append(r.episode_timestep)
    seconds[r.episodes - 1].append(r.episode_second)
    agent_seconds[r.episodes - 1].append(r.episode_agent_second)
    return True
parser.add_argument('--id', type=str, default='worker', help='Unique worker id')
args = parser.parse_args()

if False:
    host = nic_name_to_host(nic_name=None)
    port = 123
else:
    host = 'localhost'
    port = None

server = NameServer(run_id=args.id, working_directory=args.directory, host=host, port=port)
nameserver, nameserver_port = server.start()

if args.level is None:
    environment = Environment.create(environment=args.environment)
else:
    environment = Environment.create(environment=args.environment, level=args.level)

worker = TensorforceWorker(
    environment=environment, run_id=args.id, nameserver=nameserver,
    nameserver_port=nameserver_port, host=host
)
# TensorforceWorker(run_id, nameserver=None, nameserver_port=None, logger=None, host=None, id=None, timeout=None)
# logger: logging.Logger instance used for debugging output
# id: anything with a __str__ method; if multiple workers are started in the same process,
#     you MUST provide a unique id for each of them via the `id` argument.
# timeout: int or float; the time a worker waits for a new job after finishing a computation
#     before shutting down. Towards the end of a long run with multiple workers, this helps
#     to shut down idling workers. We recommend a timeout of roughly half the time it would
#     take for the second largest budget to finish. The default (None) means the worker waits
#     indefinitely and never shuts down on its own.
worker.run(background=True)

# config = cs.sample_configuration().get_dictionary()
# print(config)
# res = worker.compute(config=config, budget=1, working_directory='.')
# print(res)
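# Sketch: the optional `id` and `timeout` parameters documented above would be passed like
# this when several workers share one process (the values 'worker-1' and 600 are illustrative
# assumptions, not from the source):
#
# worker = TensorforceWorker(
#     environment=environment, run_id=args.id, nameserver=nameserver,
#     nameserver_port=nameserver_port, host=host, id='worker-1', timeout=600
# )
# worker.run(background=True)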
def environment(self, environment: 'TradingEnvironment'):
    self._environment = environment
    self._tensorforce_environment = Environment.create(
        environment='gym', level=environment, max_episode_timesteps=self._max_episode_timesteps
    )
    self._runner = Runner(
        agent=self._agent, environment=self._tensorforce_environment,
        save_best_agent=self._save_best_agent
    )