parser.add_argument(
'-d', '--deterministic', action='store_true', default=False,
help="Choose actions deterministically"
)
args = parser.parse_args()
if args.import_modules is not None:
for module in args.import_modules.split(','):
importlib.import_module(name=module)
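# Importing these modules up front lets them register custom Gym environments (or
# other extensions) before the OpenAIGym wrapper below is constructed. Hypothetical
# invocation, assuming the flag is spelled --import-modules and a package
# my_project.envs that registers environments:
#   python openai_gym.py MyCustomEnv-v0 --import-modules my_project.envs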
environment = OpenAIGym(
gym_id=args.gym, monitor=args.monitor, monitor_safe=args.monitor_safe,
monitor_video=args.monitor_video, visualize=args.visualize
)
agent = Agent.from_spec(
spec=args.agent, states=environment.states(), actions=environment.actions(),
network=args.network
)
runner = Runner(agent=agent, environment=environment)
def callback(r):
if r.episode % 100 == 0:
print(
"================================================\n"
"Average secs/episode over 100 episodes: {time:0.2f}\n"
"Average steps/sec over 100 episodes: {timestep:0.2f}\n"
"Average reward over 100 episodes: {reward100:0.2f}\n"
"Average reward over 500 episodes: {reward500:0.2f}".format(
time=(sum(r.episode_times[-100:]) / 100.0),
timestep=(sum(r.episode_timesteps[-100:]) / sum(r.episode_times[-100:])),
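            # assumed completion of the truncated format arguments, mirroring the
            # time/timestep averages above
            reward100=(sum(r.episode_rewards[-100:]) / 100.0),
            reward500=(sum(r.episode_rewards[-500:]) / 500.0)
        )
    )
    # returning True tells the Runner to keep going
    return True

# Minimal sketch of starting the run loop with the callback above; the keyword names
# assume the 0.4.x Runner API, and --episodes / --max-episode-timesteps arguments
# that are not shown in this excerpt:
runner.run(
    episodes=args.episodes,
    max_episode_timesteps=args.max_episode_timesteps,
    deterministic=args.deterministic,
    episode_finished=callback
)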
network_spec = json.load(fp=fp)
else:
network_spec = None
logger.info("No network configuration provided.")
if network_spec is not None and network_spec[0]['type'] == 'conv2d':  # guard: network_spec may be None (see the else branch above)
agent_config['states_preprocessing'] = [{'type': 'expand_dims',
'axis': -1}]
else:
agent_config['states_preprocessing'] = [{'type': 'flatten'}]
logger.info("Start training")
environment = ViZDoom(args.vizdoom_config)
agent = Agent.from_spec(
spec=agent_config,
kwargs=dict(
states=environment.states,
actions=environment.actions,
network=network_spec,
)
)
runner = Runner(
agent=agent,
environment=environment,
repeat_actions=1
)
def episode_finished(r):
if r.episode % 100 == 0:
display_screen=args.display_screen)
if args.agent_config is not None:
with open(args.agent_config, 'r') as fp:
agent_config = json.load(fp=fp)
else:
raise TensorForceError("No agent configuration provided.")
if args.network_spec is not None:
with open(args.network_spec, 'r') as fp:
network_spec = json.load(fp=fp)
else:
network_spec = None
logger.info("No network configuration provided.")
agent = Agent.from_spec(
spec=agent_config,
kwargs=dict(
states=environment.states,
actions=environment.actions,
network=network_spec
)
)
if args.debug:
logger.info("-" * 16)
logger.info("Configuration:")
logger.info(agent_config)
if args.save:
save_dir = os.path.dirname(args.save)
if not os.path.isdir(save_dir):
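    # assumed completion: create the checkpoint directory before saving
    os.makedirs(save_dir)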
if "agent" not in experiment_spec:
raise TensorForceError("No 'agent' configuration found in experiment-spec.")
agent_config = experiment_spec["agent"]
# In case we do epsilon annealing/decay with different values per worker, fix up the agent config here (an illustrative sketch of such a fix-up follows after this block).
if run_mode == "multi-threaded":
agent_configs = []
for i in range(experiment_spec.get("num_workers")):
worker_config = copy.deepcopy(agent_config)
worker_config = vary_epsilon_anneal(worker_config)
agent_configs.append(worker_config)
else:
agent_configs = [agent_config]
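# Illustrative only: vary_epsilon_anneal is defined elsewhere in the original project.
# A hypothetical fix-up of this kind might stagger each worker's exploration schedule;
# the spec keys used below ("actions_exploration", "epsilon") are assumptions:
def vary_epsilon_anneal_sketch(worker_config, worker_index=0, decay=0.9):
    exploration = worker_config.get("actions_exploration")
    if isinstance(exploration, dict) and "epsilon" in exploration:
        # give each successive worker a slightly smaller starting epsilon
        exploration["epsilon"] = exploration["epsilon"] * (decay ** worker_index)
    return worker_config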
agent = Agent.from_spec(
spec=agent_configs[0],
kwargs=dict(
states=environments[0].states,
actions=environments[0].actions,
network=network,
# distributed TensorFlow spec (only set when running in distributed mode)
distributed=dict(
cluster_spec=cluster_spec,
task_index=args.task_index,
parameter_server=(args.job == "ps"),
device=('/job:{}/task:{}'.format(args.job, args.task_index)), # '/cpu:0'
) if run_mode == "distributed" else None,
# Model saver spec (only 1st worker will ever save).
# - don't save for multi-threaded (ThreadedRunner will take care of this)
saver=dict(
load=args.load, # load from an existing checkpoint?
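# Sketch of the kind of cluster_spec the distributed block above expects: a standard
# TensorFlow cluster layout mapping job names to host:port lists (addresses are
# placeholders):
example_cluster_spec = dict(
    ps=["localhost:12222"],
    worker=["localhost:12223", "localhost:12224"]
)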
def __del__(self):
if isinstance(self.tf_agent, Agent):
self.tf_agent.close()
if isinstance(self.tf_environment, Environment):
self.tf_environment.close()
environment = Environment.create(
environment=environment, max_episode_timesteps=max_episode_timesteps
)
self.environments.append(environment)
if evaluation_environment is None:
self.evaluation_environment = None
else:
self.is_eval_environment_external = isinstance(evaluation_environment, Environment)
self.evaluation_environment = Environment.create(
environment=evaluation_environment, max_episode_timesteps=max_episode_timesteps
)
assert self.evaluation_environment.states() == environment.states()
assert self.evaluation_environment.actions() == environment.actions()
self.is_agent_external = isinstance(agent, Agent)
kwargs = dict(parallel_interactions=num_parallel)
self.agent = Agent.create(agent=agent, environment=environment, **kwargs)
self.save_best_agent = save_best_agent
self.episode_rewards = list()
self.episode_timesteps = list()
self.episode_seconds = list()
self.episode_agent_seconds = list()
self.evaluation_rewards = list()
self.evaluation_timesteps = list()
self.evaluation_seconds = list()
self.evaluation_agent_seconds = list()
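# Typical usage of this Runner (tensorforce 0.5.x-style API); the environment,
# agent type, and episode count are illustrative:
from tensorforce.agents import Agent
from tensorforce.environments import Environment

env = Environment.create(environment='gym', level='CartPole-v1', max_episode_timesteps=500)
ppo_agent = Agent.create(agent='ppo', environment=env, batch_size=10)
runner = Runner(agent=ppo_agent, environment=env)
runner.run(num_episodes=200)
runner.close()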
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
from six.moves import xrange
import numpy as np
from tensorforce.agents import Agent
from tensorforce.core.memories import Memory
class MemoryAgent(Agent):
"""
The `MemoryAgent` class implements a replay memory from
which it samples batches according to some sampling strategy to
update the value function.
"""
def __init__(
self,
states_spec,
actions_spec,
batched_observe,
batch_size,
memory,
first_update,
update_frequency,
repeat_update
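# Generic illustration of the replay-memory idea described in the MemoryAgent
# docstring above (plain Python, not the actual tensorforce Memory implementation):
import random

class SimpleReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity
        self.buffer = []

    def add(self, experience):
        # Drop the oldest experience once the buffer is full.
        if len(self.buffer) >= self.capacity:
            self.buffer.pop(0)
        self.buffer.append(experience)

    def sample(self, batch_size):
        # Uniform random sampling; other strategies (e.g. prioritized) are possible.
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))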
from tensorforce.agents import Agent
class DRLAgent(Agent):
"""
Base class for standard deep reinforcement learning agents, which act according to a policy
parametrized by a neural network and use a memory module for optimization (subclasses of
`MemoryModel` and `DistributionModel`).
"""
def import_experience(self, experiences):
"""
Imports experiences.
Args:
experiences:
"""
if isinstance(experiences, dict):
if self.unique_state:
experiences['states'] = dict(state=experiences['states'])
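# Hypothetical call illustrating the dict form handled above; the exact set of
# expected keys depends on the agent's state/action specs (only 'states' is visible
# in this excerpt):
#   agent.import_experience(experiences=dict(
#       states=recorded_states,
#       actions=recorded_actions,
#       terminal=recorded_terminals,
#       reward=recorded_rewards
#   ))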
network_spec = json.load(fp=fp)
else:
network_spec = None
logger.info("No network configuration provided.")
if network_spec is not None and network_spec[0]['type'] == 'conv2d':  # guard: network_spec may be None (see the else branch above)
agent_config['states_preprocessing'] = [{'type': 'expand_dims',
'axis': -1}]
else:
agent_config['states_preprocessing'] = [{'type': 'flatten'}]
logger.info("Start training")
environment = Game2048()
agent = Agent.from_spec(
spec=agent_config,
kwargs=dict(
states=environment.states,
actions=environment.actions,
network=network_spec,
)
)
runner = Runner(
agent=agent,
environment=environment,
repeat_actions=1
)
def episode_finished(r):
if r.episode % 100 == 0:
def run_experiment(self, environment, experiment_num=0):
config = copy(self.config)
max_episodes = config.pop('max_episodes', None)
max_timesteps = config.pop('max_timesteps', None)
max_episode_timesteps = config.pop('max_episode_timesteps')
network_spec = config.pop('network')
agent = Agent.from_spec(
spec=config,
kwargs=dict(
states=environment.states,
actions=environment.actions,
network=network_spec
)
)
if experiment_num == 0 and self.history_data:
logging.info("Attaching history data to runner")
history_data = self.history_data
else:
history_data = None
if experiment_num == 0 and self.load_model_file:
logging.info("Loading model data from file: {}".format(self.load_model))