def evaluation(session, graph_ops, saver):
    saver.restore(session, FLAGS.checkpoint_path)
    print("Restored model weights from", FLAGS.checkpoint_path)
    monitor_env = gym.make(FLAGS.game)
    monitor_env.monitor.start(FLAGS.eval_dir + "/" + FLAGS.experiment + "/eval")

    # Unpack graph ops
    s = graph_ops["s"]
    q_values = graph_ops["q_values"]

    # Wrap env with AtariEnvironment helper class
    env = AtariEnvironment(gym_env=monitor_env,
                           resized_width=FLAGS.resized_width,
                           resized_height=FLAGS.resized_height,
                           agent_history_length=FLAGS.agent_history_length)

    for i_episode in range(FLAGS.num_eval_episodes):
        s_t = env.get_initial_state()
        ep_reward = 0
        terminal = False
        while not terminal:
            monitor_env.render()
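            # --- Sketch of how the evaluation loop typically continues (not part
            # of the original snippet): act greedily w.r.t. the learned Q-values
            # and step the wrapper. The (s_t1, r_t, terminal, info) return
            # signature of env.step() and the availability of numpy as np are
            # assumptions here.
            readout_t = session.run(q_values, feed_dict={s: [s_t]})
            action_index = np.argmax(readout_t)
            s_t1, r_t, terminal, info = env.step(action_index)
            s_t = s_t1
            ep_reward += r_t
        print("Episode %d ended with reward %f" % (i_episode, ep_reward))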
def train(session, graph_ops, saver):
    # Set up game environments (one per thread)
    envs = [gym.make(GAME) for i in range(NUM_CONCURRENT)]

    summary_ops = setup_summaries()
    summary_op = summary_ops[-1]

    # Initialize variables
    session.run(tf.global_variables_initializer())
    writer = tf.summary.FileWriter(SUMMARY_SAVE_PATH, session.graph)

    # Start NUM_CONCURRENT training threads
    actor_learner_threads = [
        threading.Thread(target=actor_learner_thread,
                         args=(thread_id, envs[thread_id], session,
                               graph_ops, summary_ops, saver))
        for thread_id in range(NUM_CONCURRENT)
    ]
    for t in actor_learner_threads:
        t.start()

    # Show the agents training and write summary statistics
    last_summary_time = 0
    while True:
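        # --- Sketch of a typical body for this monitoring loop (not part of the
        # original snippet): periodically evaluate the merged summary op and
        # stream it to TensorBoard. SUMMARY_INTERVAL is an assumed module-level
        # constant, and the time module is assumed to be imported.
        now = time.time()
        if now - last_summary_time > SUMMARY_INTERVAL:
            summary_str = session.run(summary_op)
            writer.add_summary(summary_str)
            last_summary_time = now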
if __name__ == "__main__":
adj_np = np.ones((5,3,4,4))
adj = tf.placeholder(shape=(5,3,4,4),dtype=tf.float32)
node_feature_np = np.ones((5,1,4,3))
node_feature = tf.placeholder(shape=(5,1,4,3),dtype=tf.float32)
ob_space = {}
atom_type = 5
ob_space['adj'] = gym.Space(shape=[3,5,5])
ob_space['node'] = gym.Space(shape=[1,5,atom_type])
ac_space = gym.spaces.MultiDiscrete([10, 10, 3])
policy = GCNPolicy(name='policy',ob_space=ob_space,ac_space=ac_space)
stochastic = True
env = gym.make('molecule-v0') # in gym format
env.init()
ob = env.reset()
# ob['adj'] = np.repeat(ob['adj'][None],2,axis=0)
# ob['node'] = np.repeat(ob['node'][None],2,axis=0)
print('adj',ob['adj'].shape)
print('node',ob['node'].shape)
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for i in range(20):
ob = env.reset()
for j in range(0,20):
ac,vpred,debug = policy.act(stochastic,ob)
# if ac[0]==ac[1]:
# print('error')
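                # --- Sketch of how the rollout typically continues (not part of
                # the original snippet): step the molecule environment with the
                # sampled action. The standard gym (ob, reward, done, info)
                # return signature is assumed for molecule-v0.
                ob, reward, done, info = env.step(ac)
                if done:
                    break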
# The from_logits argument ensures transformation into normalized probabilities
weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)

# Policy loss is defined by policy gradients, weighted by advantages.
# Note: we only calculate the loss on the actions we've actually taken.
actions = tf.cast(actions, tf.int32)
policy_loss = weighted_sparse_ce(actions, logits, sample_weight=advantages)

# Entropy loss can be calculated as cross-entropy of the policy over itself.
entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True)

# Here the signs are flipped because the optimizer minimizes.
return policy_loss - self.params['entropy'] * entropy_loss
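The names actions, advantages and logits in the fragment above belong to its enclosing loss method. A minimal sketch of how such a method is typically shaped in Keras-style A2C code, assuming actions and advantages arrive packed in a single tensor so the loss fits the (y_true, y_pred) signature; the _logits_loss name and the tf.split unpacking are illustrative, not taken from the snippet:

def _logits_loss(self, actions_and_advantages, logits):
    # Unpack the packed tensor into actions and advantages (assumed layout).
    actions, advantages = tf.split(actions_and_advantages, 2, axis=-1)
    weighted_sparse_ce = kls.SparseCategoricalCrossentropy(from_logits=True)
    policy_loss = weighted_sparse_ce(tf.cast(actions, tf.int32), logits,
                                     sample_weight=advantages)
    # Entropy bonus via cross-entropy of the policy with itself.
    entropy_loss = kls.categorical_crossentropy(logits, logits, from_logits=True)
    # Signs flipped because the optimizer minimizes.
    return policy_loss - self.params['entropy'] * entropy_loss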
if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)

    env = gym.make('CartPole-v0')
    model = Model(num_actions=env.action_space.n)
    agent = A2CAgent(model)

    rewards_history = agent.train(env)
    print("Finished training.")
    print("Total Episode Reward: %d out of 200" % agent.test(env, True))

    plt.style.use('seaborn')
    plt.plot(np.arange(0, len(rewards_history), 25), rewards_history[::25])
    plt.xlabel('Episode')
    plt.ylabel('Total Reward')
    plt.show()
def make_env(test):
    env = gym.make(args.env)
    # Use different random seeds for train and test envs
    env_seed = 2 ** 32 - args.seed if test else args.seed
    env.seed(env_seed)
    # Cast observations to float32 because our model uses float32
    env = chainerrl.wrappers.CastObservationToFloat32(env)
    if args.monitor:
        env = chainerrl.wrappers.Monitor(env, args.outdir)
    if args.render:
        env = chainerrl.wrappers.Render(env)
    return env
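Typical usage of this factory, assuming the usual chainerrl example layout with separate training and evaluation environments; the variable names below are illustrative, and a Box observation space with a discrete action space is assumed:

env = make_env(test=False)       # training environment
eval_env = make_env(test=True)   # evaluation environment, seeded differently
obs_size = env.observation_space.low.size
n_actions = env.action_space.n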
def make_envs(env_names):
    return [TfEnv(normalize(gym.make(env_name))) for env_name in env_names]
def new_env(self):
    gym.logger.set_level(40)  # to suppress warnings
    return gym.make('MountainCarContinuous-v0').unwrapped
import gym
# from common.env_wrappers import DummyVecEnv
from common.utils import make_env
from algorithms.a3c.a3c import A3C
from common.value_networks import *
from common.policy_networks import *
''' load environment '''
env_id = 'BipedalWalker-v2'
env = gym.make(env_id).unwrapped
# env = DummyVecEnv([lambda: env])  # The algorithms require a vectorized/wrapped environment to run
action_shape = env.action_space.shape
state_shape = env.observation_space.shape

# reproducible
seed = 2
np.random.seed(seed)
tf.random.set_seed(seed)
env.seed(seed)

''' build networks for the algorithm '''
num_hidden_layer = 4  # number of hidden layers for the networks
hidden_dim = 64  # dimension of hidden layers for the networks
num_workers = 2
net_list2 = []
for i in range(num_workers + 1):
    with tf.name_scope('A3C'):
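        # --- Sketch of how this loop typically fills net_list2 (not part of the
        # original snippet). The ValueNetwork / StochasticPolicyNetwork names and
        # their constructor arguments are assumptions about the imported
        # common.value_networks / common.policy_networks modules.
        with tf.name_scope('Critic'):
            v_net = ValueNetwork(env.observation_space,
                                 hidden_dim_list=[hidden_dim] * num_hidden_layer)
        with tf.name_scope('Actor'):
            policy_net = StochasticPolicyNetwork(env.observation_space, env.action_space,
                                                 hidden_dim_list=[hidden_dim] * num_hidden_layer)
    net_list2.append([v_net, policy_net])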
def run_async_vi_experiment(gym_name, problems, vi_maxiters=2500, iter_plan_interval=100,
                            first_plot_interval=1000, use_cache=False, biased=False):
    start_time = time.time()
    all_results = []

    env = gym.make(gym_name)
    test_env = gym.make(gym_name)
    env._render = None
    test_env._render = None

    if isinstance(problems, int):
        problems = list(range(problems))
    num_problems = len(problems)

    for j, problem_index in enumerate(problems):
        print("\nRunning problem {}/{}".format(j, num_problems))
        results_for_problem = []
        all_results.append(results_for_problem)

        env.fix_problem_index(problem_index)
        env.reset()
        runner = run_async_value_iteration(env, iter_plans=True, use_cache=use_cache,
                                           iter_plan_interval=iter_plan_interval,
                                           epsilon=0., vi_maxiters=vi_maxiters,
                                           biased=biased)

        for i, plan in enumerate(runner):
            test_env.fix_problem_index(problem_index)
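            # --- Sketch of how a plan is typically scored (not part of the
            # original snippet): roll it out in test_env and record the elapsed
            # time and return. The standard gym step signature is assumed.
            test_env.reset()
            total_reward = 0.
            for action in plan:
                _, reward, done, _ = test_env.step(action)
                total_reward += reward
                if done:
                    break
            results_for_problem.append((time.time() - start_time, total_reward))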
# pick your rendering mode
sim_params.render = render
sim_params.restart_instance = False
create_env, env_name = make_create_env(params=flow_params, version=version)
register_env(env_name, create_env)
env_params = flow_params['env']
env_params.restart_instance = False
# create the agent that will be used to compute the actions
agent = agent_cls(env=env_name, config=config)
checkpoint = result_dir + '/checkpoint_{}'.format(checkpoint_num)
checkpoint = checkpoint + '/checkpoint-{}'.format(checkpoint_num)
agent.restore(checkpoint)
env = gym.make(env_name)
if sim_params.restart_instance:
    env.restart_simulation(sim_params=sim_params, render=sim_params.render)

return env, env_params, agent
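A typical caller then rolls the restored policy out in the recreated environment. A minimal sketch, assuming a single-agent gym-style step signature and the older RLlib compute_action API; the horizon attribute on env_params follows flow's EnvParams and is an assumption here:

env, env_params, agent = ...  # returned by the function above
obs = env.reset()
for _ in range(env_params.horizon):
    action = agent.compute_action(obs)
    obs, reward, done, _ = env.step(action)
    if done:
        break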