# Example of using the Simple Texas Hold'em environment with random agents
import rlcard
from rlcard.agents.random_agent import RandomAgent

# Make the environment
env = rlcard.make('simpletexasholdem')
print('############## Environment of Simple Texas Holdem Initialized ################')
env.test()

# Set the agents
agent_0 = RandomAgent()
agent_1 = RandomAgent()
agent_2 = RandomAgent()
env.set_agents([agent_0, agent_1, agent_2])

# Seed everything
env.set_seed(0)
agent_0.set_seed(0)
agent_1.set_seed(0)
agent_2.set_seed(0)

for _ in range(1):
    # Generate data from the environment
    trajectories, player_wins = env.run()
    print(trajectories)
    print(player_wins)
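
# Minimal sketch (not part of the original example): run several episodes and
# tally the per-player results, assuming env.run() keeps returning a per-player
# win/payoff list as above.
win_counts = [0 for _ in range(3)]
for _ in range(10):
    _, player_wins = env.run()
    for player_id, win in enumerate(player_wins):
        win_counts[player_id] += win
print(win_counts)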
# Create one NFSP agent per player
for i in range(env.player_num):
    agent = NFSPAgent(sess,
                      scope='nfsp' + str(i),
                      action_num=env.action_num,
                      state_shape=env.state_shape,
                      hidden_layers_sizes=[512, 1024, 2048, 1024, 512],
                      anticipatory_param=0.5,
                      batch_size=256,
                      rl_learning_rate=0.00005,
                      sl_learning_rate=0.00001,
                      min_buffer_size_to_learn=memory_init_size,
                      q_replay_memory_size=int(1e5),
                      q_replay_memory_init_size=memory_init_size,
                      q_norm_step=norm_step,
                      q_batch_size=256,
                      q_mlp_layers=[512, 1024, 2048, 1024, 512])
    agents.append(agent)

sess.run(tf.global_variables_initializer())

random_agent = RandomAgent(action_num=eval_env.action_num)
env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent, random_agent])

# Count the number of steps
step_counters = [0 for _ in range(env.player_num)]

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on UNO',
                log_path=log_path, csv_path=csv_path)

for episode in range(episode_num):
    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()
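
    # Sketch of the rest of the episode loop (assumed continuation, following the
    # same pattern as the DQN fragments below): generate data and feed each
    # player's transitions to its own agent.
    trajectories, _ = env.run(is_training=True)
    for i in range(env.player_num):
        for ts in trajectories[i]:
            agents[i].feed(ts)
            step_counters[i] += 1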
norm_step = 100

# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set the agents
    agent = DQNAgent(sess,
                     action_num=env.action_num,
                     replay_memory_size=20000,
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[6, 5, 15],
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    env.set_agents([agent, random_agent, random_agent])
    eval_env.set_agents([agent, random_agent, random_agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='episode', ylabel='reward', legend='DQN on Dou Dizhu',
                    log_path='./experiments/doudizhu_dqn_result/log.txt',
                    csv_path='./experiments/doudizhu_dqn_result/performance.csv')

    for episode in range(episode_num):
        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)

        # Feed transitions into agent memory, and train the agent
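        # Sketch of the feeding step (assumed continuation of this fragment):
        # feed() stores each of the trained player's transitions and lets the
        # agent train once enough data has been collected.
        for ts in trajectories[0]:
            agent.feed(ts)
            step_counter += 1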
# Create one NFSP agent per player
for i in range(env.player_num):
    agent = NFSPAgent(sess,
                      scope='nfsp' + str(i),
                      action_num=env.action_num,
                      state_shape=env.state_shape,
                      hidden_layers_sizes=[512, 1024, 2048, 1024, 512],
                      anticipatory_param=0.5,
                      batch_size=256,
                      rl_learning_rate=0.00005,
                      sl_learning_rate=0.00001,
                      min_buffer_size_to_learn=memory_init_size,
                      q_replay_memory_size=int(1e5),
                      q_replay_memory_init_size=memory_init_size,
                      q_norm_step=norm_step,
                      q_batch_size=256,
                      q_mlp_layers=[512, 1024, 2048, 1024, 512])
    agents.append(agent)

sess.run(tf.global_variables_initializer())

random_agent = RandomAgent(action_num=eval_env.action_num)
env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent, random_agent])

# Count the number of steps
step_counters = [0 for _ in range(env.player_num)]

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on Dou Dizhu',
                log_path='./experiments/doudizhu_nfsp_result/log.txt',
                csv_path='./experiments/doudizhu_nfsp_result/performance.csv')

for episode in range(episode_num):
    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()
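
    # Sketch of a periodic evaluation step (evaluate_every and evaluate_num are
    # hypothetical settings, not defined in this fragment): play against the
    # random agents in eval_env and report the average result of the NFSP agent.
    if episode % evaluate_every == 0:
        total_reward = 0
        for _ in range(evaluate_num):
            _, payoffs = eval_env.run(is_training=False)
            total_reward += payoffs[0]
        print('Episode {}: average reward {}'.format(episode, total_reward / evaluate_num))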
# Example of using the Dou Dizhu environment with random agents
import rlcard
from rlcard.agents.random_agent import RandomAgent

# Make the environment
env = rlcard.make('doudizhu')
print('############## Environment of Dou Dizhu Initialized ################')

# Set the agents (309 is the size of the Dou Dizhu action space)
agent_0 = RandomAgent(309)
agent_1 = RandomAgent(309)
agent_2 = RandomAgent(309)
env.set_agents([agent_0, agent_1, agent_2])

for _ in range(1):
    # TODO: add multi-process
    # Generate data from the environment
    trajectories, player_wins = env.run(False)
    print(trajectories)
    print(player_wins)
# Set a global seed
set_global_seed(0)

with tf.Session() as sess:
    # Set the agents
    global_step = tf.Variable(0, name='global_step', trainable=False)
    agent = DQNAgent(sess,
                     scope='dqn',
                     action_num=env.action_num,
                     replay_memory_size=int(1e5),
                     replay_memory_init_size=memory_init_size,
                     norm_step=norm_step,
                     state_shape=[52],
                     mlp_layers=[512, 512])
    random_agent = RandomAgent(action_num=eval_env.action_num)
    sess.run(tf.global_variables_initializer())
    env.set_agents([agent, random_agent])
    eval_env.set_agents([agent, random_agent])

    # Count the number of steps
    step_counter = 0

    # Init a Logger to plot the learning curve
    logger = Logger(xlabel='timestep', ylabel='reward', legend='DQN on No-Limit Texas Holdem',
                    log_path='./experiments/nolimit_holdem_dqn_result/log.txt',
                    csv_path='./experiments/nolimit_holdem_dqn_result/performance.csv')

    for episode in range(episode_num):
        # Generate data from the environment
        trajectories, _ = env.run(is_training=True)
for i in range(env.player_num):
    agent = NFSPAgent(sess,
                      scope='nfsp' + str(i),
                      action_num=env.action_num,
                      state_shape=env.state_shape,
                      hidden_layers_sizes=[512, 512],
                      anticipatory_param=0.1,
                      min_buffer_size_to_learn=memory_init_size,
                      q_replay_memory_init_size=memory_init_size,
                      q_norm_step=norm_step,
                      q_mlp_layers=[512, 512])
    agents.append(agent)

sess.run(tf.global_variables_initializer())

random_agent = RandomAgent(action_num=eval_env.action_num)
env.set_agents(agents)
eval_env.set_agents([agents[0], random_agent])

# Count the number of steps
step_counters = [0 for _ in range(env.player_num)]

# Init a Logger to plot the learning curve
logger = Logger(xlabel='timestep', ylabel='reward', legend='NFSP on Limit Texas Holdem',
                log_path=log_path, csv_path=csv_path)

for episode in range(episode_num):
    # First sample a policy for the episode
    for agent in agents:
        agent.sample_episode_policy()
def normalize(e, agents, num):
    ''' Feed random data to the agents' normalizers

    Args:
        e (Env): An Env instance
        agents (list): A list of Agent objects
        num (int): The number of random steps to feed
    '''
    begin_step = e.timestep
    # Temporarily play with random agents to collect transitions
    e.set_agents([RandomAgent(e.action_num) for _ in range(e.player_num)])
    while e.timestep - begin_step < num:
        trajectories, _ = e.run(is_training=False)
        for agent in agents:
            for tra in trajectories:
                for ts in tra:
                    agent.feed(ts)
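
# Minimal usage sketch for normalize(): env, agent, and norm_step are assumed to
# come from one of the DQN fragments above. Feeding norm_step random transitions
# lets the agent's state normalizer estimate its statistics before training starts.
normalize(env, [agent], norm_step)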