}
env_state, prob = mathy_env.get_initial_state(options)

# Configure MCTS options for train/eval: evaluation searches deeper and acts
# greedily, while training keeps exploration noise and temperature moves.
if eval_run:
    num_rollouts = 500
    num_exploration_moves = 0
    epsilon = 0.0
else:
    num_rollouts = 250
    num_exploration_moves = int(mathy_env.max_moves * 0.8)
    epsilon = 0.9

# Execute one episode with the train or eval model
model = mathy_eval if eval_run else mathy
mcts = MCTS(mathy_env, model, epsilon, num_rollouts)
actor = ActorMCTS(mcts, num_exploration_moves)
final_result = None
time_steps: List[deprecated_MathyEnvObservation] = []
episode_steps = 0
start = time.time()
while final_result is None:
    episode_steps += 1
    env_state, train_example, final_result = actor.step(
        mathy_env, env_state, model, time_steps
    )
elapsed = time.time() - start
episode_examples, episode_reward, is_win = final_result
lesson_experience_count += len(episode_examples)
lesson_problem_count += 1
if is_win:
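# Hedged aside (not from the original): in AlphaZero-style search, an epsilon
# like the one above typically weights Dirichlet noise mixed into the root
# priors, so eval (epsilon=0.0) is greedy and training stays exploratory.
# The helper below is an illustrative sketch of that mixing, not mathy's code.
import numpy as np

def mix_root_priors(priors: np.ndarray, epsilon: float, alpha: float = 0.3) -> np.ndarray:
    """Blend network priors with Dirichlet noise at the search root."""
    noise = np.random.dirichlet([alpha] * len(priors))
    return (1.0 - epsilon) * priors + epsilon * noise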
from typing import Optional

import gym
import numpy as np
import plac
import tensorflow as tf
from gym.envs.registration import register

from mathy.a3c import A3CAgent, A3CArgs
from mathy.agent.controller import MathModel
from mathy.agent.training.mcts import MCTS
from mathy.gym import MathyGymEnv

# Module-level singletons shared by the helper functions below
__mcts: Optional[MCTS] = None
__model: Optional[MathModel] = None
__agent: Optional[A3CAgent] = None


def mathy_load_model(gym_env: MathyGymEnv):
    """Lazily construct and start the shared MathModel for the given env."""
    global __model
    if __model is None:
        import os

        # Silence TensorFlow C++ and Python logging before the model starts
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "5"
        tf.compat.v1.logging.set_verbosity("CRITICAL")
        __model = MathModel(gym_env.mathy.action_size, "agents/ablated")
        __model.start()


def mathy_free_model():
    # The body was cut off in the source; this completion is a sketch that
    # assumes MathModel.stop() mirrors the start() call above.
    global __model
    if __model is not None:
        __model.stop()
        __model = None
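# Illustrative usage of the load/free pair above; the env id is a placeholder
# assumption, not a confirmed mathy gym registration.
def run_with_model(env_id: str = "mathy-poly-easy-v0") -> None:
    env = gym.make(env_id)
    try:
        mathy_load_model(env.unwrapped)
        # ... run episodes against the shared __model here ...
    finally:
        mathy_free_model()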
"""
This function executes one episode.

As the game is played, each turn is added as a training example to
trainExamples. The game continues until get_state_value returns a non-zero
value, then the outcome of the game is used to assign values to each example
in trainExamples.
"""
if game is None:
    raise NotImplementedError("PracticeRunner.get_game returned None type")
if predictor is None:
    raise NotImplementedError("PracticeRunner.get_predictor returned None type")
env_state, complexity = game.get_initial_state()
episode_history = []
move_count = 0
mcts = MCTS(game, predictor, self.config.cpuct, self.config.num_mcts_sims)
actor = ActorMCTS(mcts, self.config.num_exploration_moves)
while True:
    move_count += 1
    env_state, result = actor.step(game, env_state, predictor, episode_history)
    if result is not None:
        return result + (complexity,)
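# Hedged sketch of the value assignment the docstring describes: when the
# episode ends, the terminal outcome is copied back onto every stored turn.
# The example shape and the discount are assumptions, not mathy's actual code.
from typing import Any, List, Tuple

def assign_episode_values(
    history: List[Tuple[Any, Any]], final_value: float, discount: float = 0.99
) -> List[Tuple[Any, Any, float]]:
    """Attach a (discounted) copy of the final outcome to each recorded turn."""
    examples = []
    value = final_value
    for state, policy in reversed(history):
        examples.append((state, policy, value))
        value *= discount
    return list(reversed(examples))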
def mcts_start_problem(gym_env: MathyGymEnv):
    """Create a fresh MCTS for evaluation: no exploration noise, greedy search."""
    global __mcts, __model
    num_rollouts = 500
    epsilon = 0.0
    mathy_load_model(gym_env)
    assert __model is not None
    __mcts = MCTS(
        env=gym_env.mathy,
        model=__model,
        cpuct=0.0,
        num_mcts_sims=num_rollouts,
        epsilon=epsilon,
    )
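# Hedged refactor sketch: these eval settings mirror the eval branch of the
# first snippet, so a single helper could build either configuration. The
# keyword form follows the MCTS call above; the training cpuct of 1.0 is an
# assumption, not a value from the original.
def build_mcts(env, model, eval_run: bool) -> MCTS:
    if eval_run:
        return MCTS(env=env, model=model, cpuct=0.0, num_mcts_sims=500, epsilon=0.0)
    return MCTS(env=env, model=model, cpuct=1.0, num_mcts_sims=250, epsilon=0.9)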
print("\n[exam] {} - {}...".format(plan.name.lower(), lesson.name.lower()))
# Fill up a certain amount of experience per problem type
lesson_experience_count = 0
if lesson.num_observations is not None:
iter_experience = lesson.num_observations
else:
iter_experience = short_term_size
while lesson_experience_count < iter_experience:
env_state, complexity = controller.get_initial_state(print_problem=False)
complexity_value = complexity * 3
controller.max_moves = (
lesson.max_turns if lesson.max_turns is not None else complexity_value
)
# generate a new problem now that we've set the max_turns
env_state, complexity = controller.get_initial_state()
mcts = MCTS(controller, model, epsilon, mcts_sims)
actor = ActorMCTS(mcts, num_exploration_moves)
final_result = None
time_steps = []
episode_steps = 0
start = time.time()
while final_result is None:
episode_steps = episode_steps + 1
env_state, train_example, final_result = actor.step(
controller, env_state, model, time_steps
)
elapsed = time.time() - start
episode_examples, episode_reward, is_win = final_result
lesson_experience_count += len(episode_examples)
if is_win:
num_solved = num_solved + 1
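    # Illustrative continuation (not in the original): elapsed and
    # episode_reward are computed above but never reported; a summary line
    # could surface them per problem.
    print(
        "[exam] steps: {} reward: {:.2f} elapsed: {:.2f}s win: {}".format(
            episode_steps, episode_reward, elapsed, is_win
        )
    )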