import tensorflow as tf
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.policy_value_model import PolicyValueModel
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(env.get_initial_state()[0])
model = PolicyValueModel(args, predictions=env.action_size)
inputs = observations_to_window([observation]).to_inputs()
# predict_next only returns a policy for the last observation
# in the sequence, and applies masking and softmax to the output
policy, value = model.predict_next(inputs)
# The policy is a 1D array of size (actions * num_nodes)
assert policy.shape.rank == 1
assert policy.shape == (env.action_size * len(observation.nodes),)
# There should be one floating point output Value
assert value.shape.rank == 0
assert isinstance(float(value.numpy()), float)
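# A hedged follow-up sketch: take the highest-scoring entry of the flat policy and
# split it back into a (rule, node) pair. The rule-major ordering assumed by the
# divmod below is for illustration only; the asserts above do not confirm it.
import numpy as np
action_index = int(np.argmax(policy.numpy()))
rule_index, node_index = divmod(action_index, len(observation.nodes))
print(f"best action: rule {rule_index} applied to node {node_index}")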
import tempfile
from typing import Tuple
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
from thinc.api import TensorFlowWrapper, keras_subclass
from thinc.layers import Linear, ReLu, Softmax, chain, with_list
from thinc.model import Model
from thinc.shims.tensorflow import TensorFlowShim
from thinc.types import Array, Array1d, Array2d, ArrayNd
args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
    env.get_initial_state()[0], rnn_size=args.lstm_units
)
# output shape is: [num_observations, max_nodes_len, embedding_dimensions]
window = observations_to_window([observation, observation])
inputs = window.to_inputs()
input_shape = window.to_input_shapes()
@keras_subclass(
    "MathyEmbedding",
    X=window.to_inputs(),
    Y=window.mask,
    input_shape=input_shape,
    args={"config": args},
)
# NOTE: the original example is cut off after the decorator arguments above; the
# minimal subclass below (and its EmbeddingModel name) is an assumed completion.
class EmbeddingModel(MathyEmbedding):
    pass
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.policy_value_model import PolicyValueModel
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
args = BaseConfig(use_env_features=True)
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(env.get_initial_state()[0])
model = PolicyValueModel(args, predictions=env.action_size)
inputs = observations_to_window([observation]).to_inputs()
# predict_next only returns a policy for the last observation
# in the sequence, and applies masking and softmax to the output
policy, value = model.predict_next(inputs)
# The policy is a 1D array of size (actions * num_nodes)
assert policy.shape.rank == 1
assert policy.shape == (env.action_size * len(observation.nodes),)
# There should be one floating point output Value
assert value.shape.rank == 0
assert isinstance(float(value.numpy()), float)
import numpy as np
from mathy import envs
from mathy.agents.base_config import BaseConfig
from mathy.agents.embedding import MathyEmbedding
from mathy.env import MathyEnv
from mathy.state import MathyObservation, observations_to_window
args = BaseConfig()
env: MathyEnv = envs.PolySimplify()
observation: MathyObservation = env.state_to_observation(
    env.get_initial_state()[0], rnn_size=args.lstm_units
)
model = MathyEmbedding(args)
inputs = observations_to_window([observation]).to_inputs()
# Expect that the RNN states are zero to begin
assert np.count_nonzero(model.state_h.numpy()) == 0
assert np.count_nonzero(model.state_c.numpy()) == 0
embeddings = model.call(inputs)
# Expect that the RNN states are non-zero
assert np.count_nonzero(model.state_h.numpy()) > 0
assert np.count_nonzero(model.state_c.numpy()) > 0
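# A hedged follow-up, based on the shape comment in the thinc example above: the
# embedding output is expected to be [num_observations, max_nodes_len, embedding_dimensions],
# so with a single-observation window the second dimension should match the node count.
print("embeddings shape:", embeddings.shape)
print("nodes in the observation:", len(observation.nodes))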
from mathy import (
    MathyEnv,
    MathyEnvState,
    MathyObservation,
    envs,
    observations_to_window,
)
env: MathyEnv = envs.PolySimplify()
state: MathyEnvState = env.get_initial_state()[0]
observation: MathyObservation = env.state_to_observation(state)
# As many nodes as values
assert len(observation.nodes) == len(observation.values)
# Mask is number of nodes times number of actions
assert len(observation.mask) == len(observation.nodes) * env.action_size
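# A hedged follow-up: the mask is a flat vector over (rule, node) pairs, so summing
# it counts the currently valid actions (assuming 0/1 entries, matching the masking
# that predict_next applies in the examples above).
valid_action_count = sum(observation.mask)
print(f"valid actions: {valid_action_count} of {len(observation.mask)}")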
return "PN"
def can_apply_to(self, node) -> bool:
is_sub = isinstance(node, SubtractExpression)
is_parent_add = isinstance(node.parent, AddExpression)
return is_sub and (node.parent is None or is_parent_add)
def apply_to(self, node):
change = super().apply_to(node)
change.save_parent() # connect result to node.parent
result = AddExpression(node.left, NegateExpression(node.right))
result.set_changed() # mark this node as changed for visualization
return change.done(result)
class CustomActionEnv(envs.PolySimplify):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.rules = MathyEnv.core_rules() + [PlusNegationRule()]
env = CustomActionEnv()
state = MathyEnvState(problem="4x - 2x")
expression = env.parser.parse(state.agent.problem)
action = env.random_action(expression, PlusNegationRule)
out_state, transition, _ = env.get_next_state(state, action)
assert out_state.agent.problem == "4x + -2x"
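# Follow-up check: the custom rule registered in CustomActionEnv.__init__ above is
# now part of the environment's rule list.
assert any(isinstance(rule, PlusNegationRule) for rule in env.rules)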
# NOTE: the start of this example was cut off; the imports, the temporary model
# directory, and the A3CConfig(...) opener below are an assumed, minimal
# reconstruction (the import paths and the A3CConfig name are assumptions).
import shutil
import tempfile
from mathy.agents.a3c import A3CAgent, A3CConfig
from mathy.agents.policy_value_model import get_or_create_policy_model
from mathy.envs import PolySimplify
model_folder = tempfile.mkdtemp()
args = A3CConfig(
    max_eps=3,
    verbose=True,
    topics=["poly"],
    model_dir=model_folder,
    update_gradients_every=4,
    num_workers=1,
    units=4,
    embedding_units=4,
    lstm_units=4,
    print_training=True,
)
instance = A3CAgent(args)
instance.train()
# Load the model back in
model_two = get_or_create_policy_model(
    args=args, predictions=PolySimplify().action_size, is_main=True
)
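# A hedged follow-up sketch: exercise the reloaded model with one prediction,
# mirroring the predict_next example at the top of this document and assuming
# get_or_create_policy_model returns a PolicyValueModel.
from mathy.state import observations_to_window
check_env = PolySimplify()
check_observation = check_env.state_to_observation(check_env.get_initial_state()[0])
check_policy, check_value = model_two.predict_next(
    observations_to_window([check_observation]).to_inputs()
)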
# Comment this out to keep your model
shutil.rmtree(model_folder)