From ae076a54256c969cc27a8461197f38fdfdd202a3 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Fri, 22 Sep 2017 14:01:57 -0700 Subject: [PATCH 01/11] made BrainParameters a class to set default values Modified the error message if the state is discrete --- .../Assets/ML-Agents/Scripts/Brain.cs | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/unity-environment/Assets/ML-Agents/Scripts/Brain.cs b/unity-environment/Assets/ML-Agents/Scripts/Brain.cs index c367472cfd..e67dcc0a87 100755 --- a/unity-environment/Assets/ML-Agents/Scripts/Brain.cs +++ b/unity-environment/Assets/ML-Agents/Scripts/Brain.cs @@ -55,27 +55,26 @@ public struct resolution * Defines brain-specific parameters */ [System.Serializable] -public struct BrainParameters +public class BrainParameters { - public int stateSize; + public int stateSize = 1; /**< \brief If continuous : The length of the float vector that represents * the state *
If discrete : The number of possible values the state can take*/ - public int actionSize; + public int actionSize = 1; /**< \brief If continuous : The length of the float vector that represents the action *
If discrete : The number of possible values the action can take*/ - public int memorySize; + public int memorySize = 0; /**< \brief The length of the float vector that holds the memory for the agent */ public resolution[] cameraResolutions; /**<\brief The list of observation resolutions for the brain */ public string[] actionDescriptions; /**< \brief The list of strings describing what the actions correpond to */ - public StateType actionSpaceType; + public StateType actionSpaceType = StateType.discrete; /**< \brief Defines if the action is discrete or continuous */ - public StateType stateSpaceType; + public StateType stateSpaceType = StateType.continuous; /**< \brief Defines if the state is discrete or continuous */ - } /** @@ -87,7 +86,7 @@ public struct BrainParameters */ public class Brain : MonoBehaviour { - public BrainParameters brainParameters; + public BrainParameters brainParameters = new BrainParameters(); /**< \brief Defines brain specific parameters such as the state size*/ public BrainType brainType; /**< \brief Defines what is the type of the brain : @@ -178,10 +177,15 @@ public Dictionary> CollectStates() foreach (KeyValuePair idAgent in agents) { List states = idAgent.Value.CollectState(); - if (states.Count != brainParameters.stateSize) + if ((states.Count != brainParameters.stateSize) && (brainParameters.stateSpaceType == StateType.continuous )) + { + throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}: + Was expecting {1} continuous states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count)); + } + if ((states.Count != 1) && (brainParameters.stateSpaceType == StateType.discrete )) { throw new UnityAgentsException(string.Format(@"The number of states does not match for agent {0}: - Was expecting {1} states but received {2}.", idAgent.Value.gameObject.name, brainParameters.stateSize, states.Count)); + Was expecting 1 discrete states but received {1}.", idAgent.Value.gameObject.name, states.Count)); } result.Add(idAgent.Key, states); } From 0018191dc632eb2538c4090b814c3ff13f59e6a7 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Sat, 23 Sep 2017 12:26:41 -0700 Subject: [PATCH 02/11] Add discrete state support to PPO and provide discrete state example environment --- python/ppo/models.py | 128 ++-- .../Assets/ML-Agents/Examples/Basic.meta | 9 + .../ML-Agents/Examples/Basic/Materials.meta | 9 + .../Examples/Basic/Materials/agent.mat | 76 ++ .../Examples/Basic/Materials/agent.mat.meta | 9 + .../Examples/Basic/Materials/goal.mat | 76 ++ .../Examples/Basic/Materials/goal.mat.meta | 9 + .../ML-Agents/Examples/Basic/Scene.unity | 702 ++++++++++++++++++ .../ML-Agents/Examples/Basic/Scene.unity.meta | 8 + .../ML-Agents/Examples/Basic/Scripts.meta | 9 + .../Examples/Basic/Scripts/BasicAcademy.cs | 17 + .../Basic/Scripts/BasicAcademy.cs.meta | 12 + .../Examples/Basic/Scripts/BasicAgent.cs | 63 ++ .../Examples/Basic/Scripts/BasicAgent.cs.meta | 12 + .../Examples/Basic/Scripts/BasicDecision.cs | 18 + .../Basic/Scripts/BasicDecision.cs.meta | 12 + 16 files changed, 1106 insertions(+), 63 deletions(-) create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic.meta create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta create mode 100644 
unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta create mode 100755 unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta create mode 100755 unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta create mode 100755 unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs create mode 100644 unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta diff --git a/python/ppo/models.py b/python/ppo/models.py index 6c035c6d36..64a467c5f3 100755 --- a/python/ppo/models.py +++ b/python/ppo/models.py @@ -7,7 +7,7 @@ def create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_step=5e6): """ - Takes a Unity environment and model-specific hyperparameters and returns the + Takes a Unity environment and model-specific hyper-parameters and returns the appropriate PPO agent model for the environment. :param env: a Unity environment. :param lr: Learning rate. @@ -15,26 +15,18 @@ def create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_ste :param epsilon: Value for policy-divergence threshold. :param beta: Strength of entropy regularization. :return: a sub-class of PPOAgent tailored to the environment. + :param max_step: Total number of training steps. """ brain_name = env.brain_names[0] - if env.brains[brain_name].action_space_type == "continuous": - if env.brains[brain_name].number_observations == 0: - return ContinuousControlModel(lr, env.brains[brain_name].state_space_size, - env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step) + brain = env.brains[brain_name] + if brain.action_space_type == "continuous": + if brain.number_observations == 0: + return ContinuousControlModel(lr, brain, h_size, epsilon, max_step) else: raise UnityEnvironmentException("There is currently no PPO model which supports both a continuous " "action space and camera observations.") - if env.brains[brain_name].action_space_type == "discrete": - if env.brains[brain_name].number_observations == 0: - return DiscreteControlModel(lr, env.brains[brain_name].state_space_size, - env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step) - else: - brain = env.brains[brain_name] - if env.brains[brain_name].state_space_size > 0: - print("This brain contains agents with both observations and states. There is currently no PPO model" - "which supports this. 
Defaulting to Vision-based PPO model.") - h, w = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] - return VisualDiscreteControlModel(lr, h, w, env.brains[brain_name].action_space_size, h_size, epsilon, beta, max_step) + if brain.action_space_type == "discrete": + return DiscreteControlModel(lr, brain, h_size, epsilon, beta, max_step) def save_model(sess, saver, model_path="./", steps=0): @@ -69,7 +61,30 @@ def export_graph(model_path, env_name="env", target_nodes="action"): class PPOModel(object): - def __init__(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step): + def create_visual_encoder(self, o_size_h, o_size_w, h_size): + self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32, + name='observation_0') + self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2], + use_bias=False, activation=tf.nn.elu) + self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2], + use_bias=False, activation=tf.nn.elu) + hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=tf.nn.elu) + return hidden + + def create_continuous_state_encoder(self, s_size, h_size): + self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state') + hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.elu) + hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=tf.nn.elu) + return hidden_2 + + def create_discrete_state_encoder(self, s_size, h_size): + self.state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state') + state_in = tf.reshape(self.state_in, [-1]) + state_onehot = c_layers.one_hot_encoding(state_in, s_size) + hidden = tf.layers.dense(state_onehot, h_size, activation=tf.nn.elu) + return hidden + + def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step): """ Creates training-specific Tensorflow ops for PPO models. :param probs: Current policy probabilities @@ -79,6 +94,7 @@ def __init__(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step :param entropy: Current policy entropy :param epsilon: Value for policy-divergence threshold :param lr: Learning rate + :param max_step: Total number of training steps. """ self.returns_holder = tf.placeholder(shape=[None], dtype=tf.float32, name='discounted_rewards') self.advantage = tf.placeholder(shape=[None, 1], dtype=tf.float32, name='advantages') @@ -104,13 +120,16 @@ def __init__(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step class ContinuousControlModel(PPOModel): - def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step): + def __init__(self, lr, brain, h_size, epsilon, max_step): """ Creates Continuous Control Actor-Critic model. 
:param s_size: State-space size :param a_size: Action-space size :param h_size: Hidden layer size """ + s_size = brain.state_space_size + a_size = brain.action_space_size + self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state') self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size') hidden_policy = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.tanh) @@ -138,57 +157,40 @@ def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step): self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32, name='old_probabilities') - PPOModel.__init__(self, self.probs, self.old_probs, self.value, self.entropy, 0.0, epsilon, lr, max_step) + self.create_ppo_optimizer(self.probs, self.old_probs, self.value, self.entropy, 0.0, epsilon, lr, max_step) class DiscreteControlModel(PPOModel): - def __init__(self, lr, s_size, a_size, h_size, epsilon, beta, max_step): + def __init__(self, lr, brain, h_size, epsilon, beta, max_step): """ Creates Discrete Control Actor-Critic model. - :param s_size: State-space size - :param a_size: Action-space size + :param brain: State-space size :param h_size: Hidden layer size """ - self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state') - self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size') - hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.elu) - hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=tf.nn.elu) - self.policy = tf.layers.dense(hidden_2, a_size, activation=None, use_bias=False, - kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1)) - self.probs = tf.nn.softmax(self.policy) - self.action = tf.multinomial(self.policy, 1) - self.output = tf.identity(self.action, name='action') - self.value = tf.layers.dense(hidden_2, 1, activation=None, use_bias=False) - - self.entropy = -tf.reduce_sum(self.probs * tf.log(self.probs + 1e-10), axis=1) - - self.action_holder = tf.placeholder(shape=[None], dtype=tf.int32) - self.selected_actions = c_layers.one_hot_encoding(self.action_holder, a_size) - self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32, name='old_probabilities') - self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1) - self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1) - - PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs, - self.value, self.entropy, beta, epsilon, lr, max_step) + hidden_state, hidden_visual, hidden = None, None, None + if brain.number_observations > 0: + h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] + hidden_visual = self.create_visual_encoder(h_size, w_size, h_size) + if brain.state_space_size > 0: + s_size = brain.state_space_size + if brain.state_space_type == "continuous": + hidden_state = self.create_continuous_state_encoder(s_size, h_size) + else: + hidden_state = self.create_discrete_state_encoder(s_size, h_size) + + if hidden_visual is None and hidden_state is None: + raise Exception("No valid network configuration possible. 
" + "There are no states or observations in this brain") + elif hidden_visual is not None and hidden_state is None: + hidden = hidden_visual + elif hidden_visual is None and hidden_state is not None: + hidden = hidden_state + elif hidden_visual is not None and hidden_state is not None: + hidden = tf.concat([hidden_visual, hidden_state], axis=1) + + a_size = brain.action_space_size - -class VisualDiscreteControlModel(PPOModel): - def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta, max_step): - """ - Creates Discrete Control Actor-Critic model for use with visual observations (images). - :param o_size_h: Observation height. - :param o_size_w: Observation width. - :param a_size: Action-space size. - :param h_size: Hidden layer size. - """ - self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32, - name='observation_0') - self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2], - use_bias=False, activation=tf.nn.elu) - self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2], - use_bias=False, activation=tf.nn.elu) - self.batch_size = tf.placeholder(shape=None, dtype=tf.int32) - hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=tf.nn.elu) + self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size') self.policy = tf.layers.dense(hidden, a_size, activation=None, use_bias=False, kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1)) self.probs = tf.nn.softmax(self.policy) @@ -204,5 +206,5 @@ def __init__(self, lr, o_size_h, o_size_w, a_size, h_size, epsilon, beta, max_st self.responsible_probs = tf.reduce_sum(self.probs * self.selected_actions, axis=1) self.old_responsible_probs = tf.reduce_sum(self.old_probs * self.selected_actions, axis=1) - PPOModel.__init__(self, self.responsible_probs, self.old_responsible_probs, - self.value, self.entropy, beta, epsilon, lr, max_step) + self.create_ppo_optimizer(self.responsible_probs, self.old_responsible_probs, + self.value, self.entropy, beta, epsilon, lr, max_step) \ No newline at end of file diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic.meta b/unity-environment/Assets/ML-Agents/Examples/Basic.meta new file mode 100644 index 0000000000..3eace2f071 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 230c334ab2f144bcda6eea42d18ebdc8 +folderAsset: yes +timeCreated: 1506189168 +licenseType: Pro +DefaultImporter: + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta new file mode 100644 index 0000000000..bf308f3ba4 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 0f9b2a7b3f61045b8a791eeae8175dc5 +folderAsset: yes +timeCreated: 1506189694 +licenseType: Pro +DefaultImporter: + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat new file mode 100644 index 0000000000..205af00a64 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat @@ -0,0 +1,76 @@ +%YAML 1.1 +%TAG !u! 
tag:unity3d.com,2011: +--- !u!21 &2100000 +Material: + serializedVersion: 6 + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_Name: agent + m_Shader: {fileID: 46, guid: 0000000000000000f000000000000000, type: 0} + m_ShaderKeywords: + m_LightmapFlags: 4 + m_EnableInstancingVariants: 0 + m_DoubleSidedGI: 0 + m_CustomRenderQueue: -1 + stringTagMap: {} + disabledShaderPasses: [] + m_SavedProperties: + serializedVersion: 3 + m_TexEnvs: + - _BumpMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _DetailAlbedoMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _DetailMask: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _DetailNormalMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _EmissionMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _MainTex: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _MetallicGlossMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _OcclusionMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _ParallaxMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + m_Floats: + - _BumpScale: 1 + - _Cutoff: 0.5 + - _DetailNormalMapScale: 1 + - _DstBlend: 0 + - _GlossMapScale: 1 + - _Glossiness: 0.5 + - _GlossyReflections: 1 + - _Metallic: 0 + - _Mode: 0 + - _OcclusionStrength: 1 + - _Parallax: 0.02 + - _SmoothnessTextureChannel: 0 + - _SpecularHighlights: 1 + - _SrcBlend: 1 + - _UVSec: 0 + - _ZWrite: 1 + m_Colors: + - _Color: {r: 0.10980392, g: 0.6039216, b: 1, a: 0.8392157} + - _EmissionColor: {r: 0, g: 0, b: 0, a: 1} diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta new file mode 100644 index 0000000000..81241b699f --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/agent.mat.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 260483cdfc6b14e26823a02f23bd8baa +timeCreated: 1506189720 +licenseType: Pro +NativeFormatImporter: + mainObjectFileID: 2100000 + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat new file mode 100644 index 0000000000..c90895b66c --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat @@ -0,0 +1,76 @@ +%YAML 1.1 +%TAG !u! 
tag:unity3d.com,2011: +--- !u!21 &2100000 +Material: + serializedVersion: 6 + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_Name: goal + m_Shader: {fileID: 46, guid: 0000000000000000f000000000000000, type: 0} + m_ShaderKeywords: + m_LightmapFlags: 4 + m_EnableInstancingVariants: 0 + m_DoubleSidedGI: 0 + m_CustomRenderQueue: -1 + stringTagMap: {} + disabledShaderPasses: [] + m_SavedProperties: + serializedVersion: 3 + m_TexEnvs: + - _BumpMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _DetailAlbedoMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _DetailMask: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _DetailNormalMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _EmissionMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _MainTex: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _MetallicGlossMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _OcclusionMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + - _ParallaxMap: + m_Texture: {fileID: 0} + m_Scale: {x: 1, y: 1} + m_Offset: {x: 0, y: 0} + m_Floats: + - _BumpScale: 1 + - _Cutoff: 0.5 + - _DetailNormalMapScale: 1 + - _DstBlend: 0 + - _GlossMapScale: 1 + - _Glossiness: 0.5 + - _GlossyReflections: 1 + - _Metallic: 0 + - _Mode: 0 + - _OcclusionStrength: 1 + - _Parallax: 0.02 + - _SmoothnessTextureChannel: 0 + - _SpecularHighlights: 1 + - _SrcBlend: 1 + - _UVSec: 0 + - _ZWrite: 1 + m_Colors: + - _Color: {r: 0.5058824, g: 0.74509805, b: 0.25490198, a: 1} + - _EmissionColor: {r: 0, g: 0, b: 0, a: 1} diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta new file mode 100644 index 0000000000..0027190c1e --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Materials/goal.mat.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: 624b24bbec31f44babfb57ef2dfbc537 +timeCreated: 1506189863 +licenseType: Pro +NativeFormatImporter: + mainObjectFileID: 2100000 + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity b/unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity new file mode 100644 index 0000000000..5222d9cae8 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity @@ -0,0 +1,702 @@ +%YAML 1.1 +%TAG !u! 
tag:unity3d.com,2011: +--- !u!29 &1 +OcclusionCullingSettings: + m_ObjectHideFlags: 0 + serializedVersion: 2 + m_OcclusionBakeSettings: + smallestOccluder: 5 + smallestHole: 0.25 + backfaceThreshold: 100 + m_SceneGUID: 00000000000000000000000000000000 + m_OcclusionCullingData: {fileID: 0} +--- !u!104 &2 +RenderSettings: + m_ObjectHideFlags: 0 + serializedVersion: 8 + m_Fog: 0 + m_FogColor: {r: 0.5, g: 0.5, b: 0.5, a: 1} + m_FogMode: 3 + m_FogDensity: 0.01 + m_LinearFogStart: 0 + m_LinearFogEnd: 300 + m_AmbientSkyColor: {r: 0.212, g: 0.227, b: 0.259, a: 1} + m_AmbientEquatorColor: {r: 0.114, g: 0.125, b: 0.133, a: 1} + m_AmbientGroundColor: {r: 0.047, g: 0.043, b: 0.035, a: 1} + m_AmbientIntensity: 1 + m_AmbientMode: 0 + m_SubtractiveShadowColor: {r: 0.42, g: 0.478, b: 0.627, a: 1} + m_SkyboxMaterial: {fileID: 10304, guid: 0000000000000000f000000000000000, type: 0} + m_HaloStrength: 0.5 + m_FlareStrength: 1 + m_FlareFadeSpeed: 3 + m_HaloTexture: {fileID: 0} + m_SpotCookie: {fileID: 10001, guid: 0000000000000000e000000000000000, type: 0} + m_DefaultReflectionMode: 0 + m_DefaultReflectionResolution: 128 + m_ReflectionBounces: 1 + m_ReflectionIntensity: 1 + m_CustomReflection: {fileID: 0} + m_Sun: {fileID: 0} + m_IndirectSpecularColor: {r: 0, g: 0, b: 0, a: 1} +--- !u!157 &3 +LightmapSettings: + m_ObjectHideFlags: 0 + serializedVersion: 11 + m_GIWorkflowMode: 1 + m_GISettings: + serializedVersion: 2 + m_BounceScale: 1 + m_IndirectOutputScale: 1 + m_AlbedoBoost: 1 + m_TemporalCoherenceThreshold: 1 + m_EnvironmentLightingMode: 0 + m_EnableBakedLightmaps: 1 + m_EnableRealtimeLightmaps: 1 + m_LightmapEditorSettings: + serializedVersion: 9 + m_Resolution: 2 + m_BakeResolution: 40 + m_TextureWidth: 1024 + m_TextureHeight: 1024 + m_AO: 0 + m_AOMaxDistance: 1 + m_CompAOExponent: 1 + m_CompAOExponentDirect: 0 + m_Padding: 2 + m_LightmapParameters: {fileID: 0} + m_LightmapsBakeMode: 1 + m_TextureCompression: 1 + m_FinalGather: 0 + m_FinalGatherFiltering: 1 + m_FinalGatherRayCount: 256 + m_ReflectionCompression: 2 + m_MixedBakeMode: 2 + m_BakeBackend: 0 + m_PVRSampling: 1 + m_PVRDirectSampleCount: 32 + m_PVRSampleCount: 500 + m_PVRBounces: 2 + m_PVRFiltering: 0 + m_PVRFilteringMode: 1 + m_PVRCulling: 1 + m_PVRFilteringGaussRadiusDirect: 1 + m_PVRFilteringGaussRadiusIndirect: 5 + m_PVRFilteringGaussRadiusAO: 2 + m_PVRFilteringAtrousColorSigma: 1 + m_PVRFilteringAtrousNormalSigma: 1 + m_PVRFilteringAtrousPositionSigma: 1 + m_LightingDataAsset: {fileID: 0} + m_UseShadowmask: 1 +--- !u!196 &4 +NavMeshSettings: + serializedVersion: 2 + m_ObjectHideFlags: 0 + m_BuildSettings: + serializedVersion: 2 + agentTypeID: 0 + agentRadius: 0.5 + agentHeight: 2 + agentSlope: 45 + agentClimb: 0.4 + ledgeDropHeight: 0 + maxJumpAcrossDistance: 0 + minRegionArea: 2 + manualCellSize: 0 + cellSize: 0.16666667 + manualTileSize: 0 + tileSize: 256 + accuratePlacement: 0 + m_NavMeshData: {fileID: 0} +--- !u!1 &282272644 +GameObject: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 5 + m_Component: + - component: {fileID: 282272648} + - component: {fileID: 282272647} + - component: {fileID: 282272646} + - component: {fileID: 282272645} + - component: {fileID: 282272649} + m_Layer: 0 + m_Name: Agent + m_TagString: Untagged + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!23 &282272645 +MeshRenderer: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 282272644} + 
m_Enabled: 1 + m_CastShadows: 1 + m_ReceiveShadows: 1 + m_MotionVectors: 1 + m_LightProbeUsage: 1 + m_ReflectionProbeUsage: 1 + m_Materials: + - {fileID: 2100000, guid: 260483cdfc6b14e26823a02f23bd8baa, type: 2} + m_StaticBatchInfo: + firstSubMesh: 0 + subMeshCount: 0 + m_StaticBatchRoot: {fileID: 0} + m_ProbeAnchor: {fileID: 0} + m_LightProbeVolumeOverride: {fileID: 0} + m_ScaleInLightmap: 1 + m_PreserveUVs: 1 + m_IgnoreNormalsForChartDetection: 0 + m_ImportantGI: 0 + m_SelectedEditorRenderState: 3 + m_MinimumChartSize: 4 + m_AutoUVMaxDistance: 0.5 + m_AutoUVMaxAngle: 89 + m_LightmapParameters: {fileID: 0} + m_SortingLayerID: 0 + m_SortingLayer: 0 + m_SortingOrder: 0 +--- !u!65 &282272646 +BoxCollider: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 282272644} + m_Material: {fileID: 0} + m_IsTrigger: 0 + m_Enabled: 1 + serializedVersion: 2 + m_Size: {x: 1, y: 1, z: 1} + m_Center: {x: 0, y: 0, z: 0} +--- !u!33 &282272647 +MeshFilter: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 282272644} + m_Mesh: {fileID: 10202, guid: 0000000000000000e000000000000000, type: 0} +--- !u!4 &282272648 +Transform: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 282272644} + m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} + m_LocalPosition: {x: 0, y: 0, z: 0} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: [] + m_Father: {fileID: 0} + m_RootOrder: 3 + m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} +--- !u!114 &282272649 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 282272644} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 624480a72e46148118ab2e2d89b537de, type: 3} + m_Name: + m_EditorClassIdentifier: + brain: {fileID: 846768605} + observations: [] + maxStep: 0 + resetOnDone: 1 + reward: 0 + done: 0 + value: 0 + CummulativeReward: 0 + stepCounter: 0 + agentStoredAction: [] + memory: [] + id: 0 + position: 0 + smallGoalPosition: -3 + largeGoalPosition: 7 + largeGoal: {fileID: 984725368} + smallGoal: {fileID: 1178588871} + minPosition: -10 + maxPosition: 10 +--- !u!114 &395380616 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 943466ab374444748a364f9d6c3e2fe2, type: 3} + m_Name: (Clone) + m_EditorClassIdentifier: + brain: {fileID: 0} +--- !u!114 &577874698 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 41e9bda8f3cf1492fa74926a530f6f70, type: 3} + m_Name: (Clone) + m_EditorClassIdentifier: + continuousPlayerActions: [] + discretePlayerActions: + - key: 97 + value: 0 + - key: 100 + value: 1 + defaultAction: -1 + brain: {fileID: 846768605} +--- !u!1 &762086410 +GameObject: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 5 + m_Component: + - component: {fileID: 762086412} + - component: {fileID: 762086411} + m_Layer: 0 + m_Name: Directional Light + m_TagString: Untagged + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!108 &762086411 +Light: + m_ObjectHideFlags: 0 + m_PrefabParentObject: 
{fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 762086410} + m_Enabled: 1 + serializedVersion: 8 + m_Type: 1 + m_Color: {r: 1, g: 0.95686275, b: 0.8392157, a: 1} + m_Intensity: 1 + m_Range: 10 + m_SpotAngle: 30 + m_CookieSize: 10 + m_Shadows: + m_Type: 2 + m_Resolution: -1 + m_CustomResolution: -1 + m_Strength: 1 + m_Bias: 0.05 + m_NormalBias: 0.4 + m_NearPlane: 0.2 + m_Cookie: {fileID: 0} + m_DrawHalo: 0 + m_Flare: {fileID: 0} + m_RenderMode: 0 + m_CullingMask: + serializedVersion: 2 + m_Bits: 4294967295 + m_Lightmapping: 4 + m_AreaSize: {x: 1, y: 1} + m_BounceIntensity: 1 + m_ColorTemperature: 6570 + m_UseColorTemperature: 0 + m_ShadowRadius: 0 + m_ShadowAngle: 0 +--- !u!4 &762086412 +Transform: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 762086410} + m_LocalRotation: {x: 0.40821788, y: -0.23456968, z: 0.10938163, w: 0.8754261} + m_LocalPosition: {x: 0, y: 3, z: 0} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: [] + m_Father: {fileID: 0} + m_RootOrder: 1 + m_LocalEulerAnglesHint: {x: 50, y: -30, z: 0} +--- !u!1 &846768603 +GameObject: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 5 + m_Component: + - component: {fileID: 846768604} + - component: {fileID: 846768605} + m_Layer: 0 + m_Name: Brain + m_TagString: Untagged + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!4 &846768604 +Transform: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 846768603} + m_LocalRotation: {x: -0, y: -0, z: -0, w: 1} + m_LocalPosition: {x: 0, y: 0, z: 0} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: [] + m_Father: {fileID: 1574236049} + m_RootOrder: 0 + m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} +--- !u!114 &846768605 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 846768603} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: c676a8ddf5a5f4f64b35e9ed5028679d, type: 3} + m_Name: + m_EditorClassIdentifier: + brainParameters: + stateSize: 1 + actionSize: 2 + memorySize: 0 + cameraResolutions: [] + actionDescriptions: + - Left + - Right + actionSpaceType: 0 + stateSpaceType: 0 + brainType: 0 + CoreBrains: + - {fileID: 577874698} + - {fileID: 395380616} + - {fileID: 1503497339} + instanceID: 10208 +--- !u!1 &984725368 +GameObject: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 5 + m_Component: + - component: {fileID: 984725372} + - component: {fileID: 984725371} + - component: {fileID: 984725370} + - component: {fileID: 984725369} + m_Layer: 0 + m_Name: largeGoal + m_TagString: Untagged + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!23 &984725369 +MeshRenderer: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 984725368} + m_Enabled: 1 + m_CastShadows: 1 + m_ReceiveShadows: 1 + m_MotionVectors: 1 + m_LightProbeUsage: 1 + m_ReflectionProbeUsage: 1 + m_Materials: + - {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2} + m_StaticBatchInfo: + firstSubMesh: 0 + subMeshCount: 0 + m_StaticBatchRoot: {fileID: 0} + m_ProbeAnchor: {fileID: 0} + m_LightProbeVolumeOverride: {fileID: 0} + m_ScaleInLightmap: 1 + m_PreserveUVs: 1 + m_IgnoreNormalsForChartDetection: 0 + 
m_ImportantGI: 0 + m_SelectedEditorRenderState: 3 + m_MinimumChartSize: 4 + m_AutoUVMaxDistance: 0.5 + m_AutoUVMaxAngle: 89 + m_LightmapParameters: {fileID: 0} + m_SortingLayerID: 0 + m_SortingLayer: 0 + m_SortingOrder: 0 +--- !u!135 &984725370 +SphereCollider: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 984725368} + m_Material: {fileID: 0} + m_IsTrigger: 0 + m_Enabled: 1 + serializedVersion: 2 + m_Radius: 0.5 + m_Center: {x: 0, y: 0, z: 0} +--- !u!33 &984725371 +MeshFilter: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 984725368} + m_Mesh: {fileID: 10207, guid: 0000000000000000e000000000000000, type: 0} +--- !u!4 &984725372 +Transform: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 984725368} + m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} + m_LocalPosition: {x: 0, y: 0, z: 0} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: [] + m_Father: {fileID: 0} + m_RootOrder: 4 + m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} +--- !u!1 &1178588871 +GameObject: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 5 + m_Component: + - component: {fileID: 1178588875} + - component: {fileID: 1178588874} + - component: {fileID: 1178588873} + - component: {fileID: 1178588872} + m_Layer: 0 + m_Name: smallGoal + m_TagString: Untagged + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!23 &1178588872 +MeshRenderer: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1178588871} + m_Enabled: 1 + m_CastShadows: 1 + m_ReceiveShadows: 1 + m_MotionVectors: 1 + m_LightProbeUsage: 1 + m_ReflectionProbeUsage: 1 + m_Materials: + - {fileID: 2100000, guid: 624b24bbec31f44babfb57ef2dfbc537, type: 2} + m_StaticBatchInfo: + firstSubMesh: 0 + subMeshCount: 0 + m_StaticBatchRoot: {fileID: 0} + m_ProbeAnchor: {fileID: 0} + m_LightProbeVolumeOverride: {fileID: 0} + m_ScaleInLightmap: 1 + m_PreserveUVs: 1 + m_IgnoreNormalsForChartDetection: 0 + m_ImportantGI: 0 + m_SelectedEditorRenderState: 3 + m_MinimumChartSize: 4 + m_AutoUVMaxDistance: 0.5 + m_AutoUVMaxAngle: 89 + m_LightmapParameters: {fileID: 0} + m_SortingLayerID: 0 + m_SortingLayer: 0 + m_SortingOrder: 0 +--- !u!135 &1178588873 +SphereCollider: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1178588871} + m_Material: {fileID: 0} + m_IsTrigger: 0 + m_Enabled: 1 + serializedVersion: 2 + m_Radius: 0.5 + m_Center: {x: 0, y: 0, z: 0} +--- !u!33 &1178588874 +MeshFilter: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1178588871} + m_Mesh: {fileID: 10207, guid: 0000000000000000e000000000000000, type: 0} +--- !u!4 &1178588875 +Transform: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1178588871} + m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} + m_LocalPosition: {x: 0, y: 0, z: 0} + m_LocalScale: {x: 0.5, y: 0.5, z: 0.5} + m_Children: [] + m_Father: {fileID: 0} + m_RootOrder: 5 + m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} +--- !u!114 &1503497339 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 0} + m_Enabled: 1 + m_EditorHideFlags: 0 + 
m_Script: {fileID: 11500000, guid: 35813a1be64e144f887d7d5f15b963fa, type: 3} + m_Name: (Clone) + m_EditorClassIdentifier: + brain: {fileID: 846768605} +--- !u!1 &1574236047 +GameObject: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 5 + m_Component: + - component: {fileID: 1574236049} + - component: {fileID: 1574236048} + m_Layer: 0 + m_Name: Academy + m_TagString: Untagged + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!114 &1574236048 +MonoBehaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1574236047} + m_Enabled: 1 + m_EditorHideFlags: 0 + m_Script: {fileID: 11500000, guid: 19276d4dc78ee49f1ba258293f17636c, type: 3} + m_Name: + m_EditorClassIdentifier: + maxSteps: 0 + frameToSkip: 0 + waitTime: 0.5 + trainingConfiguration: + width: 80 + height: 80 + qualityLevel: 1 + timeScale: 100 + targetFrameRate: 60 + inferenceConfiguration: + width: 1280 + height: 720 + qualityLevel: 5 + timeScale: 1 + targetFrameRate: 60 + defaultResetParameters: [] + done: 0 + episodeCount: 1 + currentStep: 0 + isInference: 0 + windowResize: 0 +--- !u!4 &1574236049 +Transform: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1574236047} + m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} + m_LocalPosition: {x: 0.71938086, y: 0.27357092, z: 4.1970553} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: + - {fileID: 846768604} + m_Father: {fileID: 0} + m_RootOrder: 2 + m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} +--- !u!1 &1715640920 +GameObject: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + serializedVersion: 5 + m_Component: + - component: {fileID: 1715640925} + - component: {fileID: 1715640924} + - component: {fileID: 1715640923} + - component: {fileID: 1715640922} + - component: {fileID: 1715640921} + m_Layer: 0 + m_Name: Main Camera + m_TagString: MainCamera + m_Icon: {fileID: 0} + m_NavMeshLayer: 0 + m_StaticEditorFlags: 0 + m_IsActive: 1 +--- !u!81 &1715640921 +AudioListener: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1715640920} + m_Enabled: 1 +--- !u!124 &1715640922 +Behaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1715640920} + m_Enabled: 1 +--- !u!92 &1715640923 +Behaviour: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1715640920} + m_Enabled: 1 +--- !u!20 &1715640924 +Camera: + m_ObjectHideFlags: 0 + m_PrefabParentObject: {fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1715640920} + m_Enabled: 1 + serializedVersion: 2 + m_ClearFlags: 2 + m_BackGroundColor: {r: 0.7411765, g: 0.7411765, b: 0.7529412, a: 0} + m_NormalizedViewPortRect: + serializedVersion: 2 + x: 0 + y: 0 + width: 1 + height: 1 + near clip plane: 0.3 + far clip plane: 1000 + field of view: 60 + orthographic: 0 + orthographic size: 5 + m_Depth: -1 + m_CullingMask: + serializedVersion: 2 + m_Bits: 4294967295 + m_RenderingPath: -1 + m_TargetTexture: {fileID: 0} + m_TargetDisplay: 0 + m_TargetEye: 3 + m_HDR: 1 + m_AllowMSAA: 1 + m_ForceIntoRT: 0 + m_OcclusionCulling: 1 + m_StereoConvergence: 10 + m_StereoSeparation: 0.022 + m_StereoMirrorMode: 0 +--- !u!4 &1715640925 +Transform: + m_ObjectHideFlags: 0 + m_PrefabParentObject: 
{fileID: 0} + m_PrefabInternal: {fileID: 0} + m_GameObject: {fileID: 1715640920} + m_LocalRotation: {x: 0, y: 0, z: 0, w: 1} + m_LocalPosition: {x: 0, y: 1, z: -10} + m_LocalScale: {x: 1, y: 1, z: 1} + m_Children: [] + m_Father: {fileID: 0} + m_RootOrder: 0 + m_LocalEulerAnglesHint: {x: 0, y: 0, z: 0} diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta new file mode 100644 index 0000000000..ec7a48aee3 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scene.unity.meta @@ -0,0 +1,8 @@ +fileFormatVersion: 2 +guid: cf1d119a8748d406e90ecb623b45f92f +timeCreated: 1504127824 +licenseType: Pro +DefaultImporter: + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta new file mode 100644 index 0000000000..dfa30689b1 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts.meta @@ -0,0 +1,9 @@ +fileFormatVersion: 2 +guid: fbcbd038eb29041f580c463e454e10fc +folderAsset: yes +timeCreated: 1503355437 +licenseType: Free +DefaultImporter: + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs new file mode 100755 index 0000000000..f776409878 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs @@ -0,0 +1,17 @@ +using System.Collections; +using System.Collections.Generic; +using UnityEngine; + +public class BasicAcademy : Academy { + + public override void AcademyReset() + { + + } + + public override void AcademyStep() + { + + } + +} diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta new file mode 100644 index 0000000000..771a61f844 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAcademy.cs.meta @@ -0,0 +1,12 @@ +fileFormatVersion: 2 +guid: 19276d4dc78ee49f1ba258293f17636c +timeCreated: 1503355437 +licenseType: Free +MonoImporter: + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs new file mode 100755 index 0000000000..17f02bbf67 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs @@ -0,0 +1,63 @@ +using System.Collections; +using System.Collections.Generic; +using UnityEngine; + +public class BasicAgent : Agent { + + public int position; + public int smallGoalPosition; + public int largeGoalPosition; + public GameObject largeGoal; + public GameObject smallGoal; + public int minPosition; + public int maxPosition; + + public override List CollectState() + { + List state = new List(); + state.Add(position); + return state; + } + + public override void AgentStep(float[] act) + { + float movement = act[0]; + int direction = 0; + if (movement == 0) { direction = -1; } + if (movement == 1) { direction = 1; } + + position += direction; + if (position < minPosition) { position = minPosition; } + if (position > maxPosition) { position = maxPosition; } + + gameObject.transform.position = new 
Vector3(position, 0f, 0f); + + if (position == smallGoalPosition) + { + done = true; + reward = 0.1f; + } + + if (position == largeGoalPosition) + { + done = true; + reward = 1f; + } + } + + public override void AgentReset() + { + position = 0; + minPosition = -10; + maxPosition = 10; + smallGoalPosition = -3; + largeGoalPosition = 7; + smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f); + largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f); + } + + public override void AgentOnDone() + { + + } +} diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta new file mode 100644 index 0000000000..a2ff5e21c3 --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs.meta @@ -0,0 +1,12 @@ +fileFormatVersion: 2 +guid: 624480a72e46148118ab2e2d89b537de +timeCreated: 1503355437 +licenseType: Free +MonoImporter: + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs new file mode 100755 index 0000000000..fa840ca08e --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs @@ -0,0 +1,18 @@ +using System.Collections; +using System.Collections.Generic; +using UnityEngine; + +public class BasicDecision : MonoBehaviour, Decision { + + public float[] Decide (List state, List observation, float reward, bool done, float[] memory) + { + return default(float[]); + + } + + public float[] MakeMemory (List state, List observation, float reward, bool done, float[] memory) + { + return default(float[]); + + } +} diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta new file mode 100644 index 0000000000..7f5d5b1bbc --- /dev/null +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicDecision.cs.meta @@ -0,0 +1,12 @@ +fileFormatVersion: 2 +guid: 99399d2439f894b149d8e67b85b6e07a +timeCreated: 1503355437 +licenseType: Free +MonoImporter: + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: From 737c96e4db71e4dd94f75ed3e06a3b37d89b47a4 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Sat, 23 Sep 2017 13:24:06 -0700 Subject: [PATCH 03/11] Add flexibility to continuous control as well --- python/ppo.py | 2 +- python/ppo/models.py | 82 +++++++++++++++++++++++++++++--------------- 2 files changed, 55 insertions(+), 29 deletions(-) diff --git a/python/ppo.py b/python/ppo.py index 3f2d490a70..aec1d230f3 100755 --- a/python/ppo.py +++ b/python/ppo.py @@ -15,7 +15,7 @@ Options: --help Show this message. - --max-steps= Maximum number of steps to run environment [default: 5e6]. + --max-steps= Maximum number of steps to run environment [default: 1e6]. --run-path= The sub-directory name for model and summary statistics [default: ppo]. --load Whether to load the model or randomly initialize [default: False]. --train Whether to train model, or only run inference [default: True]. 
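
A minimal standalone sketch of the discrete-state path introduced above, outside the patch itself: the Basic example reduces each agent's state to a single integer (BasicAgent's position), and the model one-hot encodes that integer before the dense layers, as create_discrete_state_encoder does. The sizes below are illustrative, and tf.one_hot stands in for the contrib one_hot_encoding helper the patch uses; TensorFlow 1.x APIs are assumed, matching the rest of models.py.

import numpy as np
import tensorflow as tf

s_size = 20   # illustrative number of discrete state values
h_size = 128  # hidden layer size, as in the PPO defaults

# [batch, 1] integer state, flattened and one-hot encoded to [batch, s_size]
state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state')
flat = tf.reshape(state_in, [-1])
state_onehot = tf.one_hot(flat, s_size)
hidden = tf.layers.dense(state_onehot, h_size, activation=tf.nn.elu)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # a batch of two agents, each state a single raw integer
    out = sess.run(hidden, feed_dict={state_in: np.array([[3], [17]], dtype=np.int32)})
    print(out.shape)  # (2, 128)
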
diff --git a/python/ppo/models.py b/python/ppo/models.py index 64a467c5f3..a149123e6a 100755 --- a/python/ppo/models.py +++ b/python/ppo/models.py @@ -61,28 +61,37 @@ def export_graph(model_path, env_name="env", target_nodes="action"): class PPOModel(object): - def create_visual_encoder(self, o_size_h, o_size_w, h_size): + def create_visual_encoder(self, o_size_h, o_size_w, h_size, num_streams, activation): self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32, name='observation_0') - self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2], - use_bias=False, activation=tf.nn.elu) - self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2], - use_bias=False, activation=tf.nn.elu) - hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=tf.nn.elu) - return hidden - - def create_continuous_state_encoder(self, s_size, h_size): + streams = [] + for i in range(num_streams): + self.conv1 = tf.layers.conv2d(self.observation_in, 32, kernel_size=[3, 3], strides=[2, 2], + use_bias=False, activation=activation) + self.conv2 = tf.layers.conv2d(self.conv1, 64, kernel_size=[3, 3], strides=[2, 2], + use_bias=False, activation=activation) + hidden = tf.layers.dense(c_layers.flatten(self.conv2), h_size, use_bias=False, activation=activation) + streams.append(hidden) + return streams + + def create_continuous_state_encoder(self, s_size, h_size, num_streams, activation): self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state') - hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.elu) - hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=tf.nn.elu) - return hidden_2 - - def create_discrete_state_encoder(self, s_size, h_size): + streams = [] + for i in range(num_streams): + hidden_1 = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=activation) + hidden_2 = tf.layers.dense(hidden_1, h_size, use_bias=False, activation=activation) + streams.append(hidden_2) + return streams + + def create_discrete_state_encoder(self, s_size, h_size, num_streams, activation): self.state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state') state_in = tf.reshape(self.state_in, [-1]) state_onehot = c_layers.one_hot_encoding(state_in, s_size) - hidden = tf.layers.dense(state_onehot, h_size, activation=tf.nn.elu) - return hidden + streams = [] + for i in range(num_streams): + hidden = tf.layers.dense(state_onehot, h_size, use_bias=False, activation=activation) + streams.append(hidden) + return streams def create_ppo_optimizer(self, probs, old_probs, value, entropy, beta, epsilon, lr, max_step): """ @@ -123,21 +132,38 @@ class ContinuousControlModel(PPOModel): def __init__(self, lr, brain, h_size, epsilon, max_step): """ Creates Continuous Control Actor-Critic model. 
- :param s_size: State-space size - :param a_size: Action-space size + :param brain: State-space size :param h_size: Hidden layer size """ s_size = brain.state_space_size a_size = brain.action_space_size - self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state') + hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None + if brain.number_observations > 0: + h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] + hidden_visual = self.create_visual_encoder(h_size, w_size, h_size, 2, tf.nn.tanh) + if brain.state_space_size > 0: + s_size = brain.state_space_size + if brain.state_space_type == "continuous": + hidden_state = self.create_continuous_state_encoder(s_size, h_size, 2, tf.nn.tanh) + else: + hidden_state = self.create_discrete_state_encoder(s_size, h_size, 2, tf.nn.tanh) + + if hidden_visual is None and hidden_state is None: + raise Exception("No valid network configuration possible. " + "There are no states or observations in this brain") + elif hidden_visual is not None and hidden_state is None: + hidden_policy, hidden_value = hidden_visual + elif hidden_visual is None and hidden_state is not None: + print(hidden_state) + hidden_policy, hidden_value = hidden_state + elif hidden_visual is not None and hidden_state is not None: + hidden_policy = tf.concat([hidden_visual[0], hidden_state[0]], axis=1) + hidden_value = tf.concat([hidden_visual[1], hidden_state[1]], axis=1) + self.batch_size = tf.placeholder(shape=None, dtype=tf.int32, name='batch_size') - hidden_policy = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.tanh) - hidden_value = tf.layers.dense(self.state_in, h_size, use_bias=False, activation=tf.nn.tanh) - hidden_policy_2 = tf.layers.dense(hidden_policy, h_size, use_bias=False, activation=tf.nn.tanh) - hidden_value_2 = tf.layers.dense(hidden_value, h_size, use_bias=False, activation=tf.nn.tanh) - self.mu = tf.layers.dense(hidden_policy_2, a_size, activation=None, use_bias=False, + self.mu = tf.layers.dense(hidden_policy, a_size, activation=None, use_bias=False, kernel_initializer=c_layers.variance_scaling_initializer(factor=0.1)) self.log_sigma_sq = tf.Variable(tf.zeros([a_size])) self.sigma_sq = tf.exp(self.log_sigma_sq) @@ -153,7 +179,7 @@ def __init__(self, lr, brain, h_size, epsilon, max_step): self.entropy = tf.reduce_sum(0.5 * tf.log(2 * np.pi * np.e * self.sigma_sq)) - self.value = tf.layers.dense(hidden_value_2, 1, activation=None, use_bias=False) + self.value = tf.layers.dense(hidden_value, 1, activation=None, use_bias=False) self.old_probs = tf.placeholder(shape=[None, a_size], dtype=tf.float32, name='old_probabilities') @@ -170,13 +196,13 @@ def __init__(self, lr, brain, h_size, epsilon, beta, max_step): hidden_state, hidden_visual, hidden = None, None, None if brain.number_observations > 0: h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] - hidden_visual = self.create_visual_encoder(h_size, w_size, h_size) + hidden_visual = self.create_visual_encoder(h_size, w_size, h_size, 1, tf.nn.elu)[0] if brain.state_space_size > 0: s_size = brain.state_space_size if brain.state_space_type == "continuous": - hidden_state = self.create_continuous_state_encoder(s_size, h_size) + hidden_state = self.create_continuous_state_encoder(s_size, h_size, 1, tf.nn.elu)[0] else: - hidden_state = self.create_discrete_state_encoder(s_size, h_size) + hidden_state = self.create_discrete_state_encoder(s_size, h_size, 1, tf.nn.elu)[0] 
if hidden_visual is None and hidden_state is None: raise Exception("No valid network configuration possible. " From 5b2677a891bdc1ad4e0012b61bd17c26e121d0d4 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Sat, 23 Sep 2017 13:59:10 -0700 Subject: [PATCH 04/11] Finish PPO flexible model generation implementation --- python/PPO.ipynb | 10 ++++------ python/ppo.py | 3 ++- python/ppo/models.py | 26 +++++++++++++++++++++++++- python/ppo/trainer.py | 40 +++++++++++++++++++--------------------- 4 files changed, 50 insertions(+), 29 deletions(-) diff --git a/python/PPO.ipynb b/python/PPO.ipynb index f9248d6aec..2b3a8c31d0 100755 --- a/python/PPO.ipynb +++ b/python/PPO.ipynb @@ -49,7 +49,7 @@ "train_model = True # Whether to train the model.\n", "summary_freq = 10000 # Frequency at which to save training statistics.\n", "save_freq = 50000 # Frequency at which to save model.\n", - "env_name = \"simple\" # Name of the training environment file.\n", + "env_name = \"environment\" # Name of the training environment file.\n", "\n", "### Algorithm-specific parameters for tuning\n", "gamma = 0.99 # Reward discount rate.\n", @@ -74,9 +74,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": true - }, + "metadata": {}, "outputs": [], "source": [ "env = UnityEnvironment(file_name=env_name)\n", @@ -95,7 +93,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, "scrolled": true }, "outputs": [], @@ -109,6 +106,7 @@ "\n", "is_continuous = (env.brains[brain_name].action_space_type == \"continuous\")\n", "use_observations = (env.brains[brain_name].number_observations > 0)\n", + "use_states = (env.brains[brain_name].state_space_size > 0)\n", "\n", "model_path = './models/{}'.format(run_path)\n", "summary_path = './summaries/{}'.format(run_path)\n", @@ -133,7 +131,7 @@ " steps = sess.run(ppo_model.global_step)\n", " summary_writer = tf.summary.FileWriter(summary_path)\n", " info = env.reset(train_mode=train_model)[brain_name]\n", - " trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)\n", + " trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)\n", " while steps <= max_steps:\n", " if env.global_done:\n", " info = env.reset(train_mode=train_model)[brain_name]\n", diff --git a/python/ppo.py b/python/ppo.py index aec1d230f3..f755f96fdb 100755 --- a/python/ppo.py +++ b/python/ppo.py @@ -73,6 +73,7 @@ is_continuous = (env.brains[brain_name].action_space_type == "continuous") use_observations = (env.brains[brain_name].number_observations > 0) +use_states = (env.brains[brain_name].state_space_size > 0) if not os.path.exists(model_path): os.makedirs(model_path) @@ -94,7 +95,7 @@ steps = sess.run(ppo_model.global_step) summary_writer = tf.summary.FileWriter(summary_path) info = env.reset(train_mode=train_model)[brain_name] - trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations) + trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states) while steps <= max_steps or not train_model: if env.global_done: info = env.reset(train_mode=train_model)[brain_name] diff --git a/python/ppo/models.py b/python/ppo/models.py index a149123e6a..0bf581cded 100755 --- a/python/ppo/models.py +++ b/python/ppo/models.py @@ -62,6 +62,15 @@ def export_graph(model_path, env_name="env", target_nodes="action"): class PPOModel(object): def create_visual_encoder(self, o_size_h, o_size_w, h_size, num_streams, activation): + """ + Builds a set of visual (CNN) encoders. 
+ :param o_size_h: Height observation size. + :param o_size_w: Width observation size. + :param h_size: Hidden layer size. + :param num_streams: Number of visual streams to construct. + :param activation: What type of activation function to use for layers. + :return: List of hidden layer tensors. + """ self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32, name='observation_0') streams = [] @@ -75,6 +84,14 @@ def create_visual_encoder(self, o_size_h, o_size_w, h_size, num_streams, activat return streams def create_continuous_state_encoder(self, s_size, h_size, num_streams, activation): + """ + Builds a set of hidden state encoders. + :param s_size: state input size. + :param h_size: Hidden layer size. + :param num_streams: Number of state streams to construct. + :param activation: What type of activation function to use for layers. + :return: List of hidden layer tensors. + """ self.state_in = tf.placeholder(shape=[None, s_size], dtype=tf.float32, name='state') streams = [] for i in range(num_streams): @@ -84,6 +101,14 @@ def create_continuous_state_encoder(self, s_size, h_size, num_streams, activatio return streams def create_discrete_state_encoder(self, s_size, h_size, num_streams, activation): + """ + Builds a set of hidden state encoders from discrete state input. + :param s_size: state input size (discrete). + :param h_size: Hidden layer size. + :param num_streams: Number of state streams to construct. + :param activation: What type of activation function to use for layers. + :return: List of hidden layer tensors. + """ self.state_in = tf.placeholder(shape=[None, 1], dtype=tf.int32, name='state') state_in = tf.reshape(self.state_in, [-1]) state_onehot = c_layers.one_hot_encoding(state_in, s_size) @@ -155,7 +180,6 @@ def __init__(self, lr, brain, h_size, epsilon, max_step): elif hidden_visual is not None and hidden_state is None: hidden_policy, hidden_value = hidden_visual elif hidden_visual is None and hidden_state is not None: - print(hidden_state) hidden_policy, hidden_value = hidden_state elif hidden_visual is not None and hidden_state is not None: hidden_policy = tf.concat([hidden_visual[0], hidden_state[0]], axis=1) diff --git a/python/ppo/trainer.py b/python/ppo/trainer.py index 393fc104c3..0eaa806387 100755 --- a/python/ppo/trainer.py +++ b/python/ppo/trainer.py @@ -5,7 +5,7 @@ class Trainer(object): - def __init__(self, ppo_model, sess, info, is_continuous, use_observations): + def __init__(self, ppo_model, sess, info, is_continuous, use_observations, use_states): """ Responsible for collecting experinces and training PPO model. :param ppo_model: Tensorflow graph defining model. @@ -26,6 +26,7 @@ def __init__(self, ppo_model, sess, info, is_continuous, use_observations): self.is_continuous = is_continuous self.use_observations = use_observations + self.use_states = use_states def take_action(self, info, env, brain_name): """ @@ -36,15 +37,14 @@ def take_action(self, info, env, brain_name): :return: BrainInfo corresponding to new environment state. 
""" epsi = None + feed_dict = {self.model.batch_size: len(info.states)} if self.is_continuous: epsi = np.random.randn(len(info.states), env.brains[brain_name].action_space_size) - feed_dict = {self.model.state_in: info.states, self.model.batch_size: len(info.states), - self.model.epsilon: epsi} - elif self.use_observations: - feed_dict = {self.model.observation_in: np.vstack(info.observations), - self.model.batch_size: len(info.states)} - else: - feed_dict = {self.model.state_in: info.states, self.model.batch_size: len(info.states)} + feed_dict[self.model.epsilon] = epsi + if self.use_observations: + feed_dict[self.model.observation_in] = np.vstack(info.observations) + if self.use_states: + feed_dict[self.model.state_in] = info.states actions, a_dist, value, ent, learn_rate = self.sess.run([self.model.output, self.model.probs, self.model.value, self.model.entropy, self.model.learning_rate], @@ -72,13 +72,13 @@ def add_experiences(self, info, next_info, epsi, actions, a_dist, value): if not info.local_done[idx]: if self.use_observations: history['observations'].append(info.observations[idx]) - else: + if self.use_states: history['states'].append(info.states[idx]) + if self.is_continuous: + history['epsilons'].append(epsi[idx]) history['actions'].append(actions[idx]) history['rewards'].append(next_info.rewards[idx]) history['action_probs'].append(a_dist[idx]) - if self.is_continuous: - history['epsilons'].append(epsi[idx]) history['value_estimates'].append(value[idx][0]) history['cumulative_reward'] += next_info.rewards[idx] history['episode_steps'] += 1 @@ -98,12 +98,11 @@ def process_experiences(self, info, time_horizon, gamma, lambd): if info.local_done[l]: value_next = 0.0 else: + feed_dict = {self.model.batch_size: len(info.states)} if self.use_observations: - feed_dict = {self.model.observation_in: np.vstack(info.observations), - self.model.batch_size: len(info.states)} - else: - feed_dict = {self.model.state_in: info.states, - self.model.batch_size: len(info.states)} + feed_dict[self.model.observation_in] = np.vstack(info.observations) + if self.use_states: + feed_dict[self.model.state_in] = info.states value_next = self.sess.run(self.model.value, feed_dict)[l] history = vectorize_history(self.history_dict[info.agents[l]]) history['advantages'] = get_gae(rewards=history['rewards'], @@ -140,13 +139,12 @@ def update_model(self, batch_size, num_epoch): self.model.old_probs: np.vstack(training_buffer['action_probs'][start:end])} if self.is_continuous: feed_dict[self.model.epsilon] = np.vstack(training_buffer['epsilons'][start:end]) - feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end]) else: feed_dict[self.model.action_holder] = np.hstack(training_buffer['actions'][start:end]) - if self.use_observations: - feed_dict[self.model.observation_in] = np.vstack(training_buffer['observations'][start:end]) - else: - feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end]) + if self.use_states: + feed_dict[self.model.state_in] = np.vstack(training_buffer['states'][start:end]) + if self.use_observations: + feed_dict[self.model.observation_in] = np.vstack(training_buffer['observations'][start:end]) v_loss, p_loss, _ = self.sess.run([self.model.value_loss, self.model.policy_loss, self.model.update_batch], feed_dict=feed_dict) total_v += v_loss From 4990baaf784f6ec2a62d6cac1b7190fa7ac77445 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Sat, 23 Sep 2017 14:00:11 -0700 Subject: [PATCH 05/11] Fix formatting --- 
.../Examples/Basic/Scripts/BasicAgent.cs | 47 ++++++++++--------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs index 17f02bbf67..7db45372e8 100755 --- a/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs +++ b/unity-environment/Assets/ML-Agents/Examples/Basic/Scripts/BasicAgent.cs @@ -2,29 +2,30 @@ using System.Collections.Generic; using UnityEngine; -public class BasicAgent : Agent { +public class BasicAgent : Agent +{ public int position; public int smallGoalPosition; - public int largeGoalPosition; - public GameObject largeGoal; - public GameObject smallGoal; - public int minPosition; + public int largeGoalPosition; + public GameObject largeGoal; + public GameObject smallGoal; + public int minPosition; public int maxPosition; - public override List CollectState() - { - List state = new List(); + public override List CollectState() + { + List state = new List(); state.Add(position); - return state; - } + return state; + } - public override void AgentStep(float[] act) - { + public override void AgentStep(float[] act) + { float movement = act[0]; int direction = 0; if (movement == 0) { direction = -1; } - if (movement == 1) { direction = 1; } + if (movement == 1) { direction = 1; } position += direction; if (position < minPosition) { position = minPosition; } @@ -38,26 +39,26 @@ public override void AgentStep(float[] act) reward = 0.1f; } - if (position == largeGoalPosition) + if (position == largeGoalPosition) { done = true; reward = 1f; } - } + } - public override void AgentReset() - { + public override void AgentReset() + { position = 0; minPosition = -10; maxPosition = 10; smallGoalPosition = -3; largeGoalPosition = 7; - smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f); - largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f); - } + smallGoal.transform.position = new Vector3(smallGoalPosition, 0f, 0f); + largeGoal.transform.position = new Vector3(largeGoalPosition, 0f, 0f); + } - public override void AgentOnDone() - { + public override void AgentOnDone() + { - } + } } From 1439d2cdb8aed3e371c17e674b4b2dd83bfb9bc8 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Mon, 25 Sep 2017 11:26:44 -0700 Subject: [PATCH 06/11] Support color observations --- python/ppo/models.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/python/ppo/models.py b/python/ppo/models.py index 0bf581cded..a8a79bafab 100755 --- a/python/ppo/models.py +++ b/python/ppo/models.py @@ -20,11 +20,7 @@ def create_agent_model(env, lr=1e-4, h_size=128, epsilon=0.2, beta=1e-3, max_ste brain_name = env.brain_names[0] brain = env.brains[brain_name] if brain.action_space_type == "continuous": - if brain.number_observations == 0: - return ContinuousControlModel(lr, brain, h_size, epsilon, max_step) - else: - raise UnityEnvironmentException("There is currently no PPO model which supports both a continuous " - "action space and camera observations.") + return ContinuousControlModel(lr, brain, h_size, epsilon, max_step) if brain.action_space_type == "discrete": return DiscreteControlModel(lr, brain, h_size, epsilon, beta, max_step) @@ -61,17 +57,23 @@ def export_graph(model_path, env_name="env", target_nodes="action"): class PPOModel(object): - def create_visual_encoder(self, o_size_h, o_size_w, h_size, num_streams, activation): + def create_visual_encoder(self, o_size_h, 
o_size_w, bw, h_size, num_streams, activation): """ Builds a set of visual (CNN) encoders. :param o_size_h: Height observation size. :param o_size_w: Width observation size. + :param bw: Whether image is greyscale {True} or color {False}. :param h_size: Hidden layer size. :param num_streams: Number of visual streams to construct. :param activation: What type of activation function to use for layers. :return: List of hidden layer tensors. """ - self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, 1], dtype=tf.float32, + if bw: + c_channels = 1 + else: + c_channels = 3 + + self.observation_in = tf.placeholder(shape=[None, o_size_h, o_size_w, c_channels], dtype=tf.float32, name='observation_0') streams = [] for i in range(num_streams): @@ -166,7 +168,8 @@ def __init__(self, lr, brain, h_size, epsilon, max_step): hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None if brain.number_observations > 0: h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] - hidden_visual = self.create_visual_encoder(h_size, w_size, h_size, 2, tf.nn.tanh) + bw = brain.camera_resolutions[0]['blackAndWhite'] + hidden_visual = self.create_visual_encoder(h_size, w_size, bw, h_size, 2, tf.nn.tanh) if brain.state_space_size > 0: s_size = brain.state_space_size if brain.state_space_type == "continuous": @@ -220,7 +223,8 @@ def __init__(self, lr, brain, h_size, epsilon, beta, max_step): hidden_state, hidden_visual, hidden = None, None, None if brain.number_observations > 0: h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] - hidden_visual = self.create_visual_encoder(h_size, w_size, h_size, 1, tf.nn.elu)[0] + bw = brain.camera_resolutions[0]['blackAndWhite'] + hidden_visual = self.create_visual_encoder(h_size, w_size, bw, h_size, 1, tf.nn.elu)[0] if brain.state_space_size > 0: s_size = brain.state_space_size if brain.state_space_type == "continuous": From 73c42223914a312b867c3900857e482024b53654 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Mon, 25 Sep 2017 12:13:34 -0700 Subject: [PATCH 07/11] Add best practices document --- docs/Readme.md | 1 + docs/best-practices.md | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 docs/best-practices.md diff --git a/docs/Readme.md b/docs/Readme.md index 594f0cd1dd..ef93562a34 100644 --- a/docs/Readme.md +++ b/docs/Readme.md @@ -7,6 +7,7 @@ ## Advanced * [How to make a new Unity Environment](Making-a-new-Unity-Environment.md) + * [Best Practices when Designing an Environment](best-practices.md) * [How to organize the Scene](Organizing-the-Scene.md) * [How to use the Python API](Unity-Agents---Python-API.md) * [How to use TensorflowSharp inside Unity [Experimental]](Using-TensorFlow-Sharp-in-Unity-(Experimental).md) diff --git a/docs/best-practices.md b/docs/best-practices.md new file mode 100644 index 0000000000..8bcf10fef8 --- /dev/null +++ b/docs/best-practices.md @@ -0,0 +1,20 @@ +# Environment Design Best Practices + +## General +* It is often helpful to being with the simplest version of the problem, to ensure the agent can learn it. From there increase +complexity over time. +* When possible, It is often helpful to ensure that you can complete the task by using a Player Brain to control the agent. + +## Rewards +* The magnitude of any given reward should typically not be greater than 1.0 in order to ensure a more stable learning process. 
+* Positive rewards are often more helpful to shaping the desired behavior of an agent than negative rewards. +* For locomotion tasks, a small positive reward (+0.1) for forward progress is typically used. +* If you want the agent the finish a task quickly, it is often helpful to provide a small penalty every step (-0.1). + +## States +* The magnitude of each state variable should be normalized to around 1.0. +* States should include all variables relevant to allowing the agent to take the optimally informed decision. +* Categorical state variables such as type of object (Sword, Shield, Bow) should be encoded in one-hot fashion (ie `3` -> `0, 0, 1`). + +## Actions +* When using continuous control, action values should be clipped to an appropriate range. From b8142f763df9d0576ea31a7fda01b8e289b1cdb8 Mon Sep 17 00:00:00 2001 From: vincentpierre Date: Mon, 25 Sep 2017 13:16:24 -0700 Subject: [PATCH 08/11] bug fix for non square observations --- python/ppo/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ppo/models.py b/python/ppo/models.py index a8a79bafab..97aeff8576 100755 --- a/python/ppo/models.py +++ b/python/ppo/models.py @@ -167,7 +167,7 @@ def __init__(self, lr, brain, h_size, epsilon, max_step): hidden_state, hidden_visual, hidden_policy, hidden_value = None, None, None, None if brain.number_observations > 0: - h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] + h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width'] bw = brain.camera_resolutions[0]['blackAndWhite'] hidden_visual = self.create_visual_encoder(h_size, w_size, bw, h_size, 2, tf.nn.tanh) if brain.state_space_size > 0: @@ -222,7 +222,7 @@ def __init__(self, lr, brain, h_size, epsilon, beta, max_step): """ hidden_state, hidden_visual, hidden = None, None, None if brain.number_observations > 0: - h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['height'] + h_size, w_size = brain.camera_resolutions[0]['height'], brain.camera_resolutions[0]['width'] bw = brain.camera_resolutions[0]['blackAndWhite'] hidden_visual = self.create_visual_encoder(h_size, w_size, bw, h_size, 1, tf.nn.elu)[0] if brain.state_space_size > 0: From eaab83477b5ab95becf319d8525f6f696dcb1de1 Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Mon, 25 Sep 2017 13:17:45 -0700 Subject: [PATCH 09/11] Update Readme.md --- docs/Readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Readme.md b/docs/Readme.md index ef93562a34..802583af25 100644 --- a/docs/Readme.md +++ b/docs/Readme.md @@ -7,7 +7,7 @@ ## Advanced * [How to make a new Unity Environment](Making-a-new-Unity-Environment.md) - * [Best Practices when Designing an Environment](best-practices.md) + * [Best practices when designing an Environment](best-practices.md) * [How to organize the Scene](Organizing-the-Scene.md) * [How to use the Python API](Unity-Agents---Python-API.md) * [How to use TensorflowSharp inside Unity [Experimental]](Using-TensorFlow-Sharp-in-Unity-(Experimental).md) From e6495cf366b7db4a43506e50ce6fa90261dee46a Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Mon, 25 Sep 2017 13:22:51 -0700 Subject: [PATCH 10/11] Remove scipy dependency --- docs/Getting-Started-with-Balance-Ball.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/Getting-Started-with-Balance-Ball.md b/docs/Getting-Started-with-Balance-Ball.md index 8b4cc6f2b4..c4c91f3e50 100644 --- a/docs/Getting-Started-with-Balance-Ball.md +++ 
b/docs/Getting-Started-with-Balance-Ball.md @@ -51,7 +51,6 @@ If you are a Windows user who is new to Python/TensorFlow, follow [this guide](h * numpy * Pillow * Python (2 or 3) -* scipy * TensorFlow (1.0+) ### Installing Dependencies From 4a945a7ebed7ee86196d43d5ebaaabecbfbf82ab Mon Sep 17 00:00:00 2001 From: Arthur Juliani Date: Mon, 25 Sep 2017 13:33:30 -0700 Subject: [PATCH 11/11] Add installation doc --- docs/Readme.md | 1 + docs/installation.md | 51 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 docs/installation.md diff --git a/docs/Readme.md b/docs/Readme.md index 802583af25..590a8f3a11 100644 --- a/docs/Readme.md +++ b/docs/Readme.md @@ -2,6 +2,7 @@ ## Basic * [Unity ML Agents Overview](Unity-Agents-Overview.md) + * [Installation & Set-up](installation.md) * [Getting Started with the Balance Ball Environment](Getting-Started-with-Balance-Ball.md) * [Example Environments](Example-Environments.md) diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000000..af717b82a7 --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,51 @@ +# Installation & Set-up + +## Install **Unity 2017.1** or later (required) + +Download link available [here](https://store.unity.com/download?ref=update). + +## Clone the repository +Once installed, you will want to clone the Agents GitHub repository. References will be made +throughout to `unity-environment` and `python` directories. Both are located at the root of the repository. + +## Installing Python API +In order to train an agent within the framework, you will need to install Python 2 or 3, and the dependencies described below. + +### Windows Users + +If you are a Windows user who is new to Python/TensorFlow, follow [this guide](https://nitishmutha.github.io/tensorflow/2017/01/22/TensorFlow-with-gpu-for-windows.html) to set up your Python environment. + +### Requirements +* Jupyter +* Matplotlib +* numpy +* Pillow +* Python (2 or 3) +* docopt (Training) +* TensorFlow (1.0+) (Training) + +### Installing Dependencies +To install dependencies, go into the `python` directory and run (depending on your python version): + +`pip install .` + +or + +`pip3 install .` + +If your Python environment doesn't include `pip`, see these [instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers) on installing it. + +Once the requirements are successfully installed, the next step is to check out the [Getting Started guide](Getting-Started-with-Balance-Ball.md) + +## Installation Help + +### Using Jupyter Notebook + +For a walkthrough of how to use Jupyter notebook, see [here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/execute.html). + +### General Issues + +If you run into issues while attempting to install and run Unity ML Agents, see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Limitations-&-Common-Issues.md) for a list of common issues and solutions. + +If you have an issue that isn't covered here, feel free to contact us at ml-agents@unity3d.com. Alternatively, feel free to create an issue on the repository. +Be sure to include relevant information on OS, Python version, and exact error message if possible. \ No newline at end of file
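
Once the dependencies are installed, a short script can confirm that the Python API reaches a built environment before moving on to the Getting Started guide. The sketch below assumes the package installed from the `python` directory is importable as `unityagents`, that a built environment binary named `environment` sits next to the script, and that the environment object exposes a `close()` method; adjust these to your own setup.

```python
# Minimal sanity check for the Python API (assumed import name: unityagents).
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="environment")  # name of your built environment
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

print("Action space type:", brain.action_space_type)
print("State space size:", brain.state_space_size)
print("Number of observations:", brain.number_observations)

# Reset in inference mode and confirm states are returned for each agent.
info = env.reset(train_mode=False)[brain_name]
print("Received states for {} agent(s).".format(len(info.states)))
env.close()
```
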