Fix discrete state #33

Merged
merged 12 commits into from
Sep 25, 2017
1 change: 0 additions & 1 deletion docs/Getting-Started-with-Balance-Ball.md
@@ -51,7 +51,6 @@ If you are a Windows user who is new to Python/TensorFlow, follow [this guide](h
* numpy
* Pillow
* Python (2 or 3)
* scipy
* TensorFlow (1.0+)

### Installing Dependencies
2 changes: 2 additions & 0 deletions docs/Readme.md
@@ -2,11 +2,13 @@

## Basic
* [Unity ML Agents Overview](Unity-Agents-Overview.md)
* [Installation & Set-up](installation.md)
* [Getting Started with the Balance Ball Environment](Getting-Started-with-Balance-Ball.md)
* [Example Environments](Example-Environments.md)

## Advanced
* [How to make a new Unity Environment](Making-a-new-Unity-Environment.md)
* [Best practices when designing an Environment](best-practices.md)
* [How to organize the Scene](Organizing-the-Scene.md)
* [How to use the Python API](Unity-Agents---Python-API.md)
* [How to use TensorflowSharp inside Unity [Experimental]](Using-TensorFlow-Sharp-in-Unity-(Experimental).md)
20 changes: 20 additions & 0 deletions docs/best-practices.md
@@ -0,0 +1,20 @@
# Environment Design Best Practices

## General
* It is often helpful to begin with the simplest version of the problem, to ensure the agent can learn it. From there, increase complexity over time.
* When possible, it is often helpful to ensure that you can complete the task by using a Player Brain to control the agent.

## Rewards
* The magnitude of any given reward should typically not be greater than 1.0 in order to ensure a more stable learning process.
* Positive rewards are often more helpful for shaping the desired behavior of an agent than negative rewards.
* For locomotion tasks, a small positive reward (+0.1) for forward progress is typically used.
* If you want the agent to finish a task quickly, it is often helpful to provide a small penalty every step (-0.1); see the sketch after this list.
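
As a rough illustration of these guidelines, the sketch below combines a small positive reward for forward progress, a small per-step penalty, and a completion bonus, and keeps the final magnitude at or below 1.0. The function name, arguments, and exact coefficients are assumptions made for the example only; this is not part of the ML-Agents API.

```python
import numpy as np

def shaped_reward(forward_progress, task_completed):
    # Illustrative only; names and coefficients are assumptions, not ML-Agents API.
    reward = 0.1 * forward_progress   # small positive reward for forward progress
    reward -= 0.1                     # small per-step penalty to encourage finishing quickly
    if task_completed:
        reward += 1.0                 # one-time bonus when the task is completed
    return float(np.clip(reward, -1.0, 1.0))  # keep the magnitude at or below 1.0
```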

## States
* The magnitude of each state variable should be normalized to around 1.0.
* States should include all variables relevant to allowing the agent to make an optimally informed decision.
* Categorical state variables such as the type of an object (Sword, Shield, Bow) should be encoded in one-hot fashion (i.e., `3` -> `0, 0, 1`); see the sketch after this list.
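
The sketch below shows one way to assemble such a state vector: a continuous variable scaled to roughly unit magnitude and a categorical variable encoded one-hot. The variable names and ranges are assumptions made for illustration, not part of the ML-Agents API.

```python
import numpy as np

def build_state(speed, max_speed, object_type, num_types=3):
    # Illustrative only; variable names and ranges are assumptions.
    normalized_speed = speed / max_speed      # scale the magnitude to roughly [0, 1]
    one_hot = np.zeros(num_types)
    one_hot[object_type] = 1.0                # e.g. index 2 (the third type, Bow) -> [0, 0, 1]
    return np.concatenate(([normalized_speed], one_hot))

# build_state(3.0, 6.0, 2) -> array([0.5, 0., 0., 1.])
```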

## Actions
* When using continuous control, action values should be clipped to an appropriate range, as shown in the sketch below.
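
A minimal sketch of such clipping, assuming an expected action range of [-1, 1]; the function name and range are illustrative, so substitute whatever range your environment actually expects.

```python
import numpy as np

def clip_actions(raw_actions, low=-1.0, high=1.0):
    # The [-1, 1] range is an assumption; use the range your environment expects.
    return np.clip(np.asarray(raw_actions, dtype=np.float32), low, high)

# clip_actions([1.7, -0.3, -2.5]) -> array([ 1. , -0.3, -1. ], dtype=float32)
```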
51 changes: 51 additions & 0 deletions docs/installation.md
@@ -0,0 +1,51 @@
# Installation & Set-up

## Install **Unity 2017.1** or later (required)

Download link available [here](https://store.unity.com/download?ref=update).

## Clone the repository
Once installed, you will want to clone the Agents GitHub repository. References will be made
throughout this documentation to the `unity-environment` and `python` directories. Both are located at the root of the repository.

## Installing Python API
In order to train an agent within the framework, you will need to install Python 2 or 3, and the dependencies described below.

### Windows Users

If you are a Windows user who is new to Python/TensorFlow, follow [this guide](https://nitishmutha.github.io/tensorflow/2017/01/22/TensorFlow-with-gpu-for-windows.html) to set up your Python environment.

### Requirements
* Jupyter
* Matplotlib
* numpy
* Pillow
* Python (2 or 3)
* docopt (Training)
* TensorFlow (1.0+) (Training)

### Installing Dependencies
To install dependencies, go into the `python` directory and run (depending on your Python version):

`pip install .`

or

`pip3 install .`

If your Python environment doesn't include `pip`, see these [instructions](https://packaging.python.org/guides/installing-using-linux-tools/#installing-pip-setuptools-wheel-with-linux-package-managers) on installing it.
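
After installation, a quick way to confirm the key dependencies import correctly is a short check like the one below; the module names are assumptions based on the requirement list above.

```python
# Quick sanity check that the key Python dependencies import correctly.
# Module names are assumptions based on the requirement list above.
import importlib

for module in ["jupyter", "matplotlib", "numpy", "PIL", "docopt", "tensorflow"]:
    try:
        importlib.import_module(module)
        print("OK:", module)
    except ImportError as err:
        print("MISSING:", module, "-", err)
```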

Once the requirements are successfully installed, the next step is to check out the [Getting Started guide](Getting-Started-with-Balance-Ball.md).

## Installation Help

### Using Jupyter Notebook

For a walkthrough of how to use Jupyter notebook, see [here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/execute.html).

### General Issues

If you run into issues while attempting to install and run Unity ML Agents, see [here](https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Limitations-&-Common-Issues.md) for a list of common issues and solutions.

If you have an issue that isn't covered here, feel free to contact us at [email protected], or create an issue on the repository.
Be sure to include relevant information on your OS, Python version, and the exact error message if possible.
10 changes: 4 additions & 6 deletions python/PPO.ipynb
@@ -49,7 +49,7 @@
"train_model = True # Whether to train the model.\n",
"summary_freq = 10000 # Frequency at which to save training statistics.\n",
"save_freq = 50000 # Frequency at which to save model.\n",
"env_name = \"simple\" # Name of the training environment file.\n",
"env_name = \"environment\" # Name of the training environment file.\n",
"\n",
"### Algorithm-specific parameters for tuning\n",
"gamma = 0.99 # Reward discount rate.\n",
@@ -74,9 +74,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"metadata": {},
"outputs": [],
"source": [
"env = UnityEnvironment(file_name=env_name)\n",
@@ -95,7 +93,6 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
@@ -109,6 +106,7 @@
"\n",
"is_continuous = (env.brains[brain_name].action_space_type == \"continuous\")\n",
"use_observations = (env.brains[brain_name].number_observations > 0)\n",
"use_states = (env.brains[brain_name].state_space_size > 0)\n",
"\n",
"model_path = './models/{}'.format(run_path)\n",
"summary_path = './summaries/{}'.format(run_path)\n",
@@ -133,7 +131,7 @@
" steps = sess.run(ppo_model.global_step)\n",
" summary_writer = tf.summary.FileWriter(summary_path)\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)\n",
" trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)\n",
" while steps <= max_steps:\n",
" if env.global_done:\n",
" info = env.reset(train_mode=train_model)[brain_name]\n",
5 changes: 3 additions & 2 deletions python/ppo.py
@@ -15,7 +15,7 @@

Options:
--help Show this message.
--max-steps=<n> Maximum number of steps to run environment [default: 5e6].
--max-steps=<n> Maximum number of steps to run environment [default: 1e6].
--run-path=<path> The sub-directory name for model and summary statistics [default: ppo].
--load Whether to load the model or randomly initialize [default: False].
--train Whether to train model, or only run inference [default: True].
@@ -73,6 +73,7 @@

is_continuous = (env.brains[brain_name].action_space_type == "continuous")
use_observations = (env.brains[brain_name].number_observations > 0)
use_states = (env.brains[brain_name].state_space_size > 0)

if not os.path.exists(model_path):
os.makedirs(model_path)
@@ -94,7 +95,7 @@
steps = sess.run(ppo_model.global_step)
summary_writer = tf.summary.FileWriter(summary_path)
info = env.reset(train_mode=train_model)[brain_name]
trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)
trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations, use_states)
while steps <= max_steps or not train_model:
if env.global_done:
info = env.reset(train_mode=train_model)[brain_name]