Skip to content

Commit 79faafd

Browse files
Ruo-Ping (Rachel) Dong, Ervin T, and Chris Elion
authored
Cherrypick bug fixes to release_9_branch (#4617)
* [bug-fix] Don't load non-wrapped policy (#4593)
* pin cattrs version
* cap PyTorch version
* use v2 action and pin python version (#4568)

Co-authored-by: Ervin T <[email protected]>
Co-authored-by: Chris Elion <[email protected]>
1 parent 223ecaf commit 79faafd

File tree

8 files changed

+122
-11
lines changed

8 files changed

+122
-11
lines changed

.github/workflows/pre-commit.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ jobs:
1010
runs-on: ubuntu-latest
1111
steps:
1212
- uses: actions/checkout@v2
13-
- uses: actions/setup-python@v1
13+
- uses: actions/setup-python@v2
14+
with:
15+
python-version: 3.7.x
1416
- uses: actions/setup-ruby@v1
1517
with:
1618
ruby-version: '2.6'

.github/workflows/pytest.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ jobs:
5151
python -m pip install --progress-bar=off -r test_requirements.txt -c ${{ matrix.pip_constraints }}
5252
python -m pip install --progress-bar=off -e ./gym-unity -c ${{ matrix.pip_constraints }}
5353
- name: Save python dependencies
54-
run: pip freeze > pip_versions-${{ matrix.python-version }}.txt
54+
run: |
55+
pip freeze > pip_versions-${{ matrix.python-version }}.txt
56+
cat pip_versions-${{ matrix.python-version }}.txt
5557
- name: Run pytest
5658
run: pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=junit/test-results-${{ matrix.python-version }}.xml -p no:warnings
5759
- name: Upload pytest test results

com.unity.ml-agents/CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@ and this project adheres to
77
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
88

99

10+
## [Unreleased] - 2020-11-04
11+
### Major Changes
12+
#### com.unity.ml-agents (C#)
13+
#### ml-agents / ml-agents-envs / gym-unity (Python)
14+
15+
### Minor Changes
16+
#### com.unity.ml-agents (C#)
17+
#### ml-agents / ml-agents-envs / gym-unity (Python)
18+
19+
### Bug Fixes
20+
#### com.unity.ml-agents (C#)
21+
#### ml-agents / ml-agents-envs / gym-unity (Python)
22+
- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
23+
1024
## [1.5.0-preview] - 2020-10-14
1125
### Major Changes
1226
#### com.unity.ml-agents (C#)

ml-agents/mlagents/trainers/ghost/trainer.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,11 @@ def get_step(self) -> int:
146146
@property
147147
def reward_buffer(self) -> Deque[float]:
148148
"""
149-
Returns the reward buffer. The reward buffer contains the cumulative
150-
rewards of the most recent episodes completed by agents using this
151-
trainer.
152-
:return: the reward buffer.
153-
"""
149+
Returns the reward buffer. The reward buffer contains the cumulative
150+
rewards of the most recent episodes completed by agents using this
151+
trainer.
152+
:return: the reward buffer.
153+
"""
154154
return self.trainer.reward_buffer
155155

156156
@property
@@ -319,7 +319,6 @@ def create_policy(
319319
policy = self.trainer.create_policy(
320320
parsed_behavior_id, behavior_spec, create_graph=True
321321
)
322-
self.trainer.model_saver.initialize_or_load(policy)
323322
team_id = parsed_behavior_id.team_id
324323
self.controller.subscribe_team_id(team_id, self)
325324

@@ -337,6 +336,11 @@ def create_policy(
337336
self._save_snapshot() # Need to save after trainer initializes policy
338337
self._learning_team = self.controller.get_learning_team
339338
self.wrapped_trainer_team = team_id
339+
else:
340+
# Load the weights of the ghost policy from the wrapped one
341+
policy.load_weights(
342+
self.trainer.get_policy(parsed_behavior_id).get_weights()
343+
)
340344
return policy
341345

342346
def add_policy(

ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,50 @@ def test_load_and_set(dummy_config, use_discrete):
5757
np.testing.assert_array_equal(w, lw)
5858

5959

60+
def test_resume(dummy_config, tmp_path):
61+
mock_specs = mb.setup_test_behavior_specs(
62+
True, False, vector_action_space=[2], vector_obs_space=1
63+
)
64+
behavior_id_team0 = "test_brain?team=0"
65+
behavior_id_team1 = "test_brain?team=1"
66+
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
67+
tmp_path = tmp_path.as_posix()
68+
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
69+
controller = GhostController(100)
70+
trainer = GhostTrainer(
71+
ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
72+
)
73+
74+
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
75+
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
76+
trainer.add_policy(parsed_behavior_id0, policy)
77+
78+
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
79+
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
80+
trainer.add_policy(parsed_behavior_id1, policy)
81+
82+
trainer.save_model()
83+
84+
# Make a new trainer, check that the policies are the same
85+
ppo_trainer2 = PPOTrainer(brain_name, 0, dummy_config, True, True, 0, tmp_path)
86+
trainer2 = GhostTrainer(
87+
ppo_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
88+
)
89+
policy = trainer2.create_policy(parsed_behavior_id0, mock_specs)
90+
trainer2.add_policy(parsed_behavior_id0, policy)
91+
92+
policy = trainer2.create_policy(parsed_behavior_id1, mock_specs)
93+
trainer2.add_policy(parsed_behavior_id1, policy)
94+
95+
trainer1_policy = trainer.get_policy(parsed_behavior_id1.behavior_id)
96+
trainer2_policy = trainer2.get_policy(parsed_behavior_id1.behavior_id)
97+
weights = trainer1_policy.get_weights()
98+
weights2 = trainer2_policy.get_weights()
99+
100+
for w, lw in zip(weights, weights2):
101+
np.testing.assert_array_equal(w, lw)
102+
103+
60104
def test_process_trajectory(dummy_config):
61105
mock_specs = mb.setup_test_behavior_specs(
62106
True, False, vector_action_space=[2], vector_obs_space=1

ml-agents/mlagents/trainers/tests/torch/test_ghost.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,50 @@ def test_load_and_set(dummy_config, use_discrete):
5959
np.testing.assert_array_equal(w, lw)
6060

6161

62+
def test_resume(dummy_config, tmp_path):
63+
mock_specs = mb.setup_test_behavior_specs(
64+
True, False, vector_action_space=[2], vector_obs_space=1
65+
)
66+
behavior_id_team0 = "test_brain?team=0"
67+
behavior_id_team1 = "test_brain?team=1"
68+
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
69+
tmp_path = tmp_path.as_posix()
70+
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
71+
controller = GhostController(100)
72+
trainer = GhostTrainer(
73+
ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
74+
)
75+
76+
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
77+
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
78+
trainer.add_policy(parsed_behavior_id0, policy)
79+
80+
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
81+
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
82+
trainer.add_policy(parsed_behavior_id1, policy)
83+
84+
trainer.save_model()
85+
86+
# Make a new trainer, check that the policies are the same
87+
ppo_trainer2 = PPOTrainer(brain_name, 0, dummy_config, True, True, 0, tmp_path)
88+
trainer2 = GhostTrainer(
89+
ppo_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
90+
)
91+
policy = trainer2.create_policy(parsed_behavior_id0, mock_specs)
92+
trainer2.add_policy(parsed_behavior_id0, policy)
93+
94+
policy = trainer2.create_policy(parsed_behavior_id1, mock_specs)
95+
trainer2.add_policy(parsed_behavior_id1, policy)
96+
97+
trainer1_policy = trainer.get_policy(parsed_behavior_id1.behavior_id)
98+
trainer2_policy = trainer2.get_policy(parsed_behavior_id1.behavior_id)
99+
weights = trainer1_policy.get_weights()
100+
weights2 = trainer2_policy.get_weights()
101+
102+
for w, lw in zip(weights, weights2):
103+
np.testing.assert_array_equal(w, lw)
104+
105+
62106
def test_process_trajectory(dummy_config):
63107
mock_specs = mb.setup_test_behavior_specs(
64108
True, False, vector_action_space=[2], vector_obs_space=1

ml-agents/setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ def run(self):
6464
"protobuf>=3.6",
6565
"pyyaml>=3.1.0",
6666
"tensorflow>=1.14,<3.0",
67-
"cattrs>=1.0.0",
67+
# cattrs 1.1.0 dropped support for python 3.6.
68+
"cattrs>=1.0.0,<1.1.0",
6869
"attrs>=19.3.0",
6970
'pypiwin32==223;platform_system=="Windows"',
7071
# We don't actually need six, but tensorflow does, and pip seems
@@ -79,5 +80,5 @@ def run(self):
7980
]
8081
},
8182
cmdclass={"verify": VerifyVersionCommand},
82-
extras_require={"torch": ["torch>=1.5.0"]},
83+
extras_require={"torch": ["torch>=1.5.0,<1.7.0"]},
8384
)

test_requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ pytest-cov==2.6.1
44
pytest-xdist==1.34.0
55

66
# PyTorch tests are here for the time being, before they are used in the codebase.
7-
torch>=1.5.0
7+
torch>=1.5.0, <1.7.0
88

99
tf2onnx>=1.5.5

0 commit comments

Comments (0)