Skip to content

Commit 79faafd

Browse files
Ruo-Ping (Rachel) Dong, Ervin T, and Chris Elion
authored
Cherrypick bug fixes to release_9_branch (#4617)
* [bug-fix] Don't load non-wrapped policy (#4593)
* pin cattrs version
* cap PyTorch version
* use v2 action and pin python version (#4568)

Co-authored-by: Ervin T <[email protected]>
Co-authored-by: Chris Elion <[email protected]>
1 parent 223ecaf commit 79faafd

File tree

8 files changed

+122
-11
lines changed

8 files changed

+122
-11
lines changed

.github/workflows/pre-commit.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ jobs:
1010
runs-on: ubuntu-latest
1111
steps:
1212
- uses: actions/checkout@v2
13-
- uses: actions/setup-python@v1
13+
- uses: actions/setup-python@v2
14+
with:
15+
python-version: 3.7.x
1416
- uses: actions/setup-ruby@v1
1517
with:
1618
ruby-version: '2.6'

.github/workflows/pytest.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ jobs:
5151
python -m pip install --progress-bar=off -r test_requirements.txt -c ${{ matrix.pip_constraints }}
5252
python -m pip install --progress-bar=off -e ./gym-unity -c ${{ matrix.pip_constraints }}
5353
- name: Save python dependencies
54-
run: pip freeze > pip_versions-${{ matrix.python-version }}.txt
54+
run: |
55+
pip freeze > pip_versions-${{ matrix.python-version }}.txt
56+
cat pip_versions-${{ matrix.python-version }}.txt
5557
- name: Run pytest
5658
run: pytest --cov=ml-agents --cov=ml-agents-envs --cov=gym-unity --cov-report html --junitxml=junit/test-results-${{ matrix.python-version }}.xml -p no:warnings
5759
- name: Upload pytest test results

com.unity.ml-agents/CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,20 @@ and this project adheres to
77
[Semantic Versioning](http://semver.org/spec/v2.0.0.html).
88

99

10+
## [Unreleased] - 2020-11-04
11+
### Major Changes
12+
#### com.unity.ml-agents (C#)
13+
#### ml-agents / ml-agents-envs / gym-unity (Python)
14+
15+
### Minor Changes
16+
#### com.unity.ml-agents (C#)
17+
#### ml-agents / ml-agents-envs / gym-unity (Python)
18+
19+
### Bug Fixes
20+
#### com.unity.ml-agents (C#)
21+
#### ml-agents / ml-agents-envs / gym-unity (Python)
22+
- Fixed an issue where runs could not be resumed when using TensorFlow and Ghost Training. (#4593)
23+
1024
## [1.5.0-preview] - 2020-10-14
1125
### Major Changes
1226
#### com.unity.ml-agents (C#)

ml-agents/mlagents/trainers/ghost/trainer.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -146,11 +146,11 @@ def get_step(self) -> int:
146146
@property
147147
def reward_buffer(self) -> Deque[float]:
148148
"""
149-
Returns the reward buffer. The reward buffer contains the cumulative
150-
rewards of the most recent episodes completed by agents using this
151-
trainer.
152-
:return: the reward buffer.
153-
"""
149+
Returns the reward buffer. The reward buffer contains the cumulative
150+
rewards of the most recent episodes completed by agents using this
151+
trainer.
152+
:return: the reward buffer.
153+
"""
154154
return self.trainer.reward_buffer
155155

156156
@property
@@ -319,7 +319,6 @@ def create_policy(
319319
policy = self.trainer.create_policy(
320320
parsed_behavior_id, behavior_spec, create_graph=True
321321
)
322-
self.trainer.model_saver.initialize_or_load(policy)
323322
team_id = parsed_behavior_id.team_id
324323
self.controller.subscribe_team_id(team_id, self)
325324

@@ -337,6 +336,11 @@ def create_policy(
337336
self._save_snapshot() # Need to save after trainer initializes policy
338337
self._learning_team = self.controller.get_learning_team
339338
self.wrapped_trainer_team = team_id
339+
else:
340+
# Load the weights of the ghost policy from the wrapped one
341+
policy.load_weights(
342+
self.trainer.get_policy(parsed_behavior_id).get_weights()
343+
)
340344
return policy
341345

342346
def add_policy(

ml-agents/mlagents/trainers/tests/tensorflow/test_ghost.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,50 @@ def test_load_and_set(dummy_config, use_discrete):
5757
np.testing.assert_array_equal(w, lw)
5858

5959

60+
def test_resume(dummy_config, tmp_path):
61+
mock_specs = mb.setup_test_behavior_specs(
62+
True, False, vector_action_space=[2], vector_obs_space=1
63+
)
64+
behavior_id_team0 = "test_brain?team=0"
65+
behavior_id_team1 = "test_brain?team=1"
66+
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
67+
tmp_path = tmp_path.as_posix()
68+
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
69+
controller = GhostController(100)
70+
trainer = GhostTrainer(
71+
ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
72+
)
73+
74+
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
75+
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
76+
trainer.add_policy(parsed_behavior_id0, policy)
77+
78+
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
79+
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
80+
trainer.add_policy(parsed_behavior_id1, policy)
81+
82+
trainer.save_model()
83+
84+
# Make a new trainer, check that the policies are the same
85+
ppo_trainer2 = PPOTrainer(brain_name, 0, dummy_config, True, True, 0, tmp_path)
86+
trainer2 = GhostTrainer(
87+
ppo_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
88+
)
89+
policy = trainer2.create_policy(parsed_behavior_id0, mock_specs)
90+
trainer2.add_policy(parsed_behavior_id0, policy)
91+
92+
policy = trainer2.create_policy(parsed_behavior_id1, mock_specs)
93+
trainer2.add_policy(parsed_behavior_id1, policy)
94+
95+
trainer1_policy = trainer.get_policy(parsed_behavior_id1.behavior_id)
96+
trainer2_policy = trainer2.get_policy(parsed_behavior_id1.behavior_id)
97+
weights = trainer1_policy.get_weights()
98+
weights2 = trainer2_policy.get_weights()
99+
100+
for w, lw in zip(weights, weights2):
101+
np.testing.assert_array_equal(w, lw)
102+
103+
60104
def test_process_trajectory(dummy_config):
61105
mock_specs = mb.setup_test_behavior_specs(
62106
True, False, vector_action_space=[2], vector_obs_space=1

ml-agents/mlagents/trainers/tests/torch/test_ghost.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,50 @@ def test_load_and_set(dummy_config, use_discrete):
5959
np.testing.assert_array_equal(w, lw)
6060

6161

62+
def test_resume(dummy_config, tmp_path):
63+
mock_specs = mb.setup_test_behavior_specs(
64+
True, False, vector_action_space=[2], vector_obs_space=1
65+
)
66+
behavior_id_team0 = "test_brain?team=0"
67+
behavior_id_team1 = "test_brain?team=1"
68+
brain_name = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0).brain_name
69+
tmp_path = tmp_path.as_posix()
70+
ppo_trainer = PPOTrainer(brain_name, 0, dummy_config, True, False, 0, tmp_path)
71+
controller = GhostController(100)
72+
trainer = GhostTrainer(
73+
ppo_trainer, brain_name, controller, 0, dummy_config, True, tmp_path
74+
)
75+
76+
parsed_behavior_id0 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team0)
77+
policy = trainer.create_policy(parsed_behavior_id0, mock_specs)
78+
trainer.add_policy(parsed_behavior_id0, policy)
79+
80+
parsed_behavior_id1 = BehaviorIdentifiers.from_name_behavior_id(behavior_id_team1)
81+
policy = trainer.create_policy(parsed_behavior_id1, mock_specs)
82+
trainer.add_policy(parsed_behavior_id1, policy)
83+
84+
trainer.save_model()
85+
86+
# Make a new trainer, check that the policies are the same
87+
ppo_trainer2 = PPOTrainer(brain_name, 0, dummy_config, True, True, 0, tmp_path)
88+
trainer2 = GhostTrainer(
89+
ppo_trainer2, brain_name, controller, 0, dummy_config, True, tmp_path
90+
)
91+
policy = trainer2.create_policy(parsed_behavior_id0, mock_specs)
92+
trainer2.add_policy(parsed_behavior_id0, policy)
93+
94+
policy = trainer2.create_policy(parsed_behavior_id1, mock_specs)
95+
trainer2.add_policy(parsed_behavior_id1, policy)
96+
97+
trainer1_policy = trainer.get_policy(parsed_behavior_id1.behavior_id)
98+
trainer2_policy = trainer2.get_policy(parsed_behavior_id1.behavior_id)
99+
weights = trainer1_policy.get_weights()
100+
weights2 = trainer2_policy.get_weights()
101+
102+
for w, lw in zip(weights, weights2):
103+
np.testing.assert_array_equal(w, lw)
104+
105+
62106
def test_process_trajectory(dummy_config):
63107
mock_specs = mb.setup_test_behavior_specs(
64108
True, False, vector_action_space=[2], vector_obs_space=1

ml-agents/setup.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ def run(self):
6464
"protobuf>=3.6",
6565
"pyyaml>=3.1.0",
6666
"tensorflow>=1.14,<3.0",
67-
"cattrs>=1.0.0",
67+
# cattrs 1.1.0 dropped support for python 3.6.
68+
"cattrs>=1.0.0,<1.1.0",
6869
"attrs>=19.3.0",
6970
'pypiwin32==223;platform_system=="Windows"',
7071
# We don't actually need six, but tensorflow does, and pip seems
@@ -79,5 +80,5 @@ def run(self):
7980
]
8081
},
8182
cmdclass={"verify": VerifyVersionCommand},
82-
extras_require={"torch": ["torch>=1.5.0"]},
83+
extras_require={"torch": ["torch>=1.5.0,<1.7.0"]},
8384
)

test_requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ pytest-cov==2.6.1
44
pytest-xdist==1.34.0
55

66
# PyTorch tests are here for the time being, before they are used in the codebase.
7-
torch>=1.5.0
7+
torch>=1.5.0, <1.7.0
88

99
tf2onnx>=1.5.5

0 commit comments

Comments (0)