From abc222316428a7647a2a50b4edbf45c8a74f7e86 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 10:50:47 -0700 Subject: [PATCH 01/16] Make --resume an option --- ml-agents/mlagents/trainers/learn.py | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 8c912c9cda..b1fd97cb4f 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -67,7 +67,21 @@ def _create_parser(): default=False, dest="load_model", action="store_true", - help="Whether to load the model or randomly initialize", + help=argparse.SUPPRESS, # Deprecated but still usable for now. + ) + argparser.add_argument( + "--resume", + default=False, + dest="resume", + action="store_true", + help="Resumes training from a checkpoint. Specify a --run-id to use this option.", + ) + argparser.add_argument( + "--load", + default=False, + dest="load_model", + action="store_true", + help=argparse.SUPPRESS, ) argparser.add_argument( "--run-id", @@ -167,6 +181,7 @@ class RunOptions(NamedTuple): env_path: Optional[str] = parser.get_default("env_path") run_id: str = parser.get_default("run_id") load_model: bool = parser.get_default("load_model") + resume: bool = parser.get_default("resume") train_model: bool = parser.get_default("train_model") save_freq: int = parser.get_default("save_freq") keep_checkpoints: int = parser.get_default("keep_checkpoints") @@ -282,7 +297,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: model_path, options.keep_checkpoints, options.train_model, - options.load_model, + options.load_model or options.resume, run_seed, maybe_meta_curriculum, options.multi_gpu, @@ -423,6 +438,12 @@ def run_cli(options: RunOptions) -> None: trainer_logger.debug("Configuration for this run:") trainer_logger.debug(json.dumps(options._asdict(), indent=4)) + # Options deprecation warnings + if options.load_model: + trainer_logger.warning( + "The --load option has been deprecated. Please use the --resume option instead." + ) + run_seed = options.seed if options.cpu: os.environ["CUDA_VISIBLE_DEVICES"] = "-1" From 69080a181fe568eb4574b80034694ecccc702be3 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 10:52:25 -0700 Subject: [PATCH 02/16] Fix issue --- ml-agents/mlagents/trainers/learn.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index b1fd97cb4f..f394c0659b 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -76,13 +76,6 @@ def _create_parser(): action="store_true", help="Resumes training from a checkpoint. 
Specify a --run-id to use this option.", ) - argparser.add_argument( - "--load", - default=False, - dest="load_model", - action="store_true", - help=argparse.SUPPRESS, - ) argparser.add_argument( "--run-id", default="ppo", From 3afb8acc23e44a629e6972159188b1f3847e9963 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 11:28:59 -0700 Subject: [PATCH 03/16] Make load error an exception --- ml-agents/mlagents/trainers/policy/tf_policy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index 428ba19796..ca042cb2ed 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -115,10 +115,10 @@ def _load_graph(self): logger.info("Loading Model for brain {}".format(self.brain.brain_name)) ckpt = tf.train.get_checkpoint_state(self.model_path) if ckpt is None: - logger.info( - "The model {0} could not be found. Make " + raise UnityPolicyException( + "The model {0} could not be loaded. Make " "sure you specified the right " - "--run-id".format(self.model_path) + "--run-id.".format(self.model_path) ) self.saver.restore(self.sess, ckpt.model_checkpoint_path) From dfb657b9f03400fb30037b72e4ab1e42218afb40 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 13:59:42 -0700 Subject: [PATCH 04/16] Make StatsWriter erase existing tb files --- ml-agents/mlagents/trainers/stats.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 7a579f7355..7eeadf198f 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -198,8 +198,21 @@ def _maybe_create_summary_writer(self, category: str) -> None: basedir=self.base_dir, category=category ) os.makedirs(filewriter_dir, exist_ok=True) + self._delete_all_events_files(filewriter_dir) self.summary_writers[category] = tf.summary.FileWriter(filewriter_dir) + def _delete_all_events_files(self, directory_name: str) -> None: + for file_name in os.listdir(directory_name): + if file_name.startswith("events.out"): + full_fname = os.path.join(directory_name, file_name) + try: + os.remove(full_fname) + except OSError: + logger.warning( + "{} was left over from a previous run and " + "not deleted.".format(full_fname) + ) + def add_property( self, category: str, property_type: StatsPropertyType, value: Any ) -> None: From 3537c36e1cca88bbbacce9311680cb3b28ddeb46 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 14:02:41 -0700 Subject: [PATCH 05/16] Make --force and check for existing models --- ml-agents/mlagents/trainers/learn.py | 23 ++++++++++++-- ml-agents/mlagents/trainers/trainer_util.py | 35 +++++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index f394c0659b..226742d869 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -13,7 +13,11 @@ from mlagents import tf_utils from mlagents.trainers.trainer_controller import TrainerController from mlagents.trainers.meta_curriculum import MetaCurriculum -from mlagents.trainers.trainer_util import load_config, TrainerFactory +from mlagents.trainers.trainer_util import ( + load_config, + TrainerFactory, + handle_existing_directories, +) from mlagents.trainers.stats import ( TensorboardWriter, CSVWriter, @@ -76,6 +80,14 @@ def _create_parser(): 
action="store_true", help="Resumes training from a checkpoint. Specify a --run-id to use this option.", ) + argparser.add_argument( + "--force", + default=False, + dest="force", + action="store_true", + help="Force-overwrite existing models and summaries for a run-id that has been used " + "before.", + ) argparser.add_argument( "--run-id", default="ppo", @@ -175,6 +187,7 @@ class RunOptions(NamedTuple): run_id: str = parser.get_default("run_id") load_model: bool = parser.get_default("load_model") resume: bool = parser.get_default("resume") + force: bool = parser.get_default("force") train_model: bool = parser.get_default("train_model") save_freq: int = parser.get_default("save_freq") keep_checkpoints: int = parser.get_default("keep_checkpoints") @@ -212,7 +225,8 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions": argparse_args["sampler_config"] = load_config( argparse_args["sampler_file_path"] ) - + # Keep deprecated --load working, TODO: remove + argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"] # Since argparse accepts file paths in the config options which don't exist in CommandLineOptions, # these keys will need to be deleted to use the **/splat operator below. argparse_args.pop("sampler_file_path") @@ -256,6 +270,9 @@ def run_training(run_seed: int, options: RunOptions) -> None: "Environment/Episode Length", ], ) + handle_existing_directories( + model_path, summaries_dir, options.resume, options.force + ) tb_writer = TensorboardWriter(summaries_dir) gauge_write = GaugeWriter() console_writer = ConsoleWriter() @@ -290,7 +307,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: model_path, options.keep_checkpoints, options.train_model, - options.load_model or options.resume, + options.resume, run_seed, maybe_meta_curriculum, options.multi_gpu, diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index a112da05c5..6f3a3a9463 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -1,5 +1,6 @@ import os import yaml +import shutil from typing import Any, Dict, TextIO import logging @@ -190,3 +191,37 @@ def _load_config(fp: TextIO) -> Dict[str, Any]: "Error parsing yaml file. Please check for formatting errors. " "A tool such as http://www.yamllint.com/ can be helpful with this." ) from e + + +def handle_existing_directories( + model_path: str, summary_path: str, resume: bool, force: bool +) -> None: + """ + Validates that if the run_id model exists, we do not overwrite it unless --force is specified. + Throws an exception if resume isn't specified and run_id exists. Throws an exception + if --resume is specified and run-id was not found. + :param model_path: The model path specified. + :param resume: Whether or not the --resume flag was passed. + :param force: Whether or not the --force flag was passed. + """ + + def _try_delete_directory(directory_path: str) -> None: + try: + if os.path.isdir(directory_path): + shutil.rmtree(directory_path) + except os.error: + raise UnityTrainerException( + "Unable to overwrite previous directory {}. Check to see that you have" + "the appropriate file permissions in that directory.".format( + directory_path + ) + ) + + model_path_exists = os.path.isdir(model_path) + + if model_path_exists: + if not resume and not force: + raise UnityTrainerException( + "Previous data from this run-id was found. " + "Either specify a new run-id or use the --force parameter to overwrite existing data." 
+ ) From b3344044cb774fbb7456c396dbb51919c1930a91 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 14:03:37 -0700 Subject: [PATCH 06/16] Fix comment --- ml-agents/mlagents/trainers/trainer_util.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index 6f3a3a9463..d9702425b3 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -201,6 +201,7 @@ def handle_existing_directories( Throws an exception if resume isn't specified and run_id exists. Throws an exception if --resume is specified and run-id was not found. :param model_path: The model path specified. + :param summary_path: The summary path to be used. :param resume: Whether or not the --resume flag was passed. :param force: Whether or not the --force flag was passed. """ From c2cbef324707cd044115fba75ccc48a435217d48 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 14:08:01 -0700 Subject: [PATCH 07/16] Remove directory deletion --- ml-agents/mlagents/trainers/trainer_util.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index d9702425b3..a2c2e94999 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -1,6 +1,5 @@ import os import yaml -import shutil from typing import Any, Dict, TextIO import logging @@ -206,18 +205,6 @@ def handle_existing_directories( :param force: Whether or not the --force flag was passed. """ - def _try_delete_directory(directory_path: str) -> None: - try: - if os.path.isdir(directory_path): - shutil.rmtree(directory_path) - except os.error: - raise UnityTrainerException( - "Unable to overwrite previous directory {}. Check to see that you have" - "the appropriate file permissions in that directory.".format( - directory_path - ) - ) - model_path_exists = os.path.isdir(model_path) if model_path_exists: From 57af197973a6b1f435236d3de71796296557de5d Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 14:18:53 -0700 Subject: [PATCH 08/16] Deprecate --train --- ml-agents/mlagents/trainers/learn.py | 20 ++++++++++++++++--- .../mlagents/trainers/policy/tf_policy.py | 3 ++- ml-agents/mlagents/trainers/trainer_util.py | 9 ++++++++- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 226742d869..6554e46095 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -104,7 +104,15 @@ def _create_parser(): default=False, dest="train_model", action="store_true", - help="Whether to train model, or only run inference", + help=argparse.SUPPRESS, + ) + argparser.add_argument( + "--inference", + default=False, + dest="inference", + action="store_true", + help="Run in Python inference mode (don't train). 
Use with --resume to load a model trained with an " + "existing run-id.", ) argparser.add_argument( "--base-port", @@ -189,6 +197,7 @@ class RunOptions(NamedTuple): resume: bool = parser.get_default("resume") force: bool = parser.get_default("force") train_model: bool = parser.get_default("train_model") + inference: bool = parser.get_default("inference") save_freq: int = parser.get_default("save_freq") keep_checkpoints: int = parser.get_default("keep_checkpoints") base_port: int = parser.get_default("base_port") @@ -306,7 +315,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: options.run_id, model_path, options.keep_checkpoints, - options.train_model, + not options.inference, options.resume, run_seed, maybe_meta_curriculum, @@ -320,7 +329,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: options.run_id, options.save_freq, maybe_meta_curriculum, - options.train_model, + not options.inference, run_seed, sampler_manager, resampling_interval, @@ -453,6 +462,11 @@ def run_cli(options: RunOptions) -> None: trainer_logger.warning( "The --load option has been deprecated. Please use the --resume option instead." ) + if options.train_model: + trainer_logger.warning( + "The --train option has been deprecated. Train mode is now the default. Use " + "--inference to run in inference mode." + ) run_seed = options.seed if options.cpu: diff --git a/ml-agents/mlagents/trainers/policy/tf_policy.py b/ml-agents/mlagents/trainers/policy/tf_policy.py index ca042cb2ed..7cbd52a90f 100644 --- a/ml-agents/mlagents/trainers/policy/tf_policy.py +++ b/ml-agents/mlagents/trainers/policy/tf_policy.py @@ -118,7 +118,8 @@ def _load_graph(self): raise UnityPolicyException( "The model {0} could not be loaded. Make " "sure you specified the right " - "--run-id.".format(self.model_path) + "--run-id. and that the previous run you are resuming from had the same " + "behavior names.".format(self.model_path) ) self.saver.restore(self.sess, ckpt.model_checkpoint_path) diff --git a/ml-agents/mlagents/trainers/trainer_util.py b/ml-agents/mlagents/trainers/trainer_util.py index a2c2e94999..071abb60fe 100644 --- a/ml-agents/mlagents/trainers/trainer_util.py +++ b/ml-agents/mlagents/trainers/trainer_util.py @@ -211,5 +211,12 @@ def handle_existing_directories( if not resume and not force: raise UnityTrainerException( "Previous data from this run-id was found. " - "Either specify a new run-id or use the --force parameter to overwrite existing data." + "Either specify a new run-id, use --resume to resume this run, " + "or use the --force parameter to overwrite existing data." + ) + else: + if resume: + raise UnityTrainerException( + "Previous data from this run-id was not found. " + "Train a new run by removing the --resume flag." 
) From 8b35f15a93427e364c8e54f921c7502fbf4d0517 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 17:00:08 -0700 Subject: [PATCH 09/16] Add tests --- .../mlagents/trainers/tests/test_learn.py | 18 ++++++++++----- .../trainers/tests/test_trainer_util.py | 22 +++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/ml-agents/mlagents/trainers/tests/test_learn.py b/ml-agents/mlagents/trainers/tests/test_learn.py index fd291de711..969d62ebd0 100644 --- a/ml-agents/mlagents/trainers/tests/test_learn.py +++ b/ml-agents/mlagents/trainers/tests/test_learn.py @@ -15,6 +15,7 @@ def basic_options(extra_args=None): return parse_command_line(args) +@patch("mlagents.trainers.learn.handle_existing_directories") @patch("mlagents.trainers.learn.TrainerFactory") @patch("mlagents.trainers.learn.SamplerManager") @patch("mlagents.trainers.learn.SubprocessEnvManager") @@ -26,6 +27,7 @@ def test_run_training( subproc_env_mock, sampler_manager_mock, trainer_factory_mock, + handle_dir_mock, ): mock_env = MagicMock() mock_env.external_brain_names = [] @@ -45,11 +47,14 @@ def test_run_training( "ppo", 50000, None, - False, + True, 0, sampler_manager_mock.return_value, None, ) + handle_dir_mock.assert_called_once_with( + "./models/ppo", "./summaries", False, False + ) StatsReporter.writers.clear() # make sure there aren't any writers as added by learn.py @@ -79,11 +84,11 @@ def test_commandline_args(mock_file): assert opt.sampler_config is None assert opt.keep_checkpoints == 5 assert opt.lesson == 0 - assert opt.load_model is False + assert opt.resume is False + assert opt.inference is False assert opt.run_id == "ppo" assert opt.save_freq == 50000 assert opt.seed == -1 - assert opt.train_model is False assert opt.base_port == 5005 assert opt.num_envs == 1 assert opt.no_graphics is False @@ -97,7 +102,8 @@ def test_commandline_args(mock_file): "--sampler=./mysample", "--keep-checkpoints=42", "--lesson=3", - "--load", + "--resume", + "--inference", "--run-id=myawesomerun", "--save-freq=123456", "--seed=7890", @@ -115,15 +121,15 @@ def test_commandline_args(mock_file): assert opt.sampler_config == {} assert opt.keep_checkpoints == 42 assert opt.lesson == 3 - assert opt.load_model is True assert opt.run_id == "myawesomerun" assert opt.save_freq == 123456 assert opt.seed == 7890 - assert opt.train_model is True assert opt.base_port == 4004 assert opt.num_envs == 2 assert opt.no_graphics is True assert opt.debug is True + assert opt.inference is True + assert opt.resume is True @patch("builtins.open", new_callable=mock_open, read_data="{}") diff --git a/ml-agents/mlagents/trainers/tests/test_trainer_util.py b/ml-agents/mlagents/trainers/tests/test_trainer_util.py index 2d90284cc8..0ab2bc1775 100644 --- a/ml-agents/mlagents/trainers/tests/test_trainer_util.py +++ b/ml-agents/mlagents/trainers/tests/test_trainer_util.py @@ -1,6 +1,7 @@ import pytest import yaml import io +import os from unittest.mock import patch from mlagents.trainers import trainer_util @@ -335,3 +336,24 @@ def test_load_config_invalid_yaml(): with pytest.raises(TrainerConfigError): fp = io.StringIO(file_contents) _load_config(fp) + + +def test_existing_directories(tmp_path): + model_path = os.path.join(tmp_path, "runid") + # Unused summary path + summary_path = os.path.join(tmp_path, "runid") + # Test fresh new unused path - should do nothing. + trainer_util.handle_existing_directories(model_path, summary_path, False, False) + # Test resume with fresh path - should throw an exception. 
+ with pytest.raises(UnityTrainerException): + trainer_util.handle_existing_directories(model_path, summary_path, True, False) + + # make a directory + os.mkdir(model_path) + # Test try to train w.o. force, should complain + with pytest.raises(UnityTrainerException): + trainer_util.handle_existing_directories(model_path, summary_path, False, False) + # Test try to train w/ resume - should work + trainer_util.handle_existing_directories(model_path, summary_path, True, False) + # Test try to train w/ force - should work + trainer_util.handle_existing_directories(model_path, summary_path, False, True) From a45d2e44782c95915a612e5e6d71749c30db1b8b Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 17:34:53 -0700 Subject: [PATCH 10/16] Edit docs --- docs/Getting-Started.md | 19 ++++++---- docs/Learning-Environment-Create-New.md | 2 +- docs/Learning-Environment-Executable.md | 8 ++-- docs/Training-Curriculum-Learning.md | 2 +- ...ing-Environment-Parameter-Randomization.md | 2 +- docs/Training-ML-Agents.md | 38 ++++++++++++++----- 6 files changed, 46 insertions(+), 25 deletions(-) diff --git a/docs/Getting-Started.md b/docs/Getting-Started.md index 9372d7c970..1b796ca471 100644 --- a/docs/Getting-Started.md +++ b/docs/Getting-Started.md @@ -197,19 +197,17 @@ which accepts arguments used to configure both training and inference phases. 2. Navigate to the folder where you cloned the ML-Agents toolkit repository. **Note**: If you followed the default [installation](Installation.md), then you should be able to run `mlagents-learn` from any directory. -3. Run `mlagents-learn --run-id= --train` +3. Run `mlagents-learn --run-id=` where: - `` is the relative or absolute filepath of the trainer configuration. The defaults used by example environments included in `MLAgentsSDK` can be found in `config/trainer_config.yaml`. - `` is a string used to separate the results of different - training runs - - `--train` tells `mlagents-learn` to run a training session (rather - than inference) + training runs. Make sure to use one that hasn't been used already! 4. If you cloned the ML-Agents repo, then you can simply run ```sh - mlagents-learn config/trainer_config.yaml --run-id=firstRun --train + mlagents-learn config/trainer_config.yaml --run-id=firstRun ``` 5. When the message _"Start training by pressing the Play button in the Unity @@ -219,7 +217,6 @@ which accepts arguments used to configure both training and inference phases. **Note**: If you're using Anaconda, don't forget to activate the ml-agents environment first. -The `--train` flag tells the ML-Agents toolkit to run in training mode. The `--time-scale=100` sets the `Time.TimeScale` value in Unity. **Note**: You can train using an executable rather than the Editor. To do so, @@ -330,8 +327,14 @@ Either wait for the training process to close the window or press Ctrl+C at the command-line prompt. If you close the window manually, the `.nn` file containing the trained model is not exported into the ml-agents folder. -You can press Ctrl+C to stop the training, and your trained model will be at -`models//.nn` where +If you've quit the training early using Ctrl+C and want to resume training, run the +same command again, appending the `--resume` flag: + +```sh +mlagents-learn config/trainer_config.yaml --run-id=firstRun --resume +``` + +Your trained model will be at `models//.nn` where `` is the name of the `Behavior Name` of the agents corresponding to the model. 
(**Note:** There is a known bug on Windows that causes the saving of the model to fail when you early terminate the training, it's recommended to wait until Step diff --git a/docs/Learning-Environment-Create-New.md b/docs/Learning-Environment-Create-New.md index fc776a8ef8..57102bb9ce 100644 --- a/docs/Learning-Environment-Create-New.md +++ b/docs/Learning-Environment-Create-New.md @@ -423,7 +423,7 @@ in this simple environment, speeds up training. To train in the editor, run the following Python command from a Terminal or Console window before pressing play: - mlagents-learn config/config.yaml --run-id=RollerBall-1 --train + mlagents-learn config/config.yaml --run-id=RollerBall-1 (where `config.yaml` is a copy of `trainer_config.yaml` that you have edited to change the `batch_size` and `buffer_size` hyperparameters for your trainer.) diff --git a/docs/Learning-Environment-Executable.md b/docs/Learning-Environment-Executable.md index 5a2eec55d7..e9c560cdde 100644 --- a/docs/Learning-Environment-Executable.md +++ b/docs/Learning-Environment-Executable.md @@ -76,27 +76,25 @@ env = UnityEnvironment(file_name=) followed the default [installation](Installation.md), then navigate to the `ml-agents/` folder. 3. Run - `mlagents-learn --env= --run-id= --train` + `mlagents-learn --env= --run-id=` Where: * `` is the file path of the trainer configuration yaml * `` is the name and path to the executable you exported from Unity (without extension) * `` is a string used to separate the results of different training runs - * And the `--train` tells `mlagents-learn` to run a training session (rather - than inference) For example, if you are training with a 3DBall executable you exported to the the directory where you installed the ML-Agents Toolkit, run: ```sh -mlagents-learn ../config/trainer_config.yaml --env=3DBall --run-id=firstRun --train +mlagents-learn ../config/trainer_config.yaml --env=3DBall --run-id=firstRun ``` And you should see something like ```console -ml-agents$ mlagents-learn config/trainer_config.yaml --env=3DBall --run-id=first-run --train +ml-agents$ mlagents-learn config/trainer_config.yaml --env=3DBall --run-id=first-run ▄▄▄▓▓▓▓ diff --git a/docs/Training-Curriculum-Learning.md b/docs/Training-Curriculum-Learning.md index d51d125663..12bd4c5e38 100644 --- a/docs/Training-Curriculum-Learning.md +++ b/docs/Training-Curriculum-Learning.md @@ -110,7 +110,7 @@ for our curricula and PPO will train using Curriculum Learning. For example, to train agents in the Wall Jump environment with curriculum learning, we can run: ```sh -mlagents-learn config/trainer_config.yaml --curriculum=config/curricula/wall_jump.yaml --run-id=wall-jump-curriculum --train +mlagents-learn config/trainer_config.yaml --curriculum=config/curricula/wall_jump.yaml --run-id=wall-jump-curriculum ``` We can then keep track of the current lessons and progresses via TensorBoard. diff --git a/docs/Training-Environment-Parameter-Randomization.md b/docs/Training-Environment-Parameter-Randomization.md index 892599cd31..c2420f6bc5 100644 --- a/docs/Training-Environment-Parameter-Randomization.md +++ b/docs/Training-Environment-Parameter-Randomization.md @@ -165,7 +165,7 @@ sampling setup, we would run ```sh mlagents-learn config/trainer_config.yaml --sampler=config/3dball_randomize.yaml ---run-id=3D-Ball-randomize --train +--run-id=3D-Ball-randomize ``` We can observe progress and metrics via Tensorboard. 
diff --git a/docs/Training-ML-Agents.md b/docs/Training-ML-Agents.md index 64ae7026fe..a8c7deafcb 100644 --- a/docs/Training-ML-Agents.md +++ b/docs/Training-ML-Agents.md @@ -43,7 +43,7 @@ training options. The basic command for training is: ```sh -mlagents-learn --env= --run-id= --train +mlagents-learn --env= --run-id= ``` where @@ -68,7 +68,7 @@ contains agents ready to train. To perform the training: environment you built in step 1: ```sh -mlagents-learn config/trainer_config.yaml --env=../../projects/Cats/CatsOnBicycles.app --run-id=cob_1 --train +mlagents-learn config/trainer_config.yaml --env=../../projects/Cats/CatsOnBicycles.app --run-id=cob_1 ``` During a training session, the training program prints out and saves updates at @@ -92,9 +92,27 @@ under the assigned run-id — in the cats example, the path to the model would b `models/cob_1/CatsOnBicycles_cob_1.nn`. While this example used the default training hyperparameters, you can edit the -[training_config.yaml file](#training-config-file) with a text editor to set +[trainer_config.yaml file](#training-config-file) with a text editor to set different values. +To interrupt training and save the current progress, hit Ctrl+C once and wait for the +model to be saved out. + +### Loading an Existing Model + +If you've quit training early using Ctrl+C, you can resume the training run by running +`mlagents-learn` again, specifying the same `` and appending the `--resume` flag +to the command. + +You can also use this mode to run inference of an already-trained model in Python. +Append both the `--resume` and `--inference` to do this. Note that if you want to run +inference in Unity, you should use the +[Unity Inference Engine](Getting-started#Running-a-pre-trained-model). + +If you've already trained a model using the specified `` and `--resume` is not +specified, you will not be able to continue with training. Use `--force` to force ML-Agents to +overwrite the existing data. + ### Command Line Training Options In addition to passing the path of the Unity executable containing your training @@ -115,7 +133,7 @@ environment, you can set the following command line options when invoking training. Defaults to 0. * `--num-envs=`: Specifies the number of concurrent Unity environment instances to collect experiences from when training. Defaults to 1. -* `--run-id=`: Specifies an identifier for each training run. This +* `--run-id=`: Specifies an identifier for each training run. This identifier is used to name the subdirectories in which the trained model and summary statistics are saved as well as the saved model itself. The default id is "ppo". If you use TensorBoard to view the training statistics, always set a @@ -137,13 +155,15 @@ environment, you can set the following command line options when invoking will use the port `(base_port + worker_id)`, where the `worker_id` is sequential IDs given to each instance from 0 to `num_envs - 1`. Default is 5005. __Note:__ When training using the Editor rather than an executable, the base port will be ignored. -* `--train`: Specifies whether to train model or only run in inference mode. - When training, **always** use the `--train` option. -* `--load`: If set, the training code loads an already trained model to +* `--inference`: Specifies whether to only run in inference mode. Omit to train the model. + To load an existing model, specify a run-id and combine with `--resume`. +* `--resume`: If set, the training code loads an already trained model to initialize the neural network before training. 
The learning code looks for the model in `models//` (which is also where it saves models at the end of - training). When not set (the default), the neural network weights are randomly - initialized and an existing model is not loaded. + training). This option only works when the models exist, and have the same behavior names + as the current agents in your scene. +* `--force`: Attempting to train a model with a run-id that has been used before will + throw an error. Use `--force` to force-overwrite this run-id's summary and model data. * `--no-graphics`: Specify this option to run the Unity executable in `-batchmode` and doesn't initialize the graphics driver. Use this only if your training doesn't involve visual observations (reading from Pixels). See From 146d6cb64afa27a375d1090bd7ee09bbcb156dde Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Fri, 27 Mar 2020 17:42:27 -0700 Subject: [PATCH 11/16] Update changelog and migrating --- com.unity.ml-agents/CHANGELOG.md | 3 +++ docs/Migrating.md | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 7de458e867..a8c3143767 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Major Changes + - The `--load` and `--train` command-line flags have been deprecated. Training now happens by default, and + use `--resume` to resume training instead. (#3705) ### Minor Changes - Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616) @@ -19,6 +21,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Environment subprocesses now close immediately on timeout or wrong API version. (#3679) - Fixed an issue in the gym wrapper that would raise an exception if an Agent called EndEpisode multiple times in the same step. (#3700) - Fixed an issue where exceptions from environments provided a returncode of 0. (#3680) + - Running `mlagents-learn` with the same `--run-id` twice will no longer overwrite the existing files. (#3705) ## [0.15.0-preview] - 2020-03-18 ### Major Changes diff --git a/docs/Migrating.md b/docs/Migrating.md index 902a35d3db..74525e8d3c 100644 --- a/docs/Migrating.md +++ b/docs/Migrating.md @@ -10,9 +10,13 @@ The versions can be found in ## Migrating from 0.15 to latest ### Important changes +* The `--load` and `--train` command-line flags have been deprecated and replaced with `--resume` and `--inference`. +* Running with the same `--run-id` twice will now throw an error. ### Steps to Migrate - +* Replace the `--load` flag with `--resume` when calling `mlagents-learn`, and don't use the `--train` flag as training + will happen by default. To run without training, use `--inference`. +* To force-overwrite files from a pre-existing run, add the `--force` command-line flag. 
## Migrating from 0.14 to 0.15 From 48c274ef4bf43458aae076fcb7e778b9abbfbf17 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 30 Mar 2020 11:46:06 -0700 Subject: [PATCH 12/16] Don't clear TB on resume --- ml-agents/mlagents/trainers/learn.py | 2 +- ml-agents/mlagents/trainers/stats.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 6554e46095..8e442cb4fb 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -282,7 +282,7 @@ def run_training(run_seed: int, options: RunOptions) -> None: handle_existing_directories( model_path, summaries_dir, options.resume, options.force ) - tb_writer = TensorboardWriter(summaries_dir) + tb_writer = TensorboardWriter(summaries_dir, clear_past_data=not options.resume) gauge_write = GaugeWriter() console_writer = ConsoleWriter() StatsReporter.add_writer(tb_writer) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index 7eeadf198f..e79d4b697f 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -173,14 +173,17 @@ def _dict_to_str(self, param_dict: Dict[str, Any], num_tabs: int) -> str: class TensorboardWriter(StatsWriter): - def __init__(self, base_dir: str): + def __init__(self, base_dir: str, clear_past_data: bool = False): """ A StatsWriter that writes to a Tensorboard summary. :param base_dir: The directory within which to place all the summaries. Tensorboard files will be written to a {base_dir}/{category} directory. + :param clear_past_data: Whether or not to clean up existing Tensorboard files associated with the base_dir and + category. """ self.summary_writers: Dict[str, tf.summary.FileWriter] = {} self.base_dir: str = base_dir + self._clear_past_data = clear_past_data def write_stats( self, category: str, values: Dict[str, StatsSummary], step: int @@ -198,7 +201,8 @@ def _maybe_create_summary_writer(self, category: str) -> None: basedir=self.base_dir, category=category ) os.makedirs(filewriter_dir, exist_ok=True) - self._delete_all_events_files(filewriter_dir) + if self._clear_past_data: + self._delete_all_events_files(filewriter_dir) self.summary_writers[category] = tf.summary.FileWriter(filewriter_dir) def _delete_all_events_files(self, directory_name: str) -> None: From c60057ed8d5b14305d4069a253d096bb59454ee7 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 30 Mar 2020 12:06:43 -0700 Subject: [PATCH 13/16] Test tensorboard clearing --- .../mlagents/trainers/tests/test_stats.py | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/ml-agents/mlagents/trainers/tests/test_stats.py b/ml-agents/mlagents/trainers/tests/test_stats.py index 20c5803a40..632c0abb9c 100644 --- a/ml-agents/mlagents/trainers/tests/test_stats.py +++ b/ml-agents/mlagents/trainers/tests/test_stats.py @@ -4,6 +4,7 @@ import tempfile import unittest import csv +import time from mlagents.trainers.stats import ( StatsReporter, @@ -75,7 +76,7 @@ def test_tensorboard_writer(mock_filewriter, mock_summary): # Test write_stats category = "category1" with tempfile.TemporaryDirectory(prefix="unittest-") as base_dir: - tb_writer = TensorboardWriter(base_dir) + tb_writer = TensorboardWriter(base_dir, clear_past_data=False) statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) tb_writer.write_stats("category1", {"key1": statssummary1}, 10) @@ -102,6 +103,26 @@ def test_tensorboard_writer(mock_filewriter, mock_summary): assert 
mock_filewriter.return_value.add_summary.call_count > 1 +def test_tensorboard_writer_clear(tmp_path): + tb_writer = TensorboardWriter(tmp_path, clear_past_data=False) + statssummary1 = StatsSummary(mean=1.0, std=1.0, num=1) + tb_writer.write_stats("category1", {"key1": statssummary1}, 10) + # TB has some sort of timeout before making a new file + time.sleep(1.0) + assert len(os.listdir(os.path.join(tmp_path, "category1"))) > 0 + + # See if creating a new one doesn't delete it + tb_writer = TensorboardWriter(tmp_path, clear_past_data=False) + tb_writer.write_stats("category1", {"key1": statssummary1}, 10) + assert len(os.listdir(os.path.join(tmp_path, "category1"))) > 1 + time.sleep(1.0) + + # See if creating a new one deletes old ones + tb_writer = TensorboardWriter(tmp_path, clear_past_data=True) + tb_writer.write_stats("category1", {"key1": statssummary1}, 10) + assert len(os.listdir(os.path.join(tmp_path, "category1"))) == 1 + + def test_csv_writer(): # Test write_stats category = "category1" From cf05095b0bafaee8967e35472518aae7362caae4 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 30 Mar 2020 14:30:44 -0700 Subject: [PATCH 14/16] Added warning when cleaning out old events files --- ml-agents/mlagents/trainers/stats.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ml-agents/mlagents/trainers/stats.py b/ml-agents/mlagents/trainers/stats.py index e79d4b697f..53bf3981c1 100644 --- a/ml-agents/mlagents/trainers/stats.py +++ b/ml-agents/mlagents/trainers/stats.py @@ -208,6 +208,9 @@ def _maybe_create_summary_writer(self, category: str) -> None: def _delete_all_events_files(self, directory_name: str) -> None: for file_name in os.listdir(directory_name): if file_name.startswith("events.out"): + logger.warning( + "{} was left over from a previous run. Deleting.".format(file_name) + ) full_fname = os.path.join(directory_name, file_name) try: os.remove(full_fname) From bea21de75a81538a5caaf87fb06e4c5719af1c59 Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 30 Mar 2020 14:38:20 -0700 Subject: [PATCH 15/16] Fix merge --- ml-agents/mlagents/trainers/learn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ml-agents/mlagents/trainers/learn.py b/ml-agents/mlagents/trainers/learn.py index 37705a1a22..bf1cc720c7 100644 --- a/ml-agents/mlagents/trainers/learn.py +++ b/ml-agents/mlagents/trainers/learn.py @@ -460,11 +460,11 @@ def run_cli(options: RunOptions) -> None: # Options deprecation warnings if options.load_model: - trainer_logger.warning( + logger.warning( "The --load option has been deprecated. Please use the --resume option instead." ) if options.train_model: - trainer_logger.warning( + logger.warning( "The --train option has been deprecated. Train mode is now the default. Use " "--inference to run in inference mode." ) From 6e43ed0c01a500bdf787a42dbed1d58124da2f4c Mon Sep 17 00:00:00 2001 From: Ervin Teng Date: Mon, 30 Mar 2020 14:39:00 -0700 Subject: [PATCH 16/16] Fix merge --- com.unity.ml-agents/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 34a2a8b94c..bcba1e41d7 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -12,7 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - The Jupyter notebooks have been removed from the repository. - Introduced the `SideChannelUtils` to register, unregister and access side channels. 
- `Academy.FloatProperties` was removed, please use `SideChannelUtils.GetSideChannel()` instead. - + ### Minor Changes - Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616) - Raise the wall in CrawlerStatic scene to prevent Agent from falling off. (#3650)
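
Taken together, this series makes training the default (`--train` and `--load` are deprecated), adds `--resume` for continuing a run, `--force` for overwriting one, and `--inference` for running without training, and raises an error when a `--run-id` is reused without one of those flags. The sketch below is an illustrative summary only, not part of any patch above; it shows how the reworked flags are expected to combine once the series is applied, with the config path and `firstRun` run-id as placeholder values taken from the documentation changes.

```sh
# Start a fresh training run (training is now the default; no --train flag).
mlagents-learn config/trainer_config.yaml --run-id=firstRun

# Re-using the same run-id without a flag now raises an error instead of
# silently overwriting previous results. Either resume the interrupted run...
mlagents-learn config/trainer_config.yaml --run-id=firstRun --resume

# ...or explicitly overwrite its existing models and summaries.
mlagents-learn config/trainer_config.yaml --run-id=firstRun --force

# Run an already-trained model in Python inference mode (no training).
mlagents-learn config/trainer_config.yaml --run-id=firstRun --resume --inference
```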