diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md
index 47fccd2a68..b91f9ae2bb 100755
--- a/com.unity.ml-agents/CHANGELOG.md
+++ b/com.unity.ml-agents/CHANGELOG.md
@@ -30,12 +30,6 @@ and this project adheres to
2. env_params.restarts_rate_limit_n (--restarts-rate-limit-n) [default=1]
3. env_params.restarts_rate_limit_period_s (--restarts-rate-limit-period-s) [default=60]
-
-- Deterministic action selection is now supported during training and inference(#5619)
- - Added a new `--deterministic` cli flag to deterministically select the most probable actions in policy. The same thing can
- be achieved by adding `deterministic: true` under `network_settings` of the run options configuration.(#5597)
- - Extra tensors are now serialized to support deterministic action selection in onnx. (#5593)
- - Support inference with deterministic action selection in editor (#5599)
### Bug Fixes
- Fixed a bug where the critics were not being normalized during training. (#5595)
- Fixed the bug where curriculum learning would crash because of the incorrect run_options parsing. (#5586)
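For context on the changelog entries removed above: the reverted option lived under `network_settings` in the run options. A minimal sketch of that placement, in the dict form the trainer tests pass to `RunOptions.from_dict` (the behavior name and numeric values here are placeholders, and the `deterministic` key is exactly what this change removes):

```python
# Illustrative only; mirrors the removed fixtures in test_settings.py.
# "MyBehavior" and the values are placeholders, not recommended settings.
run_options_dict = {
    "behaviors": {
        "MyBehavior": {
            "trainer_type": "ppo",
            "network_settings": {
                "num_layers": 2,
                "deterministic": True,  # the key this revert removes
            },
        }
    }
}
```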
diff --git a/com.unity.ml-agents/Editor/BehaviorParametersEditor.cs b/com.unity.ml-agents/Editor/BehaviorParametersEditor.cs
index a95b2846f3..c5e8ddc802 100644
--- a/com.unity.ml-agents/Editor/BehaviorParametersEditor.cs
+++ b/com.unity.ml-agents/Editor/BehaviorParametersEditor.cs
@@ -25,7 +25,6 @@ internal class BehaviorParametersEditor : UnityEditor.Editor
const string k_BrainParametersName = "m_BrainParameters";
const string k_ModelName = "m_Model";
const string k_InferenceDeviceName = "m_InferenceDevice";
- const string k_DeterministicInference = "m_DeterministicInference";
const string k_BehaviorTypeName = "m_BehaviorType";
const string k_TeamIdName = "TeamId";
const string k_UseChildSensorsName = "m_UseChildSensors";
@@ -69,7 +68,6 @@ public override void OnInspectorGUI()
EditorGUILayout.PropertyField(so.FindProperty(k_ModelName), true);
EditorGUI.indentLevel++;
EditorGUILayout.PropertyField(so.FindProperty(k_InferenceDeviceName), true);
- EditorGUILayout.PropertyField(so.FindProperty(k_DeterministicInference), true);
EditorGUI.indentLevel--;
}
needPolicyUpdate = needPolicyUpdate || EditorGUI.EndChangeCheck();
@@ -158,7 +156,7 @@ void DisplayFailedModelChecks()
{
var failedChecks = Inference.BarracudaModelParamLoader.CheckModel(
barracudaModel, brainParameters, sensors, actuatorComponents,
- observableAttributeSensorTotalSize, behaviorParameters.BehaviorType, behaviorParameters.DeterministicInference
+ observableAttributeSensorTotalSize, behaviorParameters.BehaviorType
);
foreach (var check in failedChecks)
{
diff --git a/com.unity.ml-agents/Runtime/Academy.cs b/com.unity.ml-agents/Runtime/Academy.cs
index 409ccb1dca..85cab21b26 100644
--- a/com.unity.ml-agents/Runtime/Academy.cs
+++ b/com.unity.ml-agents/Runtime/Academy.cs
@@ -616,16 +616,14 @@ void EnvironmentReset()
/// <param name="inferenceDevice">
/// The inference device (CPU or GPU) the ModelRunner will use.
/// </param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// Deterministic. </param>
/// <returns> The ModelRunner compatible with the input settings.</returns>
internal ModelRunner GetOrCreateModelRunner(
- NNModel model, ActionSpec actionSpec, InferenceDevice inferenceDevice, bool deterministicInference = false)
+ NNModel model, ActionSpec actionSpec, InferenceDevice inferenceDevice)
{
var modelRunner = m_ModelRunners.Find(x => x.HasModel(model, inferenceDevice));
if (modelRunner == null)
{
- modelRunner = new ModelRunner(model, actionSpec, inferenceDevice, m_InferenceSeed, deterministicInference);
+ modelRunner = new ModelRunner(model, actionSpec, inferenceDevice, m_InferenceSeed);
m_ModelRunners.Add(modelRunner);
m_InferenceSeed++;
}
diff --git a/com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs b/com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
index 5e7338c057..6fd3872535 100644
--- a/com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
+++ b/com.unity.ml-agents/Runtime/Inference/BarracudaModelExtensions.cs
@@ -112,10 +112,8 @@ public static int GetNumVisualInputs(this Model model)
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <returns>Array of the output tensor names of the model</returns>
- public static string[] GetOutputNames(this Model model, bool deterministicInference = false)
+ public static string[] GetOutputNames(this Model model)
{
var names = new List<string>();
@@ -124,13 +122,13 @@ public static string[] GetOutputNames(this Model model, bool deterministicInfere
return names.ToArray();
}
- if (model.HasContinuousOutputs(deterministicInference))
+ if (model.HasContinuousOutputs())
{
- names.Add(model.ContinuousOutputName(deterministicInference));
+ names.Add(model.ContinuousOutputName());
}
- if (model.HasDiscreteOutputs(deterministicInference))
+ if (model.HasDiscreteOutputs())
{
- names.Add(model.DiscreteOutputName(deterministicInference));
+ names.Add(model.DiscreteOutputName());
}
var modelVersion = model.GetVersion();
@@ -151,10 +149,8 @@ public static string[] GetOutputNames(this Model model, bool deterministicInfere
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <returns>True if the model has continuous action outputs.</returns>
- public static bool HasContinuousOutputs(this Model model, bool deterministicInference = false)
+ public static bool HasContinuousOutputs(this Model model)
{
if (model == null)
return false;
@@ -164,13 +160,8 @@ public static bool HasContinuousOutputs(this Model model, bool deterministicInfe
}
else
{
- bool hasStochasticOutput = !deterministicInference &&
- model.outputs.Contains(TensorNames.ContinuousActionOutput);
- bool hasDeterministicOutput = deterministicInference &&
- model.outputs.Contains(TensorNames.DeterministicContinuousActionOutput);
-
- return (hasStochasticOutput || hasDeterministicOutput) &&
- (int)model.GetTensorByName(TensorNames.ContinuousActionOutputShape)[0] > 0;
+ return model.outputs.Contains(TensorNames.ContinuousActionOutput) &&
+ (int)model.GetTensorByName(TensorNames.ContinuousActionOutputShape)[0] > 0;
}
}
@@ -203,10 +194,8 @@ public static int ContinuousOutputSize(this Model model)
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <returns>Tensor name of continuous action output.</returns>
- public static string ContinuousOutputName(this Model model, bool deterministicInference = false)
+ public static string ContinuousOutputName(this Model model)
{
if (model == null)
return null;
@@ -216,7 +205,7 @@ public static string ContinuousOutputName(this Model model, bool deterministicIn
}
else
{
- return deterministicInference ? TensorNames.DeterministicContinuousActionOutput : TensorNames.ContinuousActionOutput;
+ return TensorNames.ContinuousActionOutput;
}
}
@@ -226,10 +215,8 @@ public static string ContinuousOutputName(this Model model, bool deterministicIn
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <returns>True if the model has discrete action outputs.</returns>
- public static bool HasDiscreteOutputs(this Model model, bool deterministicInference = false)
+ public static bool HasDiscreteOutputs(this Model model)
{
if (model == null)
return false;
@@ -239,12 +226,7 @@ public static bool HasDiscreteOutputs(this Model model, bool deterministicInfere
}
else
{
- bool hasStochasticOutput = !deterministicInference &&
- model.outputs.Contains(TensorNames.DiscreteActionOutput);
- bool hasDeterministicOutput = deterministicInference &&
- model.outputs.Contains(TensorNames.DeterministicDiscreteActionOutput);
- return (hasStochasticOutput || hasDeterministicOutput) &&
- model.DiscreteOutputSize() > 0;
+ return model.outputs.Contains(TensorNames.DiscreteActionOutput) && model.DiscreteOutputSize() > 0;
}
}
@@ -297,10 +279,8 @@ public static int DiscreteOutputSize(this Model model)
/// <param name="model">
/// The Barracuda engine model for loading static parameters.
/// </param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <returns>Tensor name of discrete action output.</returns>
- public static string DiscreteOutputName(this Model model, bool deterministicInference = false)
+ public static string DiscreteOutputName(this Model model)
{
if (model == null)
return null;
@@ -310,7 +290,7 @@ public static string DiscreteOutputName(this Model model, bool deterministicInfe
}
else
{
- return deterministicInference ? TensorNames.DeterministicDiscreteActionOutput : TensorNames.DiscreteActionOutput;
+ return TensorNames.DiscreteActionOutput;
}
}
@@ -336,11 +316,9 @@ public static bool SupportsContinuousAndDiscrete(this Model model)
/// The Barracuda engine model for loading static parameters.
/// </param>
/// <param name="failedModelChecks">Output list of failure messages</param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
+ ///
/// <returns>True if the model contains all the expected tensors.</returns>
- /// TODO: add checks for deterministic actions
- public static bool CheckExpectedTensors(this Model model, List<FailedCheck> failedModelChecks, bool deterministicInference = false)
+ public static bool CheckExpectedTensors(this Model model, List<FailedCheck> failedModelChecks)
{
// Check the presence of model version
var modelApiVersionTensor = model.GetTensorByName(TensorNames.VersionNumber);
@@ -365,9 +343,7 @@ public static bool CheckExpectedTensors(this Model model, List fail
// Check the presence of action output tensor
if (!model.outputs.Contains(TensorNames.ActionOutputDeprecated) &&
!model.outputs.Contains(TensorNames.ContinuousActionOutput) &&
- !model.outputs.Contains(TensorNames.DiscreteActionOutput) &&
- !model.outputs.Contains(TensorNames.DeterministicContinuousActionOutput) &&
- !model.outputs.Contains(TensorNames.DeterministicDiscreteActionOutput))
+ !model.outputs.Contains(TensorNames.DiscreteActionOutput))
{
failedModelChecks.Add(
FailedCheck.Warning("The model does not contain any Action Output Node.")
@@ -397,51 +373,22 @@ public static bool CheckExpectedTensors(this Model model, List fail
}
else
{
- if (model.outputs.Contains(TensorNames.ContinuousActionOutput))
+ if (model.outputs.Contains(TensorNames.ContinuousActionOutput) &&
+ model.GetTensorByName(TensorNames.ContinuousActionOutputShape) == null)
{
- if (model.GetTensorByName(TensorNames.ContinuousActionOutputShape) == null)
- {
- failedModelChecks.Add(
- FailedCheck.Warning("The model uses continuous action but does not contain Continuous Action Output Shape Node.")
- );
- return false;
- }
-
- else if (!model.HasContinuousOutputs(deterministicInference))
- {
- var actionType = deterministicInference ? "deterministic" : "stochastic";
- var actionName = deterministicInference ? "Deterministic" : "";
- failedModelChecks.Add(
- FailedCheck.Warning($"The model uses {actionType} inference but does not contain {actionName} Continuous Action Output Tensor. Uncheck `Deterministic inference` flag..")
+ failedModelChecks.Add(
+ FailedCheck.Warning("The model uses continuous action but does not contain Continuous Action Output Shape Node.")
);
- return false;
- }
+ return false;
}
-
- if (model.outputs.Contains(TensorNames.DiscreteActionOutput))
+ if (model.outputs.Contains(TensorNames.DiscreteActionOutput) &&
+ model.GetTensorByName(TensorNames.DiscreteActionOutputShape) == null)
{
- if (model.GetTensorByName(TensorNames.DiscreteActionOutputShape) == null)
- {
- failedModelChecks.Add(
- FailedCheck.Warning("The model uses discrete action but does not contain Discrete Action Output Shape Node.")
- );
- return false;
- }
- else if (!model.HasDiscreteOutputs(deterministicInference))
- {
- var actionType = deterministicInference ? "deterministic" : "stochastic";
- var actionName = deterministicInference ? "Deterministic" : "";
- failedModelChecks.Add(
- FailedCheck.Warning($"The model uses {actionType} inference but does not contain {actionName} Discrete Action Output Tensor. Uncheck `Deterministic inference` flag.")
+ failedModelChecks.Add(
+ FailedCheck.Warning("The model uses discrete action but does not contain Discrete Action Output Shape Node.")
);
- return false;
- }
-
+ return false;
}
-
-
-
-
}
return true;
}
diff --git a/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs b/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
index 6fe10566fd..21917b303d 100644
--- a/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
+++ b/com.unity.ml-agents/Runtime/Inference/BarracudaModelParamLoader.cs
@@ -122,8 +122,6 @@ public static FailedCheck CheckModelVersion(Model model)
/// <param name="actuatorComponents">Attached actuator components</param>
/// <param name="observableAttributeTotalSize">Sum of the sizes of all ObservableAttributes.</param>
/// <param name="behaviorType">BehaviorType or the Agent to check.</param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <returns>A IEnumerable of the checks that failed</returns>
public static IEnumerable<FailedCheck> CheckModel(
Model model,
@@ -131,8 +129,7 @@ public static IEnumerable CheckModel(
ISensor[] sensors,
ActuatorComponent[] actuatorComponents,
int observableAttributeTotalSize = 0,
- BehaviorType behaviorType = BehaviorType.Default,
- bool deterministicInference = false
+ BehaviorType behaviorType = BehaviorType.Default
)
{
List<FailedCheck> failedModelChecks = new List<FailedCheck>();
@@ -151,7 +148,7 @@ public static IEnumerable CheckModel(
return failedModelChecks;
}
- var hasExpectedTensors = model.CheckExpectedTensors(failedModelChecks, deterministicInference);
+ var hasExpectedTensors = model.CheckExpectedTensors(failedModelChecks);
if (!hasExpectedTensors)
{
return failedModelChecks;
@@ -184,7 +181,7 @@ public static IEnumerable CheckModel(
else if (modelApiVersion == (int)ModelApiVersion.MLAgents2_0)
{
failedModelChecks.AddRange(
- CheckInputTensorPresence(model, brainParameters, memorySize, sensors, deterministicInference)
+ CheckInputTensorPresence(model, brainParameters, memorySize, sensors)
);
failedModelChecks.AddRange(
CheckInputTensorShape(model, brainParameters, sensors, observableAttributeTotalSize)
@@ -198,7 +195,7 @@ public static IEnumerable CheckModel(
);
failedModelChecks.AddRange(
- CheckOutputTensorPresence(model, memorySize, deterministicInference)
+ CheckOutputTensorPresence(model, memorySize)
);
return failedModelChecks;
}
@@ -321,8 +318,6 @@ ISensor[] sensors
/// The memory size that the model is expecting.
/// </param>
/// <param name="sensors">Array of attached sensor components</param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// Deterministic. </param>
/// <returns>
/// A IEnumerable of the checks that failed
/// </returns>
@@ -330,8 +325,7 @@ static IEnumerable CheckInputTensorPresence(
Model model,
BrainParameters brainParameters,
int memory,
- ISensor[] sensors,
- bool deterministicInference = false
+ ISensor[] sensors
)
{
var failedModelChecks = new List<FailedCheck>();
@@ -362,7 +356,7 @@ static IEnumerable CheckInputTensorPresence(
}
// If the model uses discrete control but does not have an input for action masks
- if (model.HasDiscreteOutputs(deterministicInference))
+ if (model.HasDiscreteOutputs())
{
if (!tensorsNames.Contains(TensorNames.ActionMaskPlaceholder))
{
@@ -382,19 +376,17 @@ static IEnumerable CheckInputTensorPresence(
/// The Barracuda engine model for loading static parameters
/// </param>
/// <param name="memory">The memory size that the model is expecting/</param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <returns>
/// A IEnumerable of the checks that failed
/// </returns>
- static IEnumerable<FailedCheck> CheckOutputTensorPresence(Model model, int memory, bool deterministicInference = false)
+ static IEnumerable<FailedCheck> CheckOutputTensorPresence(Model model, int memory)
{
var failedModelChecks = new List<FailedCheck>();
// If there is no Recurrent Output but the model is Recurrent.
if (memory > 0)
{
- var allOutputs = model.GetOutputNames(deterministicInference).ToList();
+ var allOutputs = model.GetOutputNames().ToList();
if (!allOutputs.Any(x => x == TensorNames.RecurrentOutput))
{
failedModelChecks.Add(
diff --git a/com.unity.ml-agents/Runtime/Inference/ModelRunner.cs b/com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
index f59b54ee23..422e0f9744 100644
--- a/com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
+++ b/com.unity.ml-agents/Runtime/Inference/ModelRunner.cs
@@ -28,7 +28,6 @@ internal class ModelRunner
InferenceDevice m_InferenceDevice;
IWorker m_Engine;
bool m_Verbose = false;
- bool m_DeterministicInference;
string[] m_OutputNames;
IReadOnlyList<TensorProxy> m_InferenceInputs;
List<TensorProxy> m_InferenceOutputs;
@@ -49,22 +48,18 @@ internal class ModelRunner
/// option for most of ML Agents models.</param>
/// <param name="seed">The seed that will be used to initialize the RandomNormal
/// and Multinomial objects used when running inference.</param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
/// <exception cref="UnityAgentsException">Throws an error when the model is null
/// </exception>
public ModelRunner(
NNModel model,
ActionSpec actionSpec,
InferenceDevice inferenceDevice,
- int seed = 0,
- bool deterministicInference = false)
+ int seed = 0)
{
Model barracudaModel;
m_Model = model;
m_ModelName = model.name;
m_InferenceDevice = inferenceDevice;
- m_DeterministicInference = deterministicInference;
m_TensorAllocator = new TensorCachingAllocator();
if (model != null)
{
@@ -113,12 +108,11 @@ public ModelRunner(
}
m_InferenceInputs = barracudaModel.GetInputTensors();
- m_OutputNames = barracudaModel.GetOutputNames(m_DeterministicInference);
-
+ m_OutputNames = barracudaModel.GetOutputNames();
m_TensorGenerator = new TensorGenerator(
- seed, m_TensorAllocator, m_Memories, barracudaModel, m_DeterministicInference);
+ seed, m_TensorAllocator, m_Memories, barracudaModel);
m_TensorApplier = new TensorApplier(
- actionSpec, seed, m_TensorAllocator, m_Memories, barracudaModel, m_DeterministicInference);
+ actionSpec, seed, m_TensorAllocator, m_Memories, barracudaModel);
m_InputsByName = new Dictionary<string, Tensor>();
m_InferenceOutputs = new List<TensorProxy>();
}
diff --git a/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs b/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
index a03b3d927e..d311010aae 100644
--- a/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
+++ b/com.unity.ml-agents/Runtime/Inference/TensorApplier.cs
@@ -44,15 +44,12 @@ public interface IApplier
/// <param name="allocator">Tensor allocator</param>
/// <param name="memories">Dictionary of AgentInfo.id to memory used to pass to the inference model.</param>
/// <param name="barracudaModel"></param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
public TensorApplier(
ActionSpec actionSpec,
int seed,
ITensorAllocator allocator,
Dictionary<int, List<float>> memories,
- object barracudaModel = null,
- bool deterministicInference = false)
+ object barracudaModel = null)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
@@ -67,13 +64,13 @@ public TensorApplier(
}
if (actionSpec.NumContinuousActions > 0)
{
- var tensorName = model.ContinuousOutputName(deterministicInference);
+ var tensorName = model.ContinuousOutputName();
m_Dict[tensorName] = new ContinuousActionOutputApplier(actionSpec);
}
var modelVersion = model.GetVersion();
if (actionSpec.NumDiscreteActions > 0)
{
- var tensorName = model.DiscreteOutputName(deterministicInference);
+ var tensorName = model.DiscreteOutputName();
if (modelVersion == (int)BarracudaModelParamLoader.ModelApiVersion.MLAgents1_0)
{
m_Dict[tensorName] = new LegacyDiscreteActionOutputApplier(actionSpec, seed, allocator);
diff --git a/com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs b/com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
index 39bed85792..feb521ebd8 100644
--- a/com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
+++ b/com.unity.ml-agents/Runtime/Inference/TensorGenerator.cs
@@ -44,14 +44,11 @@ void Generate(
/// <param name="allocator">Tensor allocator.</param>
/// <param name="memories">Dictionary of AgentInfo.id to memory for use in the inference model.</param>
/// <param name="barracudaModel"></param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
public TensorGenerator(
int seed,
ITensorAllocator allocator,
Dictionary<int, List<float>> memories,
- object barracudaModel = null,
- bool deterministicInference = false)
+ object barracudaModel = null)
{
// If model is null, no inference to run and exception is thrown before reaching here.
if (barracudaModel == null)
@@ -79,13 +76,13 @@ public TensorGenerator(
// Generators for Outputs
- if (model.HasContinuousOutputs(deterministicInference))
+ if (model.HasContinuousOutputs())
{
- m_Dict[model.ContinuousOutputName(deterministicInference)] = new BiDimensionalOutputGenerator(allocator);
+ m_Dict[model.ContinuousOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
- if (model.HasDiscreteOutputs(deterministicInference))
+ if (model.HasDiscreteOutputs())
{
- m_Dict[model.DiscreteOutputName(deterministicInference)] = new BiDimensionalOutputGenerator(allocator);
+ m_Dict[model.DiscreteOutputName()] = new BiDimensionalOutputGenerator(allocator);
}
m_Dict[TensorNames.RecurrentOutput] = new BiDimensionalOutputGenerator(allocator);
m_Dict[TensorNames.ValueEstimateOutput] = new BiDimensionalOutputGenerator(allocator);
diff --git a/com.unity.ml-agents/Runtime/Inference/TensorNames.cs b/com.unity.ml-agents/Runtime/Inference/TensorNames.cs
index 48ae04b5f6..dc20e1f8f3 100644
--- a/com.unity.ml-agents/Runtime/Inference/TensorNames.cs
+++ b/com.unity.ml-agents/Runtime/Inference/TensorNames.cs
@@ -23,8 +23,6 @@ internal static class TensorNames
public const string DiscreteActionOutputShape = "discrete_action_output_shape";
public const string ContinuousActionOutput = "continuous_actions";
public const string DiscreteActionOutput = "discrete_actions";
- public const string DeterministicContinuousActionOutput = "deterministic_continuous_actions";
- public const string DeterministicDiscreteActionOutput = "deterministic_discrete_actions";
// Deprecated TensorNames entries for backward compatibility
public const string IsContinuousControlDeprecated = "is_continuous_control";
diff --git a/com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs b/com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
index 96a15b50d8..5e76084b20 100644
--- a/com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
+++ b/com.unity.ml-agents/Runtime/Policies/BarracudaPolicy.cs
@@ -45,11 +45,6 @@ internal class BarracudaPolicy : IPolicy
ActionBuffers m_LastActionBuffer;
int m_AgentId;
- /// <summary>
- /// Inference only: set to true if the action selection from model should be
- /// deterministic.
- /// </summary>
- bool m_DeterministicInference;
///
/// Sensor shapes for the associated Agents. All Agents must have the same shapes for their Sensors.
@@ -78,23 +73,19 @@ internal class BarracudaPolicy : IPolicy
/// <param name="model">The Neural Network to use.</param>
/// <param name="inferenceDevice">Which device Barracuda will run on.</param>
/// <param name="behaviorName">The name of the behavior.</param>
- /// <param name="deterministicInference"> Inference only: set to true if the action selection from model should be
- /// deterministic. </param>
public BarracudaPolicy(
ActionSpec actionSpec,
IList<IActuator> actuators,
NNModel model,
InferenceDevice inferenceDevice,
- string behaviorName,
- bool deterministicInference = false
+ string behaviorName
)
{
- var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, actionSpec, inferenceDevice, deterministicInference);
+ var modelRunner = Academy.Instance.GetOrCreateModelRunner(model, actionSpec, inferenceDevice);
m_ModelRunner = modelRunner;
m_BehaviorName = behaviorName;
m_ActionSpec = actionSpec;
m_Actuators = actuators;
- m_DeterministicInference = deterministicInference;
}
///
diff --git a/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs b/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
index b0d369b910..ae05284d50 100644
--- a/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
+++ b/com.unity.ml-agents/Runtime/Policies/BehaviorParameters.cs
@@ -177,20 +177,6 @@ public bool UseChildSensors
set { m_UseChildSensors = value; }
}
- [HideInInspector]
- [SerializeField]
- [Tooltip("Set action selection to deterministic, Only applies to inference from within unity.")]
- private bool m_DeterministicInference = false;
-
- /// <summary>
- /// Whether to select actions deterministically during inference from the provided neural network.
- /// </summary>
- public bool DeterministicInference
- {
- get { return m_DeterministicInference; }
- set { m_DeterministicInference = value; }
- }
-
///
/// Whether or not to use all the actuator components attached to child GameObjects of the agent.
/// Note that changing this after the Agent has been initialized will not have any effect.
@@ -242,7 +228,7 @@ internal IPolicy GeneratePolicy(ActionSpec actionSpec, ActuatorManager actuatorM
"Either assign a model, or change to a different Behavior Type."
);
}
- return new BarracudaPolicy(actionSpec, actuatorManager, m_Model, m_InferenceDevice, m_BehaviorName, m_DeterministicInference);
+ return new BarracudaPolicy(actionSpec, actuatorManager, m_Model, m_InferenceDevice, m_BehaviorName);
}
case BehaviorType.Default:
if (Academy.Instance.IsCommunicatorOn)
@@ -251,7 +237,7 @@ internal IPolicy GeneratePolicy(ActionSpec actionSpec, ActuatorManager actuatorM
}
if (m_Model != null)
{
- return new BarracudaPolicy(actionSpec, actuatorManager, m_Model, m_InferenceDevice, m_BehaviorName, m_DeterministicInference);
+ return new BarracudaPolicy(actionSpec, actuatorManager, m_Model, m_InferenceDevice, m_BehaviorName);
}
else
{
diff --git a/com.unity.ml-agents/Tests/Editor/Inference/ModelRunnerTest.cs b/com.unity.ml-agents/Tests/Editor/Inference/ModelRunnerTest.cs
index da802a38d5..0e81c4f8ad 100644
--- a/com.unity.ml-agents/Tests/Editor/Inference/ModelRunnerTest.cs
+++ b/com.unity.ml-agents/Tests/Editor/Inference/ModelRunnerTest.cs
@@ -1,4 +1,3 @@
-using System;
using System.Linq;
using NUnit.Framework;
using UnityEngine;
@@ -7,29 +6,9 @@
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Inference;
using Unity.MLAgents.Policies;
-using System.Collections.Generic;
namespace Unity.MLAgents.Tests
{
- public class FloatThresholdComparer : IEqualityComparer<float>
- {
- private readonly float _threshold;
- public FloatThresholdComparer(float threshold)
- {
- _threshold = threshold;
- }
-
- public bool Equals(float x, float y)
- {
- return Math.Abs(x - y) < _threshold;
- }
-
- public int GetHashCode(float f)
- {
- throw new NotImplementedException("Unable to generate a hash code for threshold floats, do not use this method");
- }
- }
-
[TestFixture]
public class ModelRunnerTest
{
@@ -40,9 +19,6 @@ public class ModelRunnerTest
const string k_hybridONNXPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/hybrid0vis53vec_3c_2daction_v1_0.onnx";
const string k_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/continuous2vis8vec2action_deprecated_v1_0.nn";
const string k_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/discrete1vis0vec_2_3action_recurr_deprecated_v1_0.nn";
- // models with deterministic action tensors
- private const string k_deterministic_discreteNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/deterDiscrete1obs3action_v2_0.onnx";
- private const string k_deterministic_continuousNNPath = "Packages/com.unity.ml-agents/Tests/Editor/TestModels/deterContinuous2vis8vec2action_v2_0.onnx";
NNModel hybridONNXModelV2;
NNModel continuousONNXModel;
@@ -50,8 +26,6 @@ public class ModelRunnerTest
NNModel hybridONNXModel;
NNModel continuousNNModel;
NNModel discreteNNModel;
- NNModel deterministicDiscreteNNModel;
- NNModel deterministicContinuousNNModel;
Test3DSensorComponent sensor_21_20_3;
Test3DSensorComponent sensor_20_22_3;
@@ -81,8 +55,6 @@ public void SetUp()
hybridONNXModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_hybridONNXPath, typeof(NNModel));
continuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_continuousNNPath, typeof(NNModel));
discreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_discreteNNPath, typeof(NNModel));
- deterministicDiscreteNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_deterministic_discreteNNPath, typeof(NNModel));
- deterministicContinuousNNModel = (NNModel)AssetDatabase.LoadAssetAtPath(k_deterministic_continuousNNPath, typeof(NNModel));
var go = new GameObject("SensorA");
sensor_21_20_3 = go.AddComponent<Test3DSensorComponent>();
sensor_21_20_3.Sensor = new Test3DSensor("SensorA", 21, 20, 3);
@@ -99,8 +71,6 @@ public void TestModelExist()
Assert.IsNotNull(continuousNNModel);
Assert.IsNotNull(discreteNNModel);
Assert.IsNotNull(hybridONNXModelV2);
- Assert.IsNotNull(deterministicDiscreteNNModel);
- Assert.IsNotNull(deterministicContinuousNNModel);
}
[Test]
@@ -129,15 +99,6 @@ public void TestCreation()
// This one was trained with 2.0 so it should not raise an error:
modelRunner = new ModelRunner(hybridONNXModelV2, new ActionSpec(2, new[] { 2, 3 }), inferenceDevice);
modelRunner.Dispose();
-
- // V2.0 Model that has serialized deterministic action tensors, discrete
- modelRunner = new ModelRunner(deterministicDiscreteNNModel, new ActionSpec(0, new[] { 7 }), inferenceDevice);
- modelRunner.Dispose();
- // V2.0 Model that has serialized deterministic action tensors, continuous
- modelRunner = new ModelRunner(deterministicContinuousNNModel,
- GetContinuous2vis8vec2actionActionSpec(), inferenceDevice,
- deterministicInference: true);
- modelRunner.Dispose();
}
[Test]
@@ -177,60 +138,5 @@ public void TestRunModel()
Assert.AreEqual(actionSpec.NumDiscreteActions, modelRunner.GetAction(1).DiscreteActions.Length);
modelRunner.Dispose();
}
-
-
- [Test]
- public void TestRunModel_stochastic()
- {
- var actionSpec = GetContinuous2vis8vec2actionActionSpec();
- // deterministicInference = false by default
- var modelRunner = new ModelRunner(deterministicContinuousNNModel, actionSpec, InferenceDevice.Burst);
- var sensor_8 = new Sensors.VectorSensor(8, "VectorSensor8");
- var info1 = new AgentInfo();
- var obs = new[]
- {
- sensor_8,
- sensor_21_20_3.CreateSensors()[0],
- sensor_20_22_3.CreateSensors()[0]
- }.ToList();
- info1.episodeId = 1;
- modelRunner.PutObservations(info1, obs);
- modelRunner.DecideBatch();
- var stochAction1 = (float[])modelRunner.GetAction(1).ContinuousActions.Array.Clone();
-
- modelRunner.PutObservations(info1, obs);
- modelRunner.DecideBatch();
- var stochAction2 = (float[])modelRunner.GetAction(1).ContinuousActions.Array.Clone();
- // Stochastic action selection should output randomly different action values with same obs
- Assert.IsFalse(Enumerable.SequenceEqual(stochAction1, stochAction2, new FloatThresholdComparer(0.001f)));
- modelRunner.Dispose();
- }
- [Test]
- public void TestRunModel_deterministic()
- {
- var actionSpec = GetContinuous2vis8vec2actionActionSpec();
- var modelRunner = new ModelRunner(deterministicContinuousNNModel, actionSpec, InferenceDevice.Burst);
- var sensor_8 = new Sensors.VectorSensor(8, "VectorSensor8");
- var info1 = new AgentInfo();
- var obs = new[]
- {
- sensor_8,
- sensor_21_20_3.CreateSensors()[0],
- sensor_20_22_3.CreateSensors()[0]
- }.ToList();
- var deterministicModelRunner = new ModelRunner(deterministicContinuousNNModel, actionSpec, InferenceDevice.Burst,
- deterministicInference: true);
- info1.episodeId = 1;
- deterministicModelRunner.PutObservations(info1, obs);
- deterministicModelRunner.DecideBatch();
- var deterministicAction1 = (float[])deterministicModelRunner.GetAction(1).ContinuousActions.Array.Clone();
-
- deterministicModelRunner.PutObservations(info1, obs);
- deterministicModelRunner.DecideBatch();
- var deterministicAction2 = (float[])deterministicModelRunner.GetAction(1).ContinuousActions.Array.Clone();
- // Deterministic action selection should output same action everytime
- Assert.IsTrue(Enumerable.SequenceEqual(deterministicAction1, deterministicAction2, new FloatThresholdComparer(0.001f)));
- modelRunner.Dispose();
- }
}
}
diff --git a/com.unity.ml-agents/Tests/Editor/TestModels/deterContinuous2vis8vec2action_v2_0.onnx b/com.unity.ml-agents/Tests/Editor/TestModels/deterContinuous2vis8vec2action_v2_0.onnx
deleted file mode 100644
index 56c1cd4355..0000000000
Binary files a/com.unity.ml-agents/Tests/Editor/TestModels/deterContinuous2vis8vec2action_v2_0.onnx and /dev/null differ
diff --git a/com.unity.ml-agents/Tests/Editor/TestModels/deterContinuous2vis8vec2action_v2_0.onnx.meta b/com.unity.ml-agents/Tests/Editor/TestModels/deterContinuous2vis8vec2action_v2_0.onnx.meta
deleted file mode 100644
index cc92cc94b8..0000000000
--- a/com.unity.ml-agents/Tests/Editor/TestModels/deterContinuous2vis8vec2action_v2_0.onnx.meta
+++ /dev/null
@@ -1,14 +0,0 @@
-fileFormatVersion: 2
-guid: e905d8f9eadcf45aa8c485594fecba6d
-ScriptedImporter:
- internalIDToNameTable: []
- externalObjects: {}
- serializedVersion: 2
- userData:
- assetBundleName:
- assetBundleVariant:
- script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
- optimizeModel: 1
- forceArbitraryBatchSize: 1
- treatErrorsAsWarnings: 0
- importMode: 1
diff --git a/com.unity.ml-agents/Tests/Editor/TestModels/deterDiscrete1obs3action_v2_0.onnx b/com.unity.ml-agents/Tests/Editor/TestModels/deterDiscrete1obs3action_v2_0.onnx
deleted file mode 100644
index 3aa846e204..0000000000
Binary files a/com.unity.ml-agents/Tests/Editor/TestModels/deterDiscrete1obs3action_v2_0.onnx and /dev/null differ
diff --git a/com.unity.ml-agents/Tests/Editor/TestModels/deterDiscrete1obs3action_v2_0.onnx.meta b/com.unity.ml-agents/Tests/Editor/TestModels/deterDiscrete1obs3action_v2_0.onnx.meta
deleted file mode 100644
index a141a55235..0000000000
--- a/com.unity.ml-agents/Tests/Editor/TestModels/deterDiscrete1obs3action_v2_0.onnx.meta
+++ /dev/null
@@ -1,14 +0,0 @@
-fileFormatVersion: 2
-guid: d132cc9c934a54fdc99758427373e038
-ScriptedImporter:
- internalIDToNameTable: []
- externalObjects: {}
- serializedVersion: 2
- userData:
- assetBundleName:
- assetBundleVariant:
- script: {fileID: 11500000, guid: 683b6cb6d0a474744822c888b46772c9, type: 3}
- optimizeModel: 1
- forceArbitraryBatchSize: 1
- treatErrorsAsWarnings: 0
- importMode: 1
diff --git a/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs b/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
index 743212a69b..0c3b6312b4 100644
--- a/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
+++ b/com.unity.ml-agents/Tests/Runtime/RuntimeAPITest.cs
@@ -70,7 +70,6 @@ public IEnumerator RuntimeApiTestWithEnumeratorPasses()
behaviorParams.BehaviorName = "TestBehavior";
behaviorParams.TeamId = 42;
behaviorParams.UseChildSensors = true;
- behaviorParams.DeterministicInference = false;
behaviorParams.ObservableAttributeHandling = ObservableAttributeOptions.ExamineAll;
diff --git a/docs/Getting-Started.md b/docs/Getting-Started.md
index 7baf461ebd..c4ec5332ad 100644
--- a/docs/Getting-Started.md
+++ b/docs/Getting-Started.md
@@ -119,9 +119,6 @@ example.
**Note** : You can modify multiple game objects in a scene by selecting them
all at once using the search bar in the Scene Hierarchy.
1. Set the **Inference Device** to use for this model as `CPU`.
-1. If the model is trained with Release 19 or later, you can select
- `Deterministic Inference` to choose actions deterministically from the model.
- Works only for inference within unity with no python process involved.
1. Click the **Play** button in the Unity Editor and you will see the platforms
balance the balls using the pre-trained model.
diff --git a/docs/Learning-Environment-Design-Agents.md b/docs/Learning-Environment-Design-Agents.md
index dc7022189f..8953894957 100644
--- a/docs/Learning-Environment-Design-Agents.md
+++ b/docs/Learning-Environment-Design-Agents.md
@@ -987,9 +987,6 @@ be called independently of the `Max Step` property.
training)
- `Inference Device` - Whether to use CPU or GPU to run the model during
inference
- - `Deterministic Inference` - Weather to set action selection to deterministic,
- Only applies to inference from within unity (with no python process involved) and
- Release 19 or later.
- `Behavior Type` - Determines whether the Agent will do training, inference,
or use its Heuristic() method:
- `Default` - the Agent will train if they connect to a python trainer,
diff --git a/docs/Training-Configuration-File.md b/docs/Training-Configuration-File.md
index a3e6cc35fb..537bea2f3e 100644
--- a/docs/Training-Configuration-File.md
+++ b/docs/Training-Configuration-File.md
@@ -44,7 +44,6 @@ choice of the trainer (which we review on subsequent sections).
| `network_settings -> normalize` | (default = `false`) Whether normalization is applied to the vector observation inputs. This normalization is based on the running average and variance of the vector observation. Normalization can be helpful in cases with complex continuous control problems, but may be harmful with simpler discrete control problems. |
| `network_settings -> vis_encode_type` | (default = `simple`) Encoder type for encoding visual observations. `simple` (default) uses a simple encoder which consists of two convolutional layers, `nature_cnn` uses the CNN implementation proposed by [Mnih et al.](https://www.nature.com/articles/nature14236), consisting of three convolutional layers, and `resnet` uses the [IMPALA Resnet](https://arxiv.org/abs/1802.01561) consisting of three stacked layers, each with two residual blocks, making a much larger network than the other two. `match3` is a smaller CNN ([Gudmundsoon et al.](https://www.researchgate.net/publication/328307928_Human-Like_Playtesting_with_Deep_Learning)) that can capture more granular spatial relationships and is optimized for board games. `fully_connected` uses a single fully connected dense layer as encoder without any convolutional layers. Due to the size of convolution kernel, there is a minimum observation size limitation that each encoder type can handle - `simple`: 20x20, `nature_cnn`: 36x36, `resnet`: 15 x 15, `match3`: 5x5. `fully_connected` doesn't have convolutional layers and thus no size limits, but since it has less representation power it should be reserved for very small inputs. Note that using the `match3` CNN with very large visual input might result in a huge observation encoding and thus potentially slow down training or cause memory issues. |
| `network_settings -> conditioning_type` | (default = `hyper`) Conditioning type for the policy using goal observations. `none` treats the goal observations as regular observations, `hyper` (default) uses a HyperNetwork with goal observations as input to generate some of the weights of the policy. Note that when using `hyper` the number of parameters of the network increases greatly. Therefore, it is recommended to reduce the number of `hidden_units` when using this `conditioning_type`
-| `network_settings -> deterministic` | (default = `false`) When set to true, ensures that actions are selected from the models output deterministically to ensure predictable and reproducible results. This can be overwritten by the `--deterministic` flag on the CLI.
## Trainer-specific Configurations
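As a quick illustration of the `network_settings` block this table documents once the `deterministic` row above is gone, here is a sketch in the dict form the trainer tests feed to `RunOptions.from_dict`; the keys come from this table and from the test fixtures in this change, and the values are only examples:

```python
# A sketch, not a recommended configuration; values are illustrative defaults.
network_settings = {
    "normalize": False,           # running-average normalization of vector observations
    "vis_encode_type": "simple",  # simple | nature_cnn | resnet | match3 | fully_connected
    "hidden_units": 256,
    "num_layers": 2,
}
```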
diff --git a/docs/images/3dball_learning_brain.png b/docs/images/3dball_learning_brain.png
index 68757fa6ce..c133bf2779 100644
Binary files a/docs/images/3dball_learning_brain.png and b/docs/images/3dball_learning_brain.png differ
diff --git a/ml-agents/mlagents/trainers/cli_utils.py b/ml-agents/mlagents/trainers/cli_utils.py
index de420c42a4..5884c3a5c5 100644
--- a/ml-agents/mlagents/trainers/cli_utils.py
+++ b/ml-agents/mlagents/trainers/cli_utils.py
@@ -91,14 +91,6 @@ def _create_parser() -> argparse.ArgumentParser:
"before resuming training. This option is only valid when the models exist, and have the same "
"behavior names as the current agents in your scene.",
)
- argparser.add_argument(
- "--deterministic",
- default=False,
- dest="deterministic",
- action=DetectDefaultStoreTrue,
- help="Whether to select actions deterministically in policy. `dist.mean` for continuous action "
- "space, and `dist.argmax` for deterministic action space ",
- )
argparser.add_argument(
"--force",
default=False,
diff --git a/ml-agents/mlagents/trainers/settings.py b/ml-agents/mlagents/trainers/settings.py
index f0b859fa33..fe52fb838c 100644
--- a/ml-agents/mlagents/trainers/settings.py
+++ b/ml-agents/mlagents/trainers/settings.py
@@ -151,7 +151,6 @@ def _check_valid_memory_size(self, attribute, value):
vis_encode_type: EncoderType = EncoderType.SIMPLE
memory: Optional[MemorySettings] = None
goal_conditioning_type: ConditioningType = ConditioningType.HYPER
- deterministic: bool = parser.get_default("deterministic")
@attr.s(auto_attribs=True)
@@ -929,11 +928,9 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions":
key
)
)
-
# Override with CLI args
# Keep deprecated --load working, TODO: remove
argparse_args["resume"] = argparse_args["resume"] or argparse_args["load_model"]
-
for key, val in argparse_args.items():
if key in DetectDefault.non_default_args:
if key in attr.fields_dict(CheckpointSettings):
@@ -953,16 +950,6 @@ def from_argparse(args: argparse.Namespace) -> "RunOptions":
if isinstance(final_runoptions.behaviors, TrainerSettings.DefaultTrainerDict):
# configure whether or not we should require all behavior names to be found in the config YAML
final_runoptions.behaviors.set_config_specified(_require_all_behaviors)
-
- _non_default_args = DetectDefault.non_default_args
-
- # Prioritize the deterministic mode from the cli for deterministic actions.
- if "deterministic" in _non_default_args:
- for behaviour in final_runoptions.behaviors.keys():
- final_runoptions.behaviors[
- behaviour
- ].network_settings.deterministic = argparse_args["deterministic"]
-
return final_runoptions
@staticmethod
diff --git a/ml-agents/mlagents/trainers/tests/test_settings.py b/ml-agents/mlagents/trainers/tests/test_settings.py
index 9d14fc5cc7..5fe453e43b 100644
--- a/ml-agents/mlagents/trainers/tests/test_settings.py
+++ b/ml-agents/mlagents/trainers/tests/test_settings.py
@@ -389,7 +389,6 @@ def test_exportable_settings(use_defaults):
init_entcoef: 0.5
reward_signal_steps_per_update: 10.0
network_settings:
- deterministic: true
normalize: false
hidden_units: 256
num_layers: 3
@@ -529,10 +528,7 @@ def test_environment_settings():
def test_default_settings():
# Make default settings, one nested and one not.
- default_settings = {
- "max_steps": 1,
- "network_settings": {"num_layers": 1000, "deterministic": True},
- }
+ default_settings = {"max_steps": 1, "network_settings": {"num_layers": 1000}}
behaviors = {"test1": {"max_steps": 2, "network_settings": {"hidden_units": 2000}}}
run_options_dict = {"default_settings": default_settings, "behaviors": behaviors}
run_options = RunOptions.from_dict(run_options_dict)
@@ -545,9 +541,7 @@ def test_default_settings():
test1_settings = run_options.behaviors["test1"]
assert test1_settings.max_steps == 2
assert test1_settings.network_settings.hidden_units == 2000
- assert test1_settings.network_settings.deterministic is True
assert test1_settings.network_settings.num_layers == 1000
-
# Change the overridden fields back, and check if the rest are equal.
test1_settings.max_steps = 1
test1_settings.network_settings.hidden_units == default_settings_cls.network_settings.hidden_units
diff --git a/ml-agents/mlagents/trainers/tests/torch/test_action_model.py b/ml-agents/mlagents/trainers/tests/torch/test_action_model.py
index 5ffbd9ce6b..9722931446 100644
--- a/ml-agents/mlagents/trainers/tests/torch/test_action_model.py
+++ b/ml-agents/mlagents/trainers/tests/torch/test_action_model.py
@@ -11,10 +11,10 @@
from mlagents_envs.base_env import ActionSpec
-def create_action_model(inp_size, act_size, deterministic=False):
- mask = torch.ones([1, act_size ** 2])
+def create_action_model(inp_size, act_size):
+ mask = torch.ones([1, act_size * 2])
action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
- action_model = ActionModel(inp_size, action_spec, deterministic=deterministic)
+ action_model = ActionModel(inp_size, action_spec)
return action_model, mask
@@ -43,47 +43,6 @@ def test_sample_action():
assert _disc.shape == (1, 1)
-def test_deterministic_sample_action():
- inp_size = 4
- act_size = 8
- action_model, masks = create_action_model(inp_size, act_size, deterministic=True)
- sample_inp = torch.ones((1, inp_size))
- dists = action_model._get_dists(sample_inp, masks=masks)
- agent_action1 = action_model._sample_action(dists)
- agent_action2 = action_model._sample_action(dists)
- agent_action3 = action_model._sample_action(dists)
-
- assert torch.equal(agent_action1.continuous_tensor, agent_action2.continuous_tensor)
- assert torch.equal(agent_action1.continuous_tensor, agent_action3.continuous_tensor)
- assert torch.equal(agent_action1.discrete_tensor, agent_action2.discrete_tensor)
- assert torch.equal(agent_action1.discrete_tensor, agent_action3.discrete_tensor)
-
- action_model, masks = create_action_model(inp_size, act_size, deterministic=False)
- sample_inp = torch.ones((1, inp_size))
- dists = action_model._get_dists(sample_inp, masks=masks)
- agent_action1 = action_model._sample_action(dists)
- agent_action2 = action_model._sample_action(dists)
- agent_action3 = action_model._sample_action(dists)
-
- assert not torch.equal(
- agent_action1.continuous_tensor, agent_action2.continuous_tensor
- )
-
- assert not torch.equal(
- agent_action1.continuous_tensor, agent_action3.continuous_tensor
- )
-
- chance_counter = 0
- if not torch.equal(agent_action1.discrete_tensor, agent_action2.discrete_tensor):
- chance_counter += 1
- if not torch.equal(agent_action1.discrete_tensor, agent_action3.discrete_tensor):
- chance_counter += 1
- if not torch.equal(agent_action2.discrete_tensor, agent_action3.discrete_tensor):
- chance_counter += 1
-
- assert chance_counter > 1
-
-
def test_get_probs_and_entropy():
inp_size = 4
act_size = 2
@@ -120,36 +79,3 @@ def test_get_probs_and_entropy():
for ent, val in zip(entropies[0].tolist(), [1.4189, 0.6191, 0.6191]):
assert ent == pytest.approx(val, abs=0.01)
-
-
-def test_get_onnx_deterministic_tensors():
- inp_size = 4
- act_size = 2
- action_model, masks = create_action_model(inp_size, act_size)
- sample_inp = torch.ones((1, inp_size))
- out_tensors = action_model.get_action_out(sample_inp, masks=masks)
- (
- continuous_out,
- discrete_out,
- action_out_deprecated,
- deterministic_continuous_out,
- deterministic_discrete_out,
- ) = out_tensors
- assert continuous_out.shape == (1, 2)
- assert discrete_out.shape == (1, 2)
- assert deterministic_discrete_out.shape == (1, 2)
- assert deterministic_continuous_out.shape == (1, 2)
-
- # Second sampling from same distribution
- out_tensors2 = action_model.get_action_out(sample_inp, masks=masks)
- (
- continuous_out_2,
- discrete_out_2,
- action_out_2_deprecated,
- deterministic_continuous_out_2,
- deterministic_discrete_out_2,
- ) = out_tensors2
- assert ~torch.all(torch.eq(continuous_out, continuous_out_2))
- assert torch.all(
- torch.eq(deterministic_continuous_out, deterministic_continuous_out_2)
- )
diff --git a/ml-agents/mlagents/trainers/torch/action_model.py b/ml-agents/mlagents/trainers/torch/action_model.py
index 65dfd32b40..c5de586e4d 100644
--- a/ml-agents/mlagents/trainers/torch/action_model.py
+++ b/ml-agents/mlagents/trainers/torch/action_model.py
@@ -10,7 +10,6 @@
from mlagents.trainers.torch.action_log_probs import ActionLogProbs
from mlagents_envs.base_env import ActionSpec
-
EPSILON = 1e-7 # Small value to avoid divide by zero
@@ -33,7 +32,6 @@ def __init__(
action_spec: ActionSpec,
conditional_sigma: bool = False,
tanh_squash: bool = False,
- deterministic: bool = False,
):
"""
A torch module that represents the action space of a policy. The ActionModel may contain
@@ -45,7 +43,6 @@ def __init__(
:params action_spec: The ActionSpec defining the action space dimensions and distributions.
:params conditional_sigma: Whether or not the std of a Gaussian is conditioned on state.
:params tanh_squash: Whether to squash the output of a Gaussian with the tanh function.
- :params deterministic: Whether to select actions deterministically in policy.
"""
super().__init__()
self.encoding_size = hidden_size
@@ -69,7 +66,6 @@ def __init__(
# During training, clipping is done in TorchPolicy, but we need to clip before ONNX
# export as well.
self._clip_action_on_export = not tanh_squash
- self._deterministic = deterministic
def _sample_action(self, dists: DistInstances) -> AgentAction:
"""
@@ -77,24 +73,15 @@ def _sample_action(self, dists: DistInstances) -> AgentAction:
:params dists: The DistInstances tuple
:return: An AgentAction corresponding to the actions sampled from the DistInstances
"""
-
continuous_action: Optional[torch.Tensor] = None
discrete_action: Optional[List[torch.Tensor]] = None
# This checks None because mypy complains otherwise
- print(self._deterministic)
if dists.continuous is not None:
- if self._deterministic:
- continuous_action = dists.continuous.deterministic_sample()
- else:
- continuous_action = dists.continuous.sample()
+ continuous_action = dists.continuous.sample()
if dists.discrete is not None:
discrete_action = []
- if self._deterministic:
- for discrete_dist in dists.discrete:
- discrete_action.append(discrete_dist.deterministic_sample())
- else:
- for discrete_dist in dists.discrete:
- discrete_action.append(discrete_dist.sample())
+ for discrete_dist in dists.discrete:
+ discrete_action.append(discrete_dist.sample())
return AgentAction(continuous_action, discrete_action)
def _get_dists(self, inputs: torch.Tensor, masks: torch.Tensor) -> DistInstances:
@@ -174,20 +161,12 @@ def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Ten
"""
dists = self._get_dists(inputs, masks)
continuous_out, discrete_out, action_out_deprecated = None, None, None
- deterministic_continuous_out, deterministic_discrete_out = (
- None,
- None,
- ) # deterministic actions
if self.action_spec.continuous_size > 0 and dists.continuous is not None:
continuous_out = dists.continuous.exported_model_output()
- action_out_deprecated = continuous_out
- deterministic_continuous_out = dists.continuous.deterministic_sample()
+ action_out_deprecated = dists.continuous.exported_model_output()
if self._clip_action_on_export:
continuous_out = torch.clamp(continuous_out, -3, 3) / 3
- action_out_deprecated = continuous_out
- deterministic_continuous_out = (
- torch.clamp(deterministic_continuous_out, -3, 3) / 3
- )
+ action_out_deprecated = torch.clamp(action_out_deprecated, -3, 3) / 3
if self.action_spec.discrete_size > 0 and dists.discrete is not None:
discrete_out_list = [
discrete_dist.exported_model_output()
@@ -195,23 +174,10 @@ def get_action_out(self, inputs: torch.Tensor, masks: torch.Tensor) -> torch.Ten
]
discrete_out = torch.cat(discrete_out_list, dim=1)
action_out_deprecated = torch.cat(discrete_out_list, dim=1)
- deterministic_discrete_out_list = [
- discrete_dist.deterministic_sample() for discrete_dist in dists.discrete
- ]
- deterministic_discrete_out = torch.cat(
- deterministic_discrete_out_list, dim=1
- )
-
# deprecated action field does not support hybrid action
if self.action_spec.continuous_size > 0 and self.action_spec.discrete_size > 0:
action_out_deprecated = None
- return (
- continuous_out,
- discrete_out,
- action_out_deprecated,
- deterministic_continuous_out,
- deterministic_discrete_out,
- )
+ return continuous_out, discrete_out, action_out_deprecated
def forward(
self, inputs: torch.Tensor, masks: torch.Tensor
diff --git a/ml-agents/mlagents/trainers/torch/distributions.py b/ml-agents/mlagents/trainers/torch/distributions.py
index 25b11edf85..1f5960d10b 100644
--- a/ml-agents/mlagents/trainers/torch/distributions.py
+++ b/ml-agents/mlagents/trainers/torch/distributions.py
@@ -16,13 +16,6 @@ def sample(self) -> torch.Tensor:
"""
pass
- @abc.abstractmethod
- def deterministic_sample(self) -> torch.Tensor:
- """
- Return the most probable sample from this distribution.
- """
- pass
-
@abc.abstractmethod
def log_prob(self, value: torch.Tensor) -> torch.Tensor:
"""
@@ -66,9 +59,6 @@ def sample(self):
sample = self.mean + torch.randn_like(self.mean) * self.std
return sample
- def deterministic_sample(self):
- return self.mean
-
def log_prob(self, value):
var = self.std ** 2
log_scale = torch.log(self.std + EPSILON)
@@ -123,9 +113,6 @@ def __init__(self, logits):
def sample(self):
return torch.multinomial(self.probs, 1)
- def deterministic_sample(self):
- return torch.argmax(self.probs, dim=1, keepdim=True)
-
def pdf(self, value):
# This function is equivalent to torch.diag(self.probs.T[value.flatten().long()]),
# but torch.diag is not supported by ONNX export.
@@ -225,7 +212,6 @@ def _mask_branch(
# We do -1 * tensor + constant instead of constant - tensor because it seems
# Barracuda might swap the inputs of a "Sub" operation
logits = logits * allow_mask - 1e8 * block_mask
-
return logits
def _split_masks(self, masks: torch.Tensor) -> List[torch.Tensor]:
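The two `deterministic_sample` implementations removed above reduce to taking each distribution's mode: the mean of the Gaussian and the argmax of the categorical probabilities. A standalone PyTorch sketch of that difference (the tensor shapes and logits here are arbitrary examples, not ml-agents code):

```python
import torch

# Stochastic vs. most-probable selection, mirroring the removed
# GaussianDistInstance / CategoricalDistInstance methods.
mean = torch.zeros(1, 2)
std = torch.ones(1, 2)
stochastic_continuous = mean + torch.randn_like(mean) * std       # sample()
deterministic_continuous = mean                                    # deterministic_sample() returned the mean

logits = torch.tensor([[0.1, 2.0, 0.3]])
probs = torch.softmax(logits, dim=1)
stochastic_discrete = torch.multinomial(probs, 1)                   # sample()
deterministic_discrete = torch.argmax(probs, dim=1, keepdim=True)   # deterministic_sample() returned the argmax
```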
diff --git a/ml-agents/mlagents/trainers/torch/model_serialization.py b/ml-agents/mlagents/trainers/torch/model_serialization.py
index f204b52445..0fa946280c 100644
--- a/ml-agents/mlagents/trainers/torch/model_serialization.py
+++ b/ml-agents/mlagents/trainers/torch/model_serialization.py
@@ -56,13 +56,10 @@ class TensorNames:
recurrent_output = "recurrent_out"
memory_size = "memory_size"
version_number = "version_number"
-
continuous_action_output_shape = "continuous_action_output_shape"
discrete_action_output_shape = "discrete_action_output_shape"
continuous_action_output = "continuous_actions"
discrete_action_output = "discrete_actions"
- deterministic_continuous_action_output = "deterministic_continuous_actions"
- deterministic_discrete_action_output = "deterministic_discrete_actions"
# Deprecated TensorNames entries for backward compatibility
is_continuous_control_deprecated = "is_continuous_control"
@@ -125,7 +122,6 @@ def __init__(self, policy):
self.output_names += [
TensorNames.continuous_action_output,
TensorNames.continuous_action_output_shape,
- TensorNames.deterministic_continuous_action_output,
]
self.dynamic_axes.update(
{TensorNames.continuous_action_output: {0: "batch"}}
@@ -134,7 +130,6 @@ def __init__(self, policy):
self.output_names += [
TensorNames.discrete_action_output,
TensorNames.discrete_action_output_shape,
- TensorNames.deterministic_discrete_action_output,
]
self.dynamic_axes.update({TensorNames.discrete_action_output: {0: "batch"}})
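The `output_names` and `dynamic_axes` collections updated above are ultimately passed to `torch.onnx.export`, which is why dropping the deterministic tensor names removes those outputs from the serialized graph. A minimal, self-contained sketch of that mechanism (the tiny policy, the file name, and the `obs_0` input name are placeholders, not the ml-agents exporter):

```python
import torch
import torch.nn as nn

class TinyPolicy(nn.Module):
    def __init__(self):
        super().__init__()
        self.body = nn.Linear(4, 2)

    def forward(self, obs):
        return torch.tanh(self.body(obs))

policy = TinyPolicy()
dummy_obs = torch.zeros(1, 4)
torch.onnx.export(
    policy,
    (dummy_obs,),
    "tiny_policy.onnx",
    input_names=["obs_0"],
    # After this change only the stochastic action tensor name is declared;
    # no "deterministic_continuous_actions" output is exported.
    output_names=["continuous_actions"],
    dynamic_axes={"obs_0": {0: "batch"}, "continuous_actions": {0: "batch"}},
)
```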
diff --git a/ml-agents/mlagents/trainers/torch/networks.py b/ml-agents/mlagents/trainers/torch/networks.py
index be8fb4b732..4a2e1dafc6 100644
--- a/ml-agents/mlagents/trainers/torch/networks.py
+++ b/ml-agents/mlagents/trainers/torch/networks.py
@@ -617,7 +617,6 @@ def __init__(
action_spec,
conditional_sigma=conditional_sigma,
tanh_squash=tanh_squash,
- deterministic=network_settings.deterministic,
)
@property
@@ -676,22 +675,12 @@ def forward(
cont_action_out,
disc_action_out,
action_out_deprecated,
- deterministic_cont_action_out,
- deterministic_disc_action_out,
) = self.action_model.get_action_out(encoding, masks)
export_out = [self.version_number, self.memory_size_vector]
if self.action_spec.continuous_size > 0:
- export_out += [
- cont_action_out,
- self.continuous_act_size_vector,
- deterministic_cont_action_out,
- ]
+ export_out += [cont_action_out, self.continuous_act_size_vector]
if self.action_spec.discrete_size > 0:
- export_out += [
- disc_action_out,
- self.discrete_act_size_vector,
- deterministic_disc_action_out,
- ]
+ export_out += [disc_action_out, self.discrete_act_size_vector]
if self.network_body.memory_size > 0:
export_out += [memories_out]
return tuple(export_out)