Skip to content

[MLA-803] Add timer metadata to C# and python #3758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions com.unity.ml-agents/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Format of console output has changed slightly and now matches the name of the model/summary directory. (#3630, #3616)
- Added a feature to allow sending stats from C# environments to TensorBoard (and other python StatsWriters). To do this from your code, use `SideChannelUtils.GetSideChannel<StatsSideChannel>().AddStat(key, value)` (#3660)
- Renamed 'Generalization' feature to 'Environment Parameter Randomization'.
- Timer files now contain a dictionary of metadata, including things like the package version numbers.
- SideChannel IncomingMessages methods now take an optional default argument, which is used when trying to read more data than the message contains.
- The way that UnityEnvironment decides the port was changed. If no port is specified, the behavior will depend on the `file_name` parameter. If it is `None`, 5004 (the editor port) will be used; otherwise 5005 (the base environment port) will be used.
- Fixed an issue where exceptions from environments provided a returncode of 0. (#3680)
Expand Down
3 changes: 3 additions & 0 deletions com.unity.ml-agents/Runtime/Academy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,9 @@ static int ReadPortFromArgs()
/// </summary>
void InitializeEnvironment()
{
TimerStack.Instance.AddMetadata("communication_protocol_version", k_ApiVersion);
TimerStack.Instance.AddMetadata("package_version", k_PackageVersion);

EnableAutomaticStepping();

SideChannelUtils.RegisterSideChannel(new EngineConfigurationChannel());
Expand Down
69 changes: 53 additions & 16 deletions com.unity.ml-agents/Runtime/Timer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ internal class TimerNode
[DataMember(Name = "children", Order = 999)]
Dictionary<string, TimerNode> m_Children;

/// <summary>
/// Gauge Nodes to measure arbitrary values.
/// </summary>
[DataMember(Name = "gauges", EmitDefaultValue = false)]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since only the root node needs gauges and metadata, I made the RootNode a separate class. Just a memory micro-optimization.

Dictionary<string, GaugeNode> m_Gauges;

/// <summary>
/// Custom sampler used to add timings to the profiler.
/// </summary>
Expand Down Expand Up @@ -84,11 +78,6 @@ public double TotalSeconds
set {} // Serialization needs this, but unused.
}

public Dictionary<string, GaugeNode> Gauges
{
get { return m_Gauges; }
}

/// <summary>
/// Total seconds spent in this block, excluding it's children.
/// </summary>
Expand Down Expand Up @@ -132,7 +121,6 @@ public TimerNode(string name, bool isRoot = false)
// The root node doesn't have a sampler since that could interfere with the profiler.
m_NumCalls = 1;
m_TickStart = DateTime.Now.Ticks;
m_Gauges = new Dictionary<string, GaugeNode>();
}
else
{
Expand Down Expand Up @@ -220,6 +208,45 @@ public string DebugGetTimerString(string parentName = "", int level = 0)
}
}

[DataContract]
internal class RootNode : TimerNode
{
// Timer output format version
internal const string k_timerFormatVersion = "0.1.0";

[DataMember(Name = "metadata", Order = 0)]
Dictionary<string, string> m_Metadata = new Dictionary<string, string>();

/// <summary>
/// Gauge Nodes to measure arbitrary values.
/// </summary>
[DataMember(Name = "gauges", EmitDefaultValue = false)]
Dictionary<string, GaugeNode> m_Gauges = new Dictionary<string, GaugeNode>();

public RootNode(string name="root") : base(name, true)
{
m_Metadata.Add("timer_format_version", k_timerFormatVersion);
m_Metadata.Add("start_time_seconds", $"{DateTimeOffset.Now.ToUnixTimeSeconds()}");
m_Metadata.Add("unity_version", Application.unityVersion);
m_Metadata.Add("command_line_arguments", String.Join(" ", Environment.GetCommandLineArgs()));
}

public void AddMetadata(string key, string value)
{
m_Metadata[key] = value;
}

public Dictionary<string, GaugeNode> Gauges
{
get { return m_Gauges; }
}

public Dictionary<string, string> Metadata
{
get { return m_Metadata; }
}
}

/// <summary>
/// Tracks the most recent value of a metric. This is analogous to gauges in statsd.
/// </summary>
Expand Down Expand Up @@ -286,7 +313,8 @@ internal class TimerStack : IDisposable
static readonly TimerStack k_Instance = new TimerStack();

Stack<TimerNode> m_Stack;
TimerNode m_RootNode;
RootNode m_RootNode;
Dictionary<string, string> m_Metadata;

// Explicit static constructor to tell C# compiler
// not to mark type as beforefieldinit
Expand All @@ -306,7 +334,7 @@ static TimerStack()
public void Reset(string name = "root")
{
m_Stack = new Stack<TimerNode>();
m_RootNode = new TimerNode(name, true);
m_RootNode = new RootNode(name);
m_Stack.Push(m_RootNode);
}

Expand All @@ -318,7 +346,7 @@ public static TimerStack Instance
get { return k_Instance; }
}

internal TimerNode RootNode
internal RootNode RootNode
{
get { return m_RootNode; }
}
Expand All @@ -344,6 +372,11 @@ public void SetGauge(string name, float value)
}
}

public void AddMetadata(string key, string value)
{
m_RootNode.AddMetadata(key, value);
}

void Push(string name)
{
var current = m_Stack.Peek();
Expand Down Expand Up @@ -427,9 +460,13 @@ public void SaveJsonTimers(string filename = null)
/// <param name="stream"></param>
public void SaveJsonTimers(Stream stream)
{
// Add some final metadata info
AddMetadata("scene_name", SceneManager.GetActiveScene().name);
AddMetadata("end_time_seconds", $"{DateTimeOffset.Now.ToUnixTimeSeconds()}");

var jsonSettings = new DataContractJsonSerializerSettings();
jsonSettings.UseSimpleDictionaryFormat = true;
var ser = new DataContractJsonSerializer(typeof(TimerNode), jsonSettings);
var ser = new DataContractJsonSerializer(typeof(RootNode), jsonSettings);
ser.WriteObject(stream, m_RootNode);
}
}
Expand Down
2 changes: 2 additions & 0 deletions com.unity.ml-agents/Tests/Editor/TimerTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ public void TestNested()
using (myTimer.Scoped("bar"))
{
myTimer.SetGauge("my_gauge", i);
myTimer.AddMetadata("i", $"{i}");
}
}
}
Expand All @@ -30,6 +31,7 @@ public void TestNested()
Assert.AreEqual(0, gauge.minValue);
Assert.AreEqual(4, gauge.maxValue);
Assert.AreEqual(4, gauge.value);
Assert.AreEqual("4", myTimer.RootNode.Metadata["i"]);

var fooChildren = rootChildren["foo"].Children;
Assert.That(fooChildren, Contains.Key("bar"));
Expand Down
7 changes: 7 additions & 0 deletions ml-agents-envs/mlagents_envs/tests/test_timers.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ def test_timers() -> None:
}
},
"gauges": {"my_gauge": {"value": 4.0, "max": 4.0, "min": 0.0, "count": 3}},
"metadata": {
"timer_format_version": timers.TIMER_FORMAT_VERSION,
"start_time_seconds": mock.ANY,
"end_time_seconds": mock.ANY,
"python_version": mock.ANY,
"command_line_arguments": mock.ANY,
},
}

assert timer_tree == expected_tree
39 changes: 32 additions & 7 deletions ml-agents-envs/mlagents_envs/timers.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ def main():
"""

import math
from time import perf_counter
import sys
import time

from contextlib import contextmanager
from typing import Any, Callable, Dict, Generator, TypeVar

TIMER_FORMAT_VERSION = "0.1.0"


class TimerNode:
"""
Expand Down Expand Up @@ -123,19 +126,23 @@ class TimerStack:
sure that pushes and pops are already matched.
"""

__slots__ = ["root", "stack", "start_time", "gauges"]
__slots__ = ["root", "stack", "start_time", "gauges", "metadata"]

def __init__(self):
self.root = TimerNode()
self.stack = [self.root]
self.start_time = perf_counter()
self.start_time = time.perf_counter()
self.gauges: Dict[str, GaugeNode] = {}
self.metadata: Dict[str, str] = {}
self._add_default_metadata()

def reset(self):
self.root = TimerNode()
self.stack = [self.root]
self.start_time = perf_counter()
self.start_time = time.perf_counter()
self.gauges: Dict[str, GaugeNode] = {}
self.metadata: Dict[str, str] = {}
self._add_default_metadata()

def push(self, name: str) -> TimerNode:
"""
Expand All @@ -157,7 +164,7 @@ def get_root(self) -> TimerNode:
Update the total time and count of the root name, and return it.
"""
root = self.root
root.total = perf_counter() - self.start_time
root.total = time.perf_counter() - self.start_time
root.count = 1
return root

Expand All @@ -175,6 +182,10 @@ def get_timing_tree(self, node: TimerNode = None) -> Dict[str, Any]:
if self.gauges:
res["gauges"] = self._get_gauges()

if self.metadata:
self.metadata["end_time_seconds"] = str(int(time.time()))
res["metadata"] = self.metadata

res["total"] = node.total
res["count"] = node.count

Expand Down Expand Up @@ -205,12 +216,21 @@ def set_gauge(self, name: str, value: float) -> None:
else:
self.gauges[name] = GaugeNode(value)

def add_metadata(self, key: str, value: str) -> None:
self.metadata[key] = value

def _get_gauges(self) -> Dict[str, Dict[str, float]]:
gauges = {}
for gauge_name, gauge_node in self.gauges.items():
gauges[gauge_name] = gauge_node.as_dict()
return gauges

def _add_default_metadata(self):
self.metadata["timer_format_version"] = TIMER_FORMAT_VERSION
self.metadata["start_time_seconds"] = str(int(time.time()))
self.metadata["python_version"] = sys.version
self.metadata["command_line_arguments"] = " ".join(sys.argv)


# Global instance of a TimerStack. This is generally all that we need for profiling, but you can potentially
# create multiple instances and pass them to the contextmanager
Expand All @@ -225,15 +245,15 @@ def hierarchical_timer(name: str, timer_stack: TimerStack = None) -> Generator:
"""
timer_stack = timer_stack or _global_timer_stack
timer_node = timer_stack.push(name)
start_time = perf_counter()
start_time = time.perf_counter()

try:
# The wrapped code block will run here.
yield timer_node
finally:
# This will trigger either when the context manager exits, or an exception is raised.
# We'll accumulate the time, and the exception (if any) gets raised automatically.
elapsed = perf_counter() - start_time
elapsed = time.perf_counter() - start_time
timer_node.add_time(elapsed)
timer_stack.pop()

Expand Down Expand Up @@ -268,6 +288,11 @@ def set_gauge(name: str, value: float, timer_stack: TimerStack = None) -> None:
timer_stack.set_gauge(name, value)


def add_metadata(key: str, value: str, timer_stack: TimerStack = None) -> None:
timer_stack = timer_stack or _global_timer_stack
timer_stack.add_metadata(key, value)


def get_timer_tree(timer_stack: TimerStack = None) -> Dict[str, Any]:
"""
Return the tree of timings from the TimerStack as a dictionary (or the global stack if none is provided)
Expand Down
12 changes: 11 additions & 1 deletion ml-agents/mlagents/trainers/learn.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@
from mlagents_envs.side_channel.side_channel import SideChannel
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig
from mlagents_envs.exception import UnityEnvironmentException
from mlagents_envs.timers import hierarchical_timer, get_timer_tree
from mlagents_envs.timers import (
hierarchical_timer,
get_timer_tree,
add_metadata as add_timer_metadata,
)
from mlagents_envs import logging_util

logger = logging_util.get_logger(__name__)
Expand Down Expand Up @@ -485,6 +489,12 @@ def run_cli(options: RunOptions) -> None:
if options.cpu:
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Add some timer metadata
add_timer_metadata("mlagents_version", mlagents.trainers.__version__)
add_timer_metadata("mlagents_envs_version", mlagents_envs.__version__)
add_timer_metadata("communication_protocol_version", UnityEnvironment.API_VERSION)
add_timer_metadata("tensorflow_version", tf_utils.tf.__version__)

if options.seed == -1:
run_seed = np.random.randint(0, 10000)
run_training(run_seed, options)
Expand Down