Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions chainerrl/action_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def __repr__(self):
def params(self):
return (self.q_values,)

def __getitem__(self, i):
    """Return a new DiscreteActionValue built from ``self.q_values[i]``.

    Only ``q_values`` is indexed; this instance's ``q_values_formatter``
    is handed to the new object unchanged.
    """
    selected_q_values = self.q_values[i]
    return DiscreteActionValue(
        selected_q_values,
        q_values_formatter=self.q_values_formatter,
    )


class DistributionalDiscreteActionValue(ActionValue):
"""distributional Q-function output for discrete action space.
Expand Down Expand Up @@ -179,6 +183,13 @@ def __repr__(self):
def params(self):
return (self.q_dist,)

def __getitem__(self, i):
    """Return a new DistributionalDiscreteActionValue for ``self.q_dist[i]``.

    Only ``q_dist`` is indexed; ``z_values`` and ``q_values_formatter``
    are passed through to the new object as they are.
    """
    indexed_dist = self.q_dist[i]
    return DistributionalDiscreteActionValue(
        indexed_dist, self.z_values,
        q_values_formatter=self.q_values_formatter)


class QuadraticActionValue(ActionValue):
"""Q-function output for continuous action space.
Expand Down Expand Up @@ -258,6 +269,15 @@ def __repr__(self):
def params(self):
return (self.mu, self.mat, self.v)

def __getitem__(self, i):
    """Return a new QuadraticActionValue holding the ``i``-indexed terms.

    ``mu``, ``mat`` and ``v`` are each indexed with ``i``; the action
    bounds ``min_action`` and ``max_action`` are forwarded unchanged.
    """
    mu_i, mat_i, v_i = self.mu[i], self.mat[i], self.v[i]
    bounds = dict(min_action=self.min_action, max_action=self.max_action)
    return QuadraticActionValue(mu_i, mat_i, v_i, **bounds)


class SingleActionValue(ActionValue):
"""ActionValue that can evaluate only a single action."""
Expand Down Expand Up @@ -297,3 +317,6 @@ def params(self):
' graph that outputs SingleActionValue, use the variable returned'
' by its method such as evaluate_actions instead.')
return ()

def __getitem__(self, i):
    """Indexing is not supported here; always raises NotImplementedError."""
    raise NotImplementedError()
43 changes: 43 additions & 0 deletions tests/test_action_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ def test_params(self):
self.assertEqual(len(self.qout.params), 1)
self.assertEqual(id(self.qout.params[0]), id(self.qout.q_values))

def test_getitem(self):
    # Slicing the action value must slice q_values the same way while
    # keeping the number of actions and the formatter object intact.
    head = slice(None, 10)
    sliced = self.qout[head]
    np.testing.assert_equal(sliced.q_values.array, self.q_values[head])
    self.assertEqual(sliced.n_actions, self.action_size)
    self.assertIs(
        sliced.q_values_formatter, self.qout.q_values_formatter)


class TestDistributionalDiscreteActionValue(unittest.TestCase):

Expand Down Expand Up @@ -140,6 +147,15 @@ def test_params(self):
self.assertEqual(len(self.qout.params), 1)
self.assertIs(self.qout.params[0], self.qout.q_dist)

def test_getitem(self):
    # Slicing must apply to q_values and q_dist, while z_values, the
    # number of actions and the formatter object stay as they were.
    head = slice(None, 10)
    sliced = self.qout[head]
    np.testing.assert_equal(sliced.q_values.array, self.q_values[head])
    np.testing.assert_equal(sliced.z_values, self.z_values)
    np.testing.assert_equal(sliced.q_dist.array, self.atom_probs[head])
    self.assertEqual(sliced.n_actions, self.action_size)
    self.assertIs(
        sliced.q_values_formatter, self.qout.q_values_formatter)


class TestQuadraticActionValue(unittest.TestCase):
def test_max_unbounded(self):
Expand Down Expand Up @@ -194,6 +210,29 @@ def test_max_bounded(self):
v_out[mu_is_not_allowed],
v[mu_is_not_allowed])

def test_getitem(self):
    # Build a small batch of quadratic action values, slice off the
    # first three entries and check every component was sliced (or,
    # for the action bounds, carried over) as expected.
    batch_size, action_dim = 7, 3
    mu = np.random.randn(batch_size, action_dim).astype(np.float32)
    identity = np.eye(action_dim, dtype=np.float32)
    mat = np.broadcast_to(
        identity[None], (batch_size, action_dim, action_dim))
    v = np.random.randn(batch_size).astype(np.float32)
    min_action = -1
    max_action = 1
    qout = action_value.QuadraticActionValue(
        chainer.Variable(mu), chainer.Variable(mat), chainer.Variable(v),
        min_action, max_action)

    head = slice(None, 3)
    sliced = qout[head]
    for attr, expected in (('mu', mu), ('mat', mat), ('v', v)):
        np.testing.assert_equal(getattr(sliced, attr).array, expected[head])
    np.testing.assert_equal(sliced.min_action, min_action)
    np.testing.assert_equal(sliced.max_action, max_action)


@testing.parameterize(*testing.product({
'batch_size': [1, 3],
Expand Down Expand Up @@ -258,3 +297,7 @@ def test_compute_advantage(self):
def test_params(self):
    # The action value under test is expected to expose no params.
    n_params = len(self.av.params)
    self.assertEqual(n_params, 0)

def test_getitem(self):
    # Indexing the action value under test must be rejected.
    self.assertRaises(NotImplementedError, lambda: self.av[:1])