Fix test that appears to have been faulty before

aymeric-roucher · aymeric-roucher · commit fc7cc8978567 · 2025-06-17T20:17:46.000+02:00
diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
@@ -258,15 +258,15 @@ def __init__(
         self.prompt_templates = prompt_templates or EMPTY_PROMPT_TEMPLATES
         if prompt_templates is not None:
             missing_keys = set(EMPTY_PROMPT_TEMPLATES.keys()) - set(prompt_templates.keys())
-            assert not missing_keys, (
-                f"Some prompt templates are missing from your custom `prompt_templates`: {missing_keys}"
-            )
+            assert (
+                not missing_keys
+            ), f"Some prompt templates are missing from your custom `prompt_templates`: {missing_keys}"
             for key, value in EMPTY_PROMPT_TEMPLATES.items():
                 if isinstance(value, dict):
                     for subkey in value.keys():
-                        assert key in prompt_templates.keys() and (subkey in prompt_templates[key].keys()), (
-                            f"Some prompt templates are missing from your custom `prompt_templates`: {subkey} under {key}"
-                        )
+                        assert (
+                            key in prompt_templates.keys() and (subkey in prompt_templates[key].keys())
+                        ), f"Some prompt templates are missing from your custom `prompt_templates`: {subkey} under {key}"
 
         self.max_steps = max_steps
         self.step_number = 0
@@ -320,9 +320,9 @@ def _setup_managed_agents(self, managed_agents: list | None = None) -> None:
         """Setup managed agents with proper logging."""
         self.managed_agents = {}
         if managed_agents:
-            assert all(agent.name and agent.description for agent in managed_agents), (
-                "All managed agents need both a name and a description!"
-            )
+            assert all(
+                agent.name and agent.description for agent in managed_agents
+            ), "All managed agents need both a name and a description!"
             self.managed_agents = {agent.name: agent for agent in managed_agents}
 
     def _setup_tools(self, tools, add_base_tools):
@@ -465,8 +465,8 @@ def _run_stream(
                 planning_start_time = time.time()
                 planning_step = None
                 for element in self._generate_planning_step(
-                    task, is_first_step=(self.step_number == 1), step=self.step_number
-                ):
+                    task, is_first_step=len(self.memory.steps) == 0, step=self.step_number
+                ):  # Don't use the attribute step_number here, because there can be steps from previous runs
                     yield element
                     planning_step = element
                 assert isinstance(planning_step, PlanningStep)  # Last yielded element should be a PlanningStep
diff --git a/tests/test_agents.py b/tests/test_agents.py
@@ -655,27 +655,29 @@ def generate(self, messages, stop_sequences=None):
 
     def test_planning_step_with_injected_memory(self):
         """Test that planning step uses update plan prompts when memory is injected before run."""
-        agent = CodeAgent(tools=[], planning_interval=1, model=FakeCodeModelPlanning())
+        agent = CodeAgent(tools=[], planning_interval=1, model=FakeCodeModelPlanning(), verbosity_level=1000)
         task = "Continuous task"
 
         # Inject memory before run
         previous_step = TaskStep(task="Previous user request")
         agent.memory.steps.append(previous_step)
 
         # Run the agent
-        agent.run(task, reset=False)
+        agent.run(task, reset=False, max_steps=2)
 
         # Verify that the planning step used update plan prompts
         planning_steps = [step for step in agent.memory.steps if isinstance(step, PlanningStep)]
         assert len(planning_steps) > 0
 
         # Check that the planning step's model input messages contain the injected memory
-        planning_step = planning_steps[0]
-        assert len(planning_step.model_input_messages) == 3  # system message + memory messages + user message
-        assert planning_step.model_input_messages[0]["role"] == "system"
-        assert task in planning_step.model_input_messages[0]["content"][0]["text"]
-        assert planning_step.model_input_messages[1]["role"] == "user"
-        assert "Previous user request" in planning_step.model_input_messages[1]["content"][0]["text"]
+        update_plan_step = planning_steps[0]
+        assert (
+            len(update_plan_step.model_input_messages) == 4
+        )  # system message + memory messages (2 task messages) + user message
+        assert update_plan_step.model_input_messages[0]["role"] == "system"
+        assert task in update_plan_step.model_input_messages[0]["content"][0]["text"]
+        assert update_plan_step.model_input_messages[1]["role"] == "user"
+        assert "Previous user request" in update_plan_step.model_input_messages[1]["content"][0]["text"]
 
 
 class CustomFinalAnswerTool(FinalAnswerTool):
diff --git a/tests/test_memory.py b/tests/test_memory.py
@@ -1,4 +1,5 @@
 import pytest
+from PIL import Image
 
 from smolagents.agents import ToolCall
 from smolagents.memory import (
@@ -50,7 +51,7 @@ def test_action_step_dict():
         model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Hi"),
         model_output="Hi",
         observations="This is a nice observation",
-        observations_images=["image1.png"],
+        observations_images=[Image.new("RGB", (100, 100))],
         action_output="Output",
         token_usage=TokenUsage(input_tokens=10, output_tokens=20),
     )
@@ -76,8 +77,8 @@ def test_action_step_dict():
     assert "token_usage" in action_step_dict
     assert action_step_dict["token_usage"] == {"input_tokens": 10, "output_tokens": 20, "total_tokens": 30}
 
-    assert "step" in action_step_dict
-    assert action_step_dict["step"] == 1
+    assert "step_number" in action_step_dict
+    assert action_step_dict["step_number"] == 1
 
     assert "error" in action_step_dict
     assert action_step_dict["error"] is None
@@ -97,6 +98,8 @@ def test_action_step_dict():
     assert "observations" in action_step_dict
     assert action_step_dict["observations"] == "This is a nice observation"
 
+    assert "observations_images" in action_step_dict
+
     assert "action_output" in action_step_dict
     assert action_step_dict["action_output"] == "Output"
 
@@ -113,7 +116,7 @@ def test_action_step_to_messages():
         model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content="Hi"),
         model_output="Hi",
         observations="This is a nice observation",
-        observations_images=["image1.png"],
+        observations_images=[Image.new("RGB", (100, 100))],
         action_output="Output",
         token_usage=TokenUsage(input_tokens=10, output_tokens=20),
     )
@@ -197,7 +200,7 @@ def test_planning_step_to_messages():
 
 
 def test_task_step_to_messages():
-    task_step = TaskStep(task="This is a task.", task_images=["task_image1.png"])
+    task_step = TaskStep(task="This is a task.", task_images=[Image.new("RGB", (100, 100))])
     messages = task_step.to_messages(summary_mode=False)
     assert len(messages) == 1
     for message in messages: