
Commit 39bfb21

Merge branch 'main' into release
merge main
2 parents: 2716fc7 + fd1b4d9


42 files changed (+698 additions, -1059 deletions)

docs/pages/index.mdx

Lines changed: 2 additions & 2 deletions
@@ -176,8 +176,8 @@ or manual setup:
 tar -xvzf redis-stack-server.tar.gz
 export PATH=$(pwd)/redis-stack-server-7.2.0-v10/bin:$PATH
 # if you are using Ubunutu 22.04, please do an extra step
-wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.22_amd64.deb
-sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2.22_amd64.deb
+wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb
+sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2_amd64.deb
 ```
 
 ### Start the server

examples/benchmark_evaluator.py

Lines changed: 2 additions & 2 deletions
@@ -123,7 +123,7 @@ def evaluate_evaluator(
     )
     run_async_server_in_batch_aevaluate(
         tag=tag,
-        model=model, # type: ignore
+        model=model,
         batch_size=batch_size,
         push_to_db=push_to_db,
         verbose=verbose,
@@ -142,7 +142,7 @@ def evaluate_evaluator(
     while to_re_evaluate_list:
         run_async_server_in_batch_aevaluate(
             tag=tag,
-            model=model, # type: ignore
+            model=model,
             batch_size=batch_size,
             push_to_db=push_to_db,
             verbose=verbose,

examples/evaluate_existing_episode.py

Lines changed: 1 addition & 4 deletions
@@ -1,7 +1,6 @@
 import asyncio
 import logging
 import subprocess
-import typing
 from datetime import datetime
 from logging import FileHandler
 
@@ -11,7 +10,6 @@
 from tqdm.asyncio import tqdm_asyncio
 
 from sotopia.database.logs import AnnotationForEpisode, EpisodeLog
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.server import aevaluate_one_episode
 
 # date and message only
@@ -40,7 +38,7 @@
 
 def run_async_server_in_batch_aevaluate(
     batch_size: int = 10,
-    model: LLM_Name = "gpt-4",
+    model: str = "gpt-4",
     reeval_list: list[str] = [],
     tag: str | None = None,
     push_to_db: bool = False,
@@ -100,7 +98,6 @@ def run_server(
 ) -> None:
     annotated_episodes_pks = [anno.episode for anno in AnnotationForEpisode.all()]
     annotated_episodes_pks = list(set(annotated_episodes_pks))
-    model = typing.cast(LLM_Name, model)
     # Call the function with the specified parameters
     run_async_server_in_batch_aevaluate(
         tag=tag,
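
The changes here and in examples/benchmark_evaluator.py are two halves of the same cleanup: `model` is now a plain `str`, so the `typing.cast(LLM_Name, model)` shim and the `# type: ignore` comments at the call sites are no longer needed. A minimal sketch of the resulting calling convention (the argument values are illustrative, not taken from the commit):

```python
# Minimal sketch: model identifiers are plain strings after this change,
# so no Literal-based LLM_Name alias or typing.cast is needed.
def run_async_server_in_batch_aevaluate(
    batch_size: int = 10,
    model: str = "gpt-4",  # was: model: LLM_Name = "gpt-4"
    reeval_list: list[str] = [],
    tag: str | None = None,
    push_to_db: bool = False,
) -> None: ...


# Any model identifier string can now be passed straight through.
run_async_server_in_batch_aevaluate(
    tag="reeval-demo",  # illustrative tag, not from the commit
    model="gpt-4o-mini",
    batch_size=5,
)
```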

examples/experiment_eval.py

Lines changed: 5 additions & 6 deletions
@@ -21,12 +21,11 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages import AgentAction, Observation
 from sotopia.samplers import (
     BaseSampler,
@@ -73,7 +72,7 @@
 def check_existing_episodes(
     env_id: str,
     agent_ids: list[str],
-    models: dict[str, LLM_Name],
+    models: dict[str, str],
     tag: str | None = None,
 ) -> bool:
     if tag:
@@ -106,7 +105,7 @@ def _sample_env_agent_combo_and_push_to_db(env_id: str) -> None:
 
 @gin.configurable
 def _iterate_env_agent_combo_not_in_db(
-    model_names: dict[str, LLM_Name],
+    model_names: dict[str, str],
     env_ids: list[str] = [],
     tag: str | None = None,
 ) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
@@ -164,7 +163,7 @@ def _iterate_env_agent_combo_not_in_db(
                 RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
             ],
             terminal_evaluators=[
-                ReachGoalLLMEvaluator(
+                EpisodeLLMEvaluator(
                     model_names["env"],
                     EvaluationForTwoAgents[evaluation_dimensions], # type: ignore
                     # TODO check how to do type annotation
@@ -188,7 +187,7 @@ def _iterate_env_agent_combo_not_in_db(
 def run_async_server_in_batch(
     *,
     batch_size: int = 1,
-    model_names: dict[str, LLM_Name] = {
+    model_names: dict[str, str] = {
         "env": "gpt-4",
         "agent1": "gpt-4o-mini",
         "agent2": "gpt-4o-mini",

examples/experimental/group_discussion_agents/group_discussion_agents.py

Lines changed: 1 addition & 2 deletions
@@ -4,8 +4,7 @@
 from sotopia.agents.llm_agent import ainput
 from sotopia.experimental.agents.base_agent import BaseAgent
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 from sotopia.messages import ActionType
 
 from pydantic import Field
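
This file and the next one (examples/experimental/interview_openhands/llm_agent.py) make the same one-line consolidation: `StrOutputParser` is re-exported from `sotopia.generation_utils`, so the second import from `sotopia.generation_utils.generate` is redundant. A usage sketch; the `agenerate` keyword arguments shown are an assumption based on typical sotopia usage, not something this commit specifies:

```python
from sotopia.generation_utils import StrOutputParser, agenerate


async def reply(model_name: str, history: str) -> str:
    # Assumed agenerate signature (model name, prompt template, template
    # inputs, output parser); check it against the installed sotopia version.
    return await agenerate(
        model_name=model_name,
        template="Conversation so far:\n{history}\nWrite the next reply.",
        input_values={"history": history},
        output_parser=StrOutputParser(),
    )
```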

examples/experimental/interview_openhands/llm_agent.py

Lines changed: 1 addition & 2 deletions
@@ -12,8 +12,7 @@
 
 from sotopia.experimental.agents.base_agent import BaseAgent
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 
 import json
 
examples/experimental/sotopia_original_replica/llm_agent_sotopia.py

Lines changed: 50 additions & 10 deletions
@@ -1,14 +1,16 @@
 import logging
 import sys
+import json
 from rich.logging import RichHandler
 
 from aact import NodeFactory
 
 from sotopia.experimental.agents.base_agent import BaseAgent
 from sotopia.experimental.agents.datamodels import Observation, AgentAction
+from sotopia.database.persistent_profile import AgentProfile
+from typing import Any
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 
 # Check Python version
 if sys.version_info >= (3, 11):
@@ -33,11 +35,13 @@ def __init__(
         input_channels: list[str],
         output_channel: str,
         query_interval: int,
-        agent_name: str,
         node_name: str,
-        goal: str,
         model_name: str,
-        redis_url: str,
+        goal: str,
+        agent_name: str = "",
+        background: dict[str, Any] | None = None,
+        agent_pk: str | None = None,
+        redis_url: str = "redis://localhost:6379/0",
     ):
         super().__init__(
             [(input_channel, Observation) for input_channel in input_channels],
@@ -47,23 +51,59 @@ def __init__(
         )
         self.output_channel = output_channel
         self.query_interval = query_interval
-        self.count_ticks = 0
+        self.count_ticks: int = 0
         self.message_history: list[Observation] = []
-        self.name = agent_name
-        self.model_name = model_name
-        self.goal = goal
+        self.goal: str = goal
+        self.model_name: str = model_name
+        self.agent_profile_pk: str | None = agent_pk
+        self.name: str = agent_name
+        self.background: dict[str, Any] | None = background
+        self.awake: bool = False
+
+    def set_profile(self, use_pk_value: bool) -> None:
+        if not use_pk_value:
+            assert (
+                self.background is not None and self.name is not None
+            ), "Background and name must be provided"
+            if " " in self.name:
+                first_name, last_name = self.name.split(" ", 1)
+            else:
+                first_name = self.name
+                last_name = ""
+            profile = AgentProfile(
+                first_name=first_name, last_name=last_name, **self.background
+            )
+            profile.save()
+        else:
+            profile = AgentProfile.get(pk=self.agent_profile_pk)
+
+        self.agent_profile_pk = profile.pk
+        self.name = " ".join([profile.first_name, profile.last_name]).strip()
+        self.background = profile.model_dump()
 
     def _format_message_history(self, message_history: list[Observation]) -> str:
         ## TODO: akhatua Fix the mapping of action to be gramatically correct
         return "\n".join(message.to_natural_language() for message in message_history)
 
     async def aact(self, obs: Observation) -> AgentAction:
         if obs.turn_number == -1:
+            if self.awake:
+                return AgentAction(
+                    agent_name=self.name,
+                    output_channel=self.output_channel,
+                    action_type="none",
+                    argument="",
+                )
+            args = json.loads(obs.last_turn)
+            self.set_profile(args["use_pk_value"])
+            self.awake = True
             return AgentAction(
                 agent_name=self.name,
                 output_channel=self.output_channel,
                 action_type="none",
-                argument=self.model_name,
+                argument=json.dumps(
+                    {"pk": self.agent_profile_pk, "model_name": self.model_name}
+                ),
             )
 
         self.message_history.append(obs)
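
Most of this diff implements a wake-up handshake: on the `turn_number == -1` message the moderator now sends a JSON payload saying whether the agent should load an existing `AgentProfile` by primary key (`use_pk_value`), and the agent replies with its resolved profile pk and model name instead of a bare model string. A small sketch of the two payloads implied by `aact` above (the pk value is illustrative):

```python
import json

# Payload the moderator is expected to place in obs.last_turn for the
# wake-up turn; "use_pk_value" mirrors the new origin.toml option.
wake_up = json.dumps({"use_pk_value": False})

# Payload the agent returns in AgentAction.argument once set_profile() has
# run; previously this field carried only the model name.
reply = json.dumps({"pk": "01EXAMPLEPK", "model_name": "gpt-4o-mini"})  # pk is made up

assert json.loads(wake_up)["use_pk_value"] is False
assert set(json.loads(reply)) == {"pk", "model_name"}
```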

examples/experimental/sotopia_original_replica/origin.toml

Lines changed: 18 additions & 3 deletions
@@ -1,5 +1,5 @@
 redis_url = "redis://localhost:6379/0"
-extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator"]
+extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator","sotopia.experimental.agents.evaluators"]
 
 
 [[nodes]]
@@ -9,11 +9,13 @@ node_class = "moderator"
 [nodes.node_args]
 output_channels = ["moderator:Jane", "moderator:Jack"]
 input_channels = ["Jane:moderator", "Jack:moderator"]
-agent_backgrounds = {"Jane" = "", "Jack" = ""}
+evaluator_channels = [["evaluator:moderator","moderator:evaluator"]]
 agent_mapping = {"moderator:Jane" = "Jane", "moderator:Jack" = "Jack"}
 scenario = "Two friends are sitting in a cafe and catching up with each other's lives."
-max_turns = 2
+max_turns = 3
 push_to_db = false
+evaluate_episode = true
+use_pk_value = false
 
 [[nodes]]
 node_name = "Jack"
@@ -26,6 +28,8 @@ output_channel = "Jack:moderator"
 goal = "Your goal is to borrow 5000 dollars from Jane."
 model_name = "gpt-4o-mini"
 agent_name = "Jack"
+background = {"occupation" = "construction worker"}
+agent_pk = ""
 
 
 [[nodes]]
@@ -39,6 +43,8 @@ input_channels = ["moderator:Jane"]
 goal = "Your goal is to help Jack however, you are in a finicial crisis yourself and can only afford to give him 500 dollars."
 model_name = "gpt-4o-mini"
 agent_name = "Jane"
+background = {"occupation" = "gardener"}
+agent_pk = ""
 
 [[nodes]]
 node_name = "chat_print"
@@ -50,3 +56,12 @@ node_class = "chat_print"
 
 [nodes.node_args]
 env_agents = ["Jack", "Jane"]
+
+[[nodes]]
+node_name = "evaluator"
+node_class = "evaluator"
+
+[nodes.node_args]
+input_channels = ["moderator:evaluator"]
+output_channels = ["evaluator:moderator"]
+model_name = "gpt-4o-mini"
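
The config now wires a dedicated evaluator node to the moderator through `evaluator_channels`, gives each agent a `background` and an `agent_pk`, and adds the moderator-level `evaluate_episode` and `use_pk_value` switches. A small sketch of reading those new keys with the standard library (the file path is assumed, not stated in the commit):

```python
import tomllib  # standard library on Python 3.11+

# Assumed path; adjust to wherever origin.toml lives in your checkout.
with open("examples/experimental/sotopia_original_replica/origin.toml", "rb") as f:
    config = tomllib.load(f)

moderator = next(n for n in config["nodes"] if n["node_class"] == "moderator")
evaluator = next(n for n in config["nodes"] if n["node_name"] == "evaluator")

print(moderator["node_args"]["evaluator_channels"])  # [['evaluator:moderator', 'moderator:evaluator']]
print(moderator["node_args"]["evaluate_episode"])    # True
print(evaluator["node_args"]["model_name"])          # gpt-4o-mini
```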

examples/fix_missing_episodes.py

Lines changed: 17 additions & 22 deletions
@@ -20,12 +20,11 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages.message_classes import AgentAction, Observation
 from sotopia.samplers.base_sampler import BaseSampler, EnvAgentCombo
 from sotopia.server import run_async_server
@@ -92,10 +91,8 @@ def find_combo_pk(
 def get_combo_model_map(
     all_episodes: List[EpisodeLog],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
-        defaultdict(Counter)
-    )
+) -> Dict[str, Counter[tuple[str, str, str]]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(Counter)
     bad_combos = []
     valid_count = 0
     invalid_count = 0
@@ -132,9 +129,7 @@ def get_combo_model_map(
             all_combos_map,
         )
         if curr_combo_pk:
-            model_pair: tuple[LLM_Name, LLM_Name, LLM_Name] = cast(
-                tuple[LLM_Name, LLM_Name, LLM_Name], tuple(curr_ep.models)
-            )
+            model_pair: tuple[str, str, str] = tuple(curr_ep.models) # type: ignore
             combo_model_map[curr_combo_pk][model_pair] += 1
             valid_count += 1
         else:
@@ -153,8 +148,8 @@
 
 
 def get_all_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
-) -> Set[tuple[LLM_Name, LLM_Name, LLM_Name]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
+) -> Set[tuple[str, str, str]]:
     all_model_pairs = set()
     for key in combo_model_map:
         for combo in combo_model_map[key]:
@@ -169,12 +164,12 @@
 
 
 def get_all_missing_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
-    all_model_pairs: Set[tuple[LLM_Name, LLM_Name, LLM_Name]],
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
+    all_model_pairs: Set[tuple[str, str, str]],
     num_required: int,
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
-    combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
-        defaultdict(Counter)
+) -> Dict[str, Counter[tuple[str, str, str]]]:
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(
+        Counter
     )
     missing_count = 0
     for key in combo_model_map:
@@ -192,9 +187,9 @@
 # temporally used for making sure unique (env, agents, models) setting; need to change
 # according to the Counter in the case needing to run multiple experiments for one setting
 def get_missing_model_combo_map(
-    combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[tuple[LLM_Name, LLM_Name], List[tuple[str, str, str]]]:
+) -> Dict[tuple[str, str], List[tuple[str, str, str]]]:
     missing_model_combo_map = defaultdict(list)
     for combo_pk in combo_missing_model_map:
         model_counter = combo_missing_model_map[combo_pk]
@@ -216,7 +211,7 @@
 
 
 def yield_env_agent_combo(
-    combo_ids: list[tuple[str, str, str]], model_names: dict[str, LLM_Name]
+    combo_ids: list[tuple[str, str, str]], model_names: dict[str, str]
 ) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
     for combo_id in combo_ids:
         env_id, agent_id1, agent_id2 = combo_id
@@ -229,7 +224,7 @@ def yield_env_agent_combo(
                 RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
             ],
            terminal_evaluators=[
-                ReachGoalLLMEvaluator(
+                EpisodeLLMEvaluator(
                     model_names["env"],
                     EvaluationForTwoAgents[SotopiaDimensions],
                 ),
@@ -249,8 +244,8 @@
 
 @gin.configurable
 def re_run_missing_episodes(
-    combo_with_models: dict[tuple[LLM_Name, LLM_Name], list[tuple[str, str, str]]],
-    model_names: dict[str, LLM_Name] = {
+    combo_with_models: dict[tuple[str, str], list[tuple[str, str, str]]],
+    model_names: dict[str, str] = {
        "env": "gpt-4",
        "agent1": "gpt-4o-mini",
        "agent2": "gpt-4o-mini",
