
Commit 39bfb21

Merge branch 'main' into release
merge main
2 parents: 2716fc7 + fd1b4d9


42 files changed (+698 additions, -1059 deletions)

docs/pages/index.mdx

Lines changed: 2 additions & 2 deletions
@@ -176,8 +176,8 @@ or manual setup:
 tar -xvzf redis-stack-server.tar.gz
 export PATH=$(pwd)/redis-stack-server-7.2.0-v10/bin:$PATH
 # if you are using Ubunutu 22.04, please do an extra step
-wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2.22_amd64.deb
-sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2.22_amd64.deb
+wget http://nz2.archive.ubuntu.com/ubuntu/pool/main/o/openssl/libssl1.1_1.1.1f-1ubuntu2_amd64.deb
+sudo dpkg -i libssl1.1_1.1.1f-1ubuntu2_amd64.deb
 ```
 
 ### Start the server

examples/benchmark_evaluator.py

Lines changed: 2 additions & 2 deletions
@@ -123,7 +123,7 @@ def evaluate_evaluator(
     )
     run_async_server_in_batch_aevaluate(
         tag=tag,
-        model=model, # type: ignore
+        model=model,
         batch_size=batch_size,
         push_to_db=push_to_db,
         verbose=verbose,
@@ -142,7 +142,7 @@ def evaluate_evaluator(
     while to_re_evaluate_list:
         run_async_server_in_batch_aevaluate(
             tag=tag,
-            model=model, # type: ignore
+            model=model,
             batch_size=batch_size,
             push_to_db=push_to_db,
             verbose=verbose,

examples/evaluate_existing_episode.py

Lines changed: 1 addition & 4 deletions
@@ -1,7 +1,6 @@
 import asyncio
 import logging
 import subprocess
-import typing
 from datetime import datetime
 from logging import FileHandler
 
@@ -11,7 +10,6 @@
 from tqdm.asyncio import tqdm_asyncio
 
 from sotopia.database.logs import AnnotationForEpisode, EpisodeLog
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.server import aevaluate_one_episode
 
 # date and message only
@@ -40,7 +38,7 @@
 
 def run_async_server_in_batch_aevaluate(
     batch_size: int = 10,
-    model: LLM_Name = "gpt-4",
+    model: str = "gpt-4",
     reeval_list: list[str] = [],
     tag: str | None = None,
     push_to_db: bool = False,
@@ -100,7 +98,6 @@ def run_server(
 ) -> None:
     annotated_episodes_pks = [anno.episode for anno in AnnotationForEpisode.all()]
     annotated_episodes_pks = list(set(annotated_episodes_pks))
-    model = typing.cast(LLM_Name, model)
     # Call the function with the specified parameters
     run_async_server_in_batch_aevaluate(
         tag=tag,
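
The changes here and in examples/benchmark_evaluator.py are two halves of the same cleanup: `model` is now a plain `str`, so the `typing.cast(LLM_Name, model)` shim and the `# type: ignore` comments at the call sites are no longer needed. A minimal sketch of the resulting calling convention (the argument values are illustrative, not taken from the commit):

```python
# Minimal sketch: model identifiers are plain strings after this change,
# so no Literal-based LLM_Name alias or typing.cast is needed.
def run_async_server_in_batch_aevaluate(
    batch_size: int = 10,
    model: str = "gpt-4",  # was: model: LLM_Name = "gpt-4"
    reeval_list: list[str] = [],
    tag: str | None = None,
    push_to_db: bool = False,
) -> None: ...


# Any model identifier string can now be passed straight through.
run_async_server_in_batch_aevaluate(
    tag="reeval-demo",  # illustrative tag, not from the commit
    model="gpt-4o-mini",
    batch_size=5,
)
```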

examples/experiment_eval.py

Lines changed: 5 additions & 6 deletions
@@ -21,12 +21,11 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages import AgentAction, Observation
 from sotopia.samplers import (
     BaseSampler,
@@ -73,7 +72,7 @@
 def check_existing_episodes(
     env_id: str,
     agent_ids: list[str],
-    models: dict[str, LLM_Name],
+    models: dict[str, str],
     tag: str | None = None,
 ) -> bool:
     if tag:
@@ -106,7 +105,7 @@ def _sample_env_agent_combo_and_push_to_db(env_id: str) -> None:
 
 @gin.configurable
 def _iterate_env_agent_combo_not_in_db(
-    model_names: dict[str, LLM_Name],
+    model_names: dict[str, str],
     env_ids: list[str] = [],
     tag: str | None = None,
 ) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
@@ -164,7 +163,7 @@ def _iterate_env_agent_combo_not_in_db(
                 RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
             ],
             terminal_evaluators=[
-                ReachGoalLLMEvaluator(
+                EpisodeLLMEvaluator(
                     model_names["env"],
                     EvaluationForTwoAgents[evaluation_dimensions], # type: ignore
                     # TODO check how to do type annotation
@@ -188,7 +187,7 @@ def _iterate_env_agent_combo_not_in_db(
 def run_async_server_in_batch(
     *,
     batch_size: int = 1,
-    model_names: dict[str, LLM_Name] = {
+    model_names: dict[str, str] = {
         "env": "gpt-4",
         "agent1": "gpt-4o-mini",
         "agent2": "gpt-4o-mini",

examples/experimental/group_discussion_agents/group_discussion_agents.py

Lines changed: 1 addition & 2 deletions
@@ -4,8 +4,7 @@
 from sotopia.agents.llm_agent import ainput
 from sotopia.experimental.agents.base_agent import BaseAgent
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 from sotopia.messages import ActionType
 
 from pydantic import Field
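
This file and the next one (examples/experimental/interview_openhands/llm_agent.py) make the same one-line consolidation: `StrOutputParser` is re-exported from `sotopia.generation_utils`, so the second import from `sotopia.generation_utils.generate` is redundant. A usage sketch; the `agenerate` keyword arguments shown are an assumption based on typical sotopia usage, not something this commit specifies:

```python
from sotopia.generation_utils import StrOutputParser, agenerate


async def reply(model_name: str, history: str) -> str:
    # Assumed agenerate signature (model name, prompt template, template
    # inputs, output parser); check it against the installed sotopia version.
    return await agenerate(
        model_name=model_name,
        template="Conversation so far:\n{history}\nWrite the next reply.",
        input_values={"history": history},
        output_parser=StrOutputParser(),
    )
```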

examples/experimental/interview_openhands/llm_agent.py

Lines changed: 1 addition & 2 deletions
@@ -12,8 +12,7 @@
 
 from sotopia.experimental.agents.base_agent import BaseAgent
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 
 import json
 
examples/experimental/sotopia_original_replica/llm_agent_sotopia.py

Lines changed: 50 additions & 10 deletions
@@ -1,14 +1,16 @@
 import logging
 import sys
+import json
 from rich.logging import RichHandler
 
 from aact import NodeFactory
 
 from sotopia.experimental.agents.base_agent import BaseAgent
 from sotopia.experimental.agents.datamodels import Observation, AgentAction
+from sotopia.database.persistent_profile import AgentProfile
+from typing import Any
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 
 # Check Python version
 if sys.version_info >= (3, 11):
@@ -33,11 +35,13 @@ def __init__(
         input_channels: list[str],
         output_channel: str,
         query_interval: int,
-        agent_name: str,
         node_name: str,
-        goal: str,
         model_name: str,
-        redis_url: str,
+        goal: str,
+        agent_name: str = "",
+        background: dict[str, Any] | None = None,
+        agent_pk: str | None = None,
+        redis_url: str = "redis://localhost:6379/0",
     ):
         super().__init__(
             [(input_channel, Observation) for input_channel in input_channels],
@@ -47,23 +51,59 @@ def __init__(
         )
         self.output_channel = output_channel
         self.query_interval = query_interval
-        self.count_ticks = 0
+        self.count_ticks: int = 0
         self.message_history: list[Observation] = []
-        self.name = agent_name
-        self.model_name = model_name
-        self.goal = goal
+        self.goal: str = goal
+        self.model_name: str = model_name
+        self.agent_profile_pk: str | None = agent_pk
+        self.name: str = agent_name
+        self.background: dict[str, Any] | None = background
+        self.awake: bool = False
+
+    def set_profile(self, use_pk_value: bool) -> None:
+        if not use_pk_value:
+            assert (
+                self.background is not None and self.name is not None
+            ), "Background and name must be provided"
+            if " " in self.name:
+                first_name, last_name = self.name.split(" ", 1)
+            else:
+                first_name = self.name
+                last_name = ""
+            profile = AgentProfile(
+                first_name=first_name, last_name=last_name, **self.background
+            )
+            profile.save()
+        else:
+            profile = AgentProfile.get(pk=self.agent_profile_pk)
+
+        self.agent_profile_pk = profile.pk
+        self.name = " ".join([profile.first_name, profile.last_name]).strip()
+        self.background = profile.model_dump()
 
     def _format_message_history(self, message_history: list[Observation]) -> str:
         ## TODO: akhatua Fix the mapping of action to be gramatically correct
         return "\n".join(message.to_natural_language() for message in message_history)
 
     async def aact(self, obs: Observation) -> AgentAction:
         if obs.turn_number == -1:
+            if self.awake:
+                return AgentAction(
+                    agent_name=self.name,
+                    output_channel=self.output_channel,
+                    action_type="none",
+                    argument="",
+                )
+            args = json.loads(obs.last_turn)
+            self.set_profile(args["use_pk_value"])
+            self.awake = True
             return AgentAction(
                 agent_name=self.name,
                 output_channel=self.output_channel,
                 action_type="none",
-                argument=self.model_name,
+                argument=json.dumps(
+                    {"pk": self.agent_profile_pk, "model_name": self.model_name}
+                ),
             )
 
         self.message_history.append(obs)
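
Most of this diff implements a wake-up handshake: on the `turn_number == -1` message the moderator now sends a JSON payload saying whether the agent should load an existing `AgentProfile` by primary key (`use_pk_value`), and the agent replies with its resolved profile pk and model name instead of a bare model string. A small sketch of the two payloads implied by `aact` above (the pk value is illustrative):

```python
import json

# Payload the moderator is expected to place in obs.last_turn for the
# wake-up turn; "use_pk_value" mirrors the new origin.toml option.
wake_up = json.dumps({"use_pk_value": False})

# Payload the agent returns in AgentAction.argument once set_profile() has
# run; previously this field carried only the model name.
reply = json.dumps({"pk": "01EXAMPLEPK", "model_name": "gpt-4o-mini"})  # pk is made up

assert json.loads(wake_up)["use_pk_value"] is False
assert set(json.loads(reply)) == {"pk", "model_name"}
```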

examples/experimental/sotopia_original_replica/origin.toml

Lines changed: 18 additions & 3 deletions
@@ -1,5 +1,5 @@
 redis_url = "redis://localhost:6379/0"
-extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator"]
+extra_modules = ["examples.experimental.sotopia_original_replica.llm_agent_sotopia", "examples.experimental.nodes.chat_print_node", "sotopia.experimental.agents.moderator","sotopia.experimental.agents.evaluators"]
 
 
 [[nodes]]
@@ -9,11 +9,13 @@ node_class = "moderator"
 [nodes.node_args]
 output_channels = ["moderator:Jane", "moderator:Jack"]
 input_channels = ["Jane:moderator", "Jack:moderator"]
-agent_backgrounds = {"Jane" = "", "Jack" = ""}
+evaluator_channels = [["evaluator:moderator","moderator:evaluator"]]
 agent_mapping = {"moderator:Jane" = "Jane", "moderator:Jack" = "Jack"}
 scenario = "Two friends are sitting in a cafe and catching up with each other's lives."
-max_turns = 2
+max_turns = 3
 push_to_db = false
+evaluate_episode = true
+use_pk_value = false
 
 [[nodes]]
 node_name = "Jack"
@@ -26,6 +28,8 @@ output_channel = "Jack:moderator"
 goal = "Your goal is to borrow 5000 dollars from Jane."
 model_name = "gpt-4o-mini"
 agent_name = "Jack"
+background = {"occupation" = "construction worker"}
+agent_pk = ""
 
 
 [[nodes]]
@@ -39,6 +43,8 @@ input_channels = ["moderator:Jane"]
 goal = "Your goal is to help Jack however, you are in a finicial crisis yourself and can only afford to give him 500 dollars."
 model_name = "gpt-4o-mini"
 agent_name = "Jane"
+background = {"occupation" = "gardener"}
+agent_pk = ""
 
 [[nodes]]
 node_name = "chat_print"
@@ -50,3 +56,12 @@ node_class = "chat_print"
 
 [nodes.node_args]
 env_agents = ["Jack", "Jane"]
+
+[[nodes]]
+node_name = "evaluator"
+node_class = "evaluator"
+
+[nodes.node_args]
+input_channels = ["moderator:evaluator"]
+output_channels = ["evaluator:moderator"]
+model_name = "gpt-4o-mini"
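
The config now wires a dedicated evaluator node to the moderator through `evaluator_channels`, gives each agent a `background` and an `agent_pk`, and adds the moderator-level `evaluate_episode` and `use_pk_value` switches. A small sketch of reading those new keys with the standard library (the file path is assumed, not stated in the commit):

```python
import tomllib  # standard library on Python 3.11+

# Assumed path; adjust to wherever origin.toml lives in your checkout.
with open("examples/experimental/sotopia_original_replica/origin.toml", "rb") as f:
    config = tomllib.load(f)

moderator = next(n for n in config["nodes"] if n["node_class"] == "moderator")
evaluator = next(n for n in config["nodes"] if n["node_name"] == "evaluator")

print(moderator["node_args"]["evaluator_channels"])  # [['evaluator:moderator', 'moderator:evaluator']]
print(moderator["node_args"]["evaluate_episode"])    # True
print(evaluator["node_args"]["model_name"])          # gpt-4o-mini
```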

examples/fix_missing_episodes.py

Lines changed: 17 additions & 22 deletions
@@ -20,12 +20,11 @@
 )
 from sotopia.envs.evaluators import (
     EvaluationForTwoAgents,
-    ReachGoalLLMEvaluator,
+    EpisodeLLMEvaluator,
     RuleBasedTerminatedEvaluator,
     SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages.message_classes import AgentAction, Observation
 from sotopia.samplers.base_sampler import BaseSampler, EnvAgentCombo
 from sotopia.server import run_async_server
@@ -92,10 +91,8 @@ def find_combo_pk(
 def get_combo_model_map(
     all_episodes: List[EpisodeLog],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
-        defaultdict(Counter)
-    )
+) -> Dict[str, Counter[tuple[str, str, str]]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(Counter)
     bad_combos = []
     valid_count = 0
     invalid_count = 0
@@ -132,9 +129,7 @@ def get_combo_model_map(
             all_combos_map,
         )
         if curr_combo_pk:
-            model_pair: tuple[LLM_Name, LLM_Name, LLM_Name] = cast(
-                tuple[LLM_Name, LLM_Name, LLM_Name], tuple(curr_ep.models)
-            )
+            model_pair: tuple[str, str, str] = tuple(curr_ep.models) # type: ignore
             combo_model_map[curr_combo_pk][model_pair] += 1
             valid_count += 1
         else:
@@ -153,8 +148,8 @@
 
 
 def get_all_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
-) -> Set[tuple[LLM_Name, LLM_Name, LLM_Name]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
+) -> Set[tuple[str, str, str]]:
     all_model_pairs = set()
     for key in combo_model_map:
         for combo in combo_model_map[key]:
@@ -169,12 +164,12 @@
 
 
 def get_all_missing_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
-    all_model_pairs: Set[tuple[LLM_Name, LLM_Name, LLM_Name]],
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
+    all_model_pairs: Set[tuple[str, str, str]],
     num_required: int,
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
-    combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
-        defaultdict(Counter)
+) -> Dict[str, Counter[tuple[str, str, str]]]:
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(
+        Counter
     )
     missing_count = 0
     for key in combo_model_map:
@@ -192,9 +187,9 @@
 # temporally used for making sure unique (env, agents, models) setting; need to change
 # according to the Counter in the case needing to run multiple experiments for one setting
 def get_missing_model_combo_map(
-    combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[tuple[LLM_Name, LLM_Name], List[tuple[str, str, str]]]:
+) -> Dict[tuple[str, str], List[tuple[str, str, str]]]:
     missing_model_combo_map = defaultdict(list)
     for combo_pk in combo_missing_model_map:
         model_counter = combo_missing_model_map[combo_pk]
@@ -216,7 +211,7 @@
 
 
 def yield_env_agent_combo(
-    combo_ids: list[tuple[str, str, str]], model_names: dict[str, LLM_Name]
+    combo_ids: list[tuple[str, str, str]], model_names: dict[str, str]
 ) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
     for combo_id in combo_ids:
         env_id, agent_id1, agent_id2 = combo_id
@@ -229,7 +224,7 @@ def yield_env_agent_combo(
                 RuleBasedTerminatedEvaluator(max_turn_number=20, max_stale_turn=2),
             ],
            terminal_evaluators=[
-                ReachGoalLLMEvaluator(
+                EpisodeLLMEvaluator(
                     model_names["env"],
                     EvaluationForTwoAgents[SotopiaDimensions],
                 ),
@@ -249,8 +244,8 @@
 
 @gin.configurable
 def re_run_missing_episodes(
-    combo_with_models: dict[tuple[LLM_Name, LLM_Name], list[tuple[str, str, str]]],
-    model_names: dict[str, LLM_Name] = {
+    combo_with_models: dict[tuple[str, str], list[tuple[str, str, str]]],
+    model_names: dict[str, str] = {
        "env": "gpt-4",
        "agent1": "gpt-4o-mini",
        "agent2": "gpt-4o-mini",
