Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions src/langchain/evals/gibwork/multi_create_gibwork_task.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";

/**
 * Multi-turn eval: the user builds up a Gibwork task over several messages,
 * with an unrelated JUP price lookup in the middle of the conversation.
 *
 * Each turn carries the user `input` plus either the `expectedResponse` text
 * or the `expectedToolCall` (tool name and params) for that turn.
 */
const DATASET: ComplexEvalDataset[] = [
  {
    description: "Multi-turn create Gibwork task",
    inputs: {
      query: "I need to create a new Gibwork task",
    },
    turns: [
      {
        input: "I need to create a new Gibwork task",
        expectedResponse: "Sure, please provide the task title or details.",
      },
      {
        input: "The task is titled 'Fix my website'",
        expectedResponse:
          "Understood, 'Fix my website' is the task title. Any more details?",
      },
      {
        // Side question in the middle of task creation; the expected param is
        // the JUP mint address that is reused as tokenMintAddress below.
        input: "Also, what's the current price of JUP?",
        expectedToolCall: {
          tool: "solana_token_data",
          params: "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN",
        },
      },
      {
        input:
          "The Gibwork job should be for 1000 JUP tokens with no extra content.",
        expectedResponse:
          "Okay, 1000 JUP tokens, no additional content. Any requirements or tags?",
      },
      {
        // Final turn: every field gathered across the conversation should be
        // present in the create_gibwork_task call.
        input: "Set content and requirements to N/A and tag it as webdev",
        expectedToolCall: {
          tool: "create_gibwork_task",
          params: {
            title: "Fix my website",
            content: "N/A",
            requirements: "N/A",
            tags: ["webdev"],
            tokenMintAddress: "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN",
            // Must match the 1000 JUP the user requested in the previous turn.
            amount: 1000,
          },
        },
      },
    ],
  },
];

// Hand the Gibwork dataset to runComplexEval; the second argument is
// presumably the label this eval run is reported under (confirm in ../utils/runEvals).
runComplexEval(DATASET, "Multi-turn Create Gibwork Task test");
48 changes: 48 additions & 0 deletions src/langchain/evals/jupiter/multi_token_data.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";

// Mint address the user supplies for KING in the second turn.
const KING_MINT = "5eqNDjbsWL9hfAqUfhegTxgEa3XardzGdVAboMA4pump";
// Mint expected as the trade input when the user buys "using USDC".
const USDC_MINT = "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v";
// Wallet address expected in the closing balance lookup.
const BALANCE_WALLET = "GZbQmKYYzwjP3nbdqRWPLn98ipAni9w5eXMGp7bmZbGB";
// Opening message; serves as both the dataset query and the first turn.
const OPENING_QUERY = "What's the price of KING?";

/**
 * Multi-turn eval for a token-data conversation: a price question, the mint
 * address follow-up, a purchase with USDC, and a final balance check.
 *
 * Turns pair a user `input` with either an `expectedResponse` or an
 * `expectedToolCall`.
 */
const DATASET: ComplexEvalDataset[] = [
  {
    description: "Multi-turn token data inquiry",
    inputs: { query: OPENING_QUERY },
    turns: [
      {
        input: OPENING_QUERY,
        expectedResponse: "Sure, can you provide the mint address of KING?",
      },
      {
        input: `The mint address is ${KING_MINT}`,
        expectedToolCall: { tool: "solana_token_data", params: KING_MINT },
      },
      {
        input: "Buy 20 tokens using USDC",
        expectedToolCall: {
          tool: "solana_trade",
          params: {
            inputAmount: 20,
            inputMint: USDC_MINT,
            outputMint: KING_MINT,
            slippageBps: 100,
          },
        },
      },
      {
        input: "And check my KING balance",
        expectedToolCall: {
          tool: "solana_balance_other",
          params: {
            walletAddress: BALANCE_WALLET,
            tokenAddress: KING_MINT,
          },
        },
      },
    ],
  },
];

// Hand the token-data dataset to runComplexEval; the second argument is
// presumably the label this eval run is reported under (confirm in ../utils/runEvals).
runComplexEval(DATASET, "Multi-turn Token Data test");
44 changes: 44 additions & 0 deletions src/langchain/evals/jupiter/multi_token_swap.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";

/**
 * Multi-turn eval for a token swap: the user narrows the request over three
 * messages down to a concrete USDC -> JUP trade, then asks for the USDC
 * balance of a specific wallet.
 *
 * Turns pair a user `input` with either an `expectedResponse` or an
 * `expectedToolCall`.
 */
const DATASET: ComplexEvalDataset[] = [
  {
    description: "Multi-turn token swap",
    inputs: {
      query: "I want to swap some tokens",
    },
    turns: [
      {
        input: "I want to swap some tokens",
        expectedResponse: "Sure, which tokens would you like to swap?",
      },
      {
        input: "I want to exchange USDC for JUP tokens",
        expectedResponse: "How much USDC?",
      },
      {
        input: "Swap 10 USDC for JUP with 1% slippage",
        expectedToolCall: {
          tool: "solana_trade",
          params: {
            outputMint: "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN",
            inputAmount: 10,
            inputMint: "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v",
            // The 1% slippage from the user message, expressed in basis points.
            slippageBps: 100,
          },
        },
      },
      {
        input:
          "Then check the USDC balance of GZbQmKYYzwjP3nbdqRWPLn98ipAni9w5eXMGp7bmZbGB",
        expectedToolCall: {
          tool: "solana_balance_other",
          params: {
            // The user names the wallet explicitly, so the expected call must
            // carry it alongside the token mint.
            walletAddress: "GZbQmKYYzwjP3nbdqRWPLn98ipAni9w5eXMGp7bmZbGB",
            tokenAddress: "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v",
          },
        },
      },
    ],
  },
];

// Hand the token-swap dataset to runComplexEval; the second argument is
// presumably the label this eval run is reported under (confirm in ../utils/runEvals).
runComplexEval(DATASET, "Multi-turn Token Swap test");
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";

// Values the user supplies across the conversation; the deploy call is
// expected to echo them back as its params.
const COLLECTION_NAME = "MyCollection";
const METADATA_URI = "https://metadata.mycoll.io/collection.json";
const ROYALTY_BPS = 250;
// First user message; serves as both the dataset query and the first turn.
const FIRST_MESSAGE = "I want to deploy an NFT collection";

/**
 * Multi-turn eval for deploying an NFT collection: the name arrives in one
 * message, the metadata URI and royalty in another, and the last message
 * asks for the deployed collection's details.
 *
 * Turns pair a user `input` with either an `expectedResponse` or an
 * `expectedToolCall`.
 */
const DATASET: ComplexEvalDataset[] = [
  {
    description: "Multi-turn NFT collection deployment",
    inputs: { query: FIRST_MESSAGE },
    turns: [
      {
        input: FIRST_MESSAGE,
        expectedResponse:
          "Sure, what's the name of your collection? I also need the metadata URI and royalty basis points.",
      },
      {
        input: `The collection should be named ${COLLECTION_NAME}`,
        expectedResponse: "Got it. Metadata URI and royalty basis points?",
      },
      {
        input: `Its metadata URI is ${METADATA_URI}. Set the royalty to ${ROYALTY_BPS} basis points`,
        expectedToolCall: {
          tool: "solana_deploy_collection",
          params: {
            name: COLLECTION_NAME,
            uri: METADATA_URI,
            royaltyBasisPoints: ROYALTY_BPS,
          },
        },
      },
      {
        input: "Also, retrieve the deployed collection details",
        expectedToolCall: {
          tool: "solana_get_asset",
          params: { collection: COLLECTION_NAME },
        },
      },
    ],
  },
];

// Hand the collection-deployment dataset to runComplexEval; the second argument is
// presumably the label this eval run is reported under (confirm in ../utils/runEvals).
runComplexEval(DATASET, "Multi-turn solana_deploy_collection test");
Loading
Loading