sendaifun · thearyanag · Apr 9, 2025 · Mar 25, 2025 · Mar 26, 2025 · Apr 9, 2025
diff --git a/src/langchain/evals/gibwork/multi_create_gibwork_task.eval.ts b/src/langchain/evals/gibwork/multi_create_gibwork_task.eval.ts
@@ -0,0 +1,57 @@
+import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";
+
+/**
+ * Multi-turn scenario: the user assembles a Gibwork task over several
+ * messages, with a JUP price lookup interleaved mid-conversation. The last
+ * turn expects `create_gibwork_task` to be called with the details gathered
+ * across the earlier turns.
+ */
+const DATASET: ComplexEvalDataset[] = [
+  {
+    description: "Multi-turn create Gibwork task",
+    inputs: {
+      query: "I need to create a new Gibwork task",
+    },
+    turns: [
+      {
+        input: "I need to create a new Gibwork task",
+        expectedResponse: "Sure, please provide the task title or details.",
+      },
+      {
+        input: "The task is titled 'Fix my website'",
+        expectedResponse:
+          "Understood, 'Fix my website' is the task title. Any more details?",
+      },
+      {
+        input: "Also, what's the current price of JUP?",
+        expectedToolCall: {
+          tool: "solana_token_data",
+          params: "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN",
+        },
+      },
+      {
+        input:
+          "The Gibwork job should be for 1000 JUP tokens with no extra content.",
+        expectedResponse:
+          "Okay, 1000 JUP tokens, no additional content. Any requirements or tags?",
+      },
+      {
+        input: "Set content and requirements to N/A and tag it as webdev",
+        expectedToolCall: {
+          tool: "create_gibwork_task",
+          params: {
+            title: "Fix my website",
+            content: "N/A",
+            requirements: "N/A",
+            tags: ["webdev"],
+            tokenMintAddress: "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN",
+            // The user asked for 1000 JUP in the previous turn.
+            amount: 1000,
+          },
+        },
+      },
+    ],
+  },
+];
+
+runComplexEval(DATASET, "Multi-turn Create Gibwork Task test");
diff --git a/src/langchain/evals/jupiter/multi_token_data.eval.ts b/src/langchain/evals/jupiter/multi_token_data.eval.ts
@@ -0,0 +1,55 @@
+import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";
+
+// Mint addresses reused across the turns below.
+const KING_MINT = "5eqNDjbsWL9hfAqUfhegTxgEa3XardzGdVAboMA4pump";
+const USDC_MINT = "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v";
+
+// Opening message; also supplied as the dataset-level query.
+const OPENING_QUERY = "What's the price of KING?";
+
+/**
+ * Multi-turn scenario: a price question about KING that first requires the
+ * mint address, followed by a USDC-funded purchase and a balance check.
+ */
+const DATASET: ComplexEvalDataset[] = [
+  {
+    description: "Multi-turn token data inquiry",
+    inputs: { query: OPENING_QUERY },
+    turns: [
+      {
+        input: OPENING_QUERY,
+        expectedResponse: "Sure, can you provide the mint address of KING?",
+      },
+      {
+        input: `The mint address is ${KING_MINT}`,
+        expectedToolCall: { tool: "solana_token_data", params: KING_MINT },
+      },
+      {
+        input: "Buy 20 tokens using USDC",
+        expectedToolCall: {
+          tool: "solana_trade",
+          params: {
+            inputAmount: 20,
+            inputMint: USDC_MINT,
+            outputMint: KING_MINT,
+            slippageBps: 100,
+          },
+        },
+      },
+      {
+        // NOTE(review): this wallet address never appears in the conversation —
+        // confirm it is the wallet the eval agent is expected to resolve "my" to.
+        input: "And check my KING balance",
+        expectedToolCall: {
+          tool: "solana_balance_other",
+          params: {
+            walletAddress: "GZbQmKYYzwjP3nbdqRWPLn98ipAni9w5eXMGp7bmZbGB",
+            tokenAddress: KING_MINT,
+          },
+        },
+      },
+    ],
+  },
+];
+
+runComplexEval(DATASET, "Multi-turn Token Data test");
diff --git a/src/langchain/evals/jupiter/multi_token_swap.eval.ts b/src/langchain/evals/jupiter/multi_token_swap.eval.ts
@@ -0,0 +1,50 @@
+import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";
+
+/**
+ * Multi-turn scenario: the user narrows down a USDC -> JUP swap over three
+ * messages, then asks for the USDC balance of an explicitly named wallet.
+ */
+const DATASET: ComplexEvalDataset[] = [
+  {
+    description: "Multi-turn token swap",
+    inputs: {
+      query: "I want to swap some tokens",
+    },
+    turns: [
+      {
+        input: "I want to swap some tokens",
+        expectedResponse: "Sure, which tokens would you like to swap?",
+      },
+      {
+        input: "I want to exchange USDC for JUP tokens",
+        expectedResponse: "How much USDC?",
+      },
+      {
+        input: "Swap 10 USDC for JUP with 1% slippage",
+        expectedToolCall: {
+          tool: "solana_trade",
+          params: {
+            outputMint: "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN",
+            inputAmount: 10,
+            inputMint: "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v",
+            slippageBps: 100,
+          },
+        },
+      },
+      {
+        input:
+          "Then check the USDC balance of GZbQmKYYzwjP3nbdqRWPLn98ipAni9w5eXMGp7bmZbGB",
+        expectedToolCall: {
+          tool: "solana_balance_other",
+          params: {
+            // The wallet named in the prompt must be passed through to the tool.
+            walletAddress: "GZbQmKYYzwjP3nbdqRWPLn98ipAni9w5eXMGp7bmZbGB",
+            tokenAddress: "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v",
+          },
+        },
+      },
+    ],
+  },
+];
+
+runComplexEval(DATASET, "Multi-turn Token Swap test");
diff --git a/src/langchain/evals/metaplex/multi_solana_deploy_collection.eval.ts b/src/langchain/evals/metaplex/multi_solana_deploy_collection.eval.ts
@@ -0,0 +1,52 @@
+import { runComplexEval, ComplexEvalDataset } from "../utils/runEvals";
+
+// Collection details the user supplies over the course of the conversation.
+const COLLECTION_NAME = "MyCollection";
+const COLLECTION_URI = "https://metadata.mycoll.io/collection.json";
+const ROYALTY_BPS = 250;
+
+// Opening message; also supplied as the dataset-level query.
+const OPENING_QUERY = "I want to deploy an NFT collection";
+
+/**
+ * Multi-turn scenario: the user provides the collection name, then the
+ * metadata URI and royalty, and finally asks for the deployed collection's
+ * details.
+ */
+const DATASET: ComplexEvalDataset[] = [
+  {
+    description: "Multi-turn NFT collection deployment",
+    inputs: { query: OPENING_QUERY },
+    turns: [
+      {
+        input: OPENING_QUERY,
+        expectedResponse:
+          "Sure, what's the name of your collection? I also need the metadata URI and royalty basis points.",
+      },
+      {
+        input: `The collection should be named ${COLLECTION_NAME}`,
+        expectedResponse: "Got it. Metadata URI and royalty basis points?",
+      },
+      {
+        input: `Its metadata URI is ${COLLECTION_URI}. Set the royalty to ${ROYALTY_BPS} basis points`,
+        expectedToolCall: {
+          tool: "solana_deploy_collection",
+          params: {
+            name: COLLECTION_NAME,
+            uri: COLLECTION_URI,
+            royaltyBasisPoints: ROYALTY_BPS,
+          },
+        },
+      },
+      {
+        input: "Also, retrieve the deployed collection details",
+        expectedToolCall: {
+          tool: "solana_get_asset",
+          params: { collection: COLLECTION_NAME },
+        },
+      },
+    ],
+  },
+];
+
+runComplexEval(DATASET, "Multi-turn solana_deploy_collection test");