Add example for question answering over documents with OpenAI Function Agent (#6448)

homanp · hwchase17 · web-flow · commit d4e8e0f5ab9c · 2023-06-19T21:35:45.000-07:00
This PR adds an example of doing question answering over documents using OpenAI Function Agents. #### Who can review? @hwchase17 --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
diff --git a/docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb b/docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb
@@ -0,0 +1,183 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ec1d7a9a",
+   "metadata": {},
+   "source": [
+    "# Document Comparison\n",
+    "\n",
+    "This notebook shows how to use an agent to compare two documents.\n",
+    "\n",
+    "The high level idea is we will create a question-answering chain for each document, and then use those chains as tools that an agent can call to answer questions that compare the documents."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "8632a37c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from pydantic import BaseModel, Field\n",
+    "\n",
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.agents import Tool\n",
+    "from langchain.embeddings.openai import OpenAIEmbeddings\n",
+    "from langchain.text_splitter import CharacterTextSplitter\n",
+    "from langchain.vectorstores import FAISS\n",
+    "from langchain.document_loaders import PyPDFLoader\n",
+    "from langchain.chains import RetrievalQA"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "64f19917",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Argument schema for the document tools: each tool takes a single `question` string.\n",
+    "class DocumentInput(BaseModel):\n",
+    "    question: str = Field()\n",
+    "\n",
+    "\n",
+    "llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
+    "\n",
+    "# Each entry names a tool and points at a local copy of the PDF it answers questions about.\n",
+    "# Download the PDFs from the URLs in the comments and adjust the paths for your machine.\n",
+    "files = [\n",
+    "    # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
+    "    {\n",
+    "        \"name\": \"alphabet-earnings\",\n",
+    "        \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
+    "    },\n",
+    "    # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
+    "    {\n",
+    "        \"name\": \"tesla-earnings\",\n",
+    "        \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\",\n",
+    "    },\n",
+    "]\n",
+    "\n",
+    "# The splitter and the embedding model do not depend on the file, so build them once\n",
+    "# instead of re-creating them on every loop iteration.\n",
+    "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
+    "embeddings = OpenAIEmbeddings()\n",
+    "\n",
+    "tools = []\n",
+    "for file in files:\n",
+    "    # Load the PDF, split it into chunks, and index the chunks in a FAISS vector store.\n",
+    "    loader = PyPDFLoader(file[\"path\"])\n",
+    "    pages = loader.load_and_split()\n",
+    "    docs = text_splitter.split_documents(pages)\n",
+    "    retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
+    "\n",
+    "    # Wrap a question-answering chain over this document's retriever in a Tool\n",
+    "    tools.append(\n",
+    "        Tool(\n",
+    "            args_schema=DocumentInput,\n",
+    "            name=file[\"name\"],\n",
+    "            description=f\"useful when you want to answer questions about {file['name']}\",\n",
+    "            func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever),\n",
+    "        )\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "eca02549",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.agents import initialize_agent\n",
+    "from langchain.agents import AgentType"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "c4d56c25",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new  chain...\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n",
+      "\n",
+      "\n",
+      "\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
+      "Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n",
+      "\n",
+      "\n",
+      "\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'input': 'did alphabet or tesla have more revenue?',\n",
+       " 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Model that drives the agent: gpt-3.5-turbo-0613 at temperature 0.\n",
+    "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0613\", temperature=0)\n",
+    "\n",
+    "# OpenAI-functions agent that is given the per-document tools built earlier.\n",
+    "agent = initialize_agent(\n",
+    "    tools=tools,\n",
+    "    llm=llm,\n",
+    "    agent=AgentType.OPENAI_FUNCTIONS,\n",
+    "    verbose=True,\n",
+    ")\n",
+    "\n",
+    "# Ask a question that compares the two documents.\n",
+    "question = \"did alphabet or tesla have more revenue?\"\n",
+    "agent({\"input\": question})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6db4c853",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}