Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 183 additions & 0 deletions docs/extras/modules/agents/toolkits/document_comparison_toolkit.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ec1d7a9a",
"metadata": {},
"source": [
"# Document Comparison\n",
"\n",
"This notebook shows how to use an agent to compare two documents.\n",
"\n",
"The high level idea is we will create a question-answering chain for each document, and then use that "
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "8632a37c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
" warnings.warn(\n"
]
}
],
"source": [
"from pydantic import BaseModel, Field\n",
"\n",
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.agents import Tool\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.vectorstores import FAISS\n",
"from langchain.document_loaders import PyPDFLoader\n",
"from langchain.chains import RetrievalQA"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "64f19917",
"metadata": {},
"outputs": [],
"source": [
"class DocumentInput(BaseModel):\n",
" question: str = Field()\n",
"\n",
"\n",
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
"\n",
"tools = []\n",
"files = [\n",
" # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
" {\n",
" \"name\": \"alphabet-earnings\", \n",
" \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
" }, \n",
" # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
" {\n",
" \"name\": \"tesla-earnings\", \n",
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\"\n",
" }\n",
"]\n",
"\n",
"for file in files:\n",
" loader = PyPDFLoader(file[\"path\"])\n",
" pages = loader.load_and_split()\n",
" text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
" docs = text_splitter.split_documents(pages)\n",
" embeddings = OpenAIEmbeddings()\n",
" retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
" \n",
" # Wrap retrievers in a Tool\n",
" tools.append(\n",
" Tool(\n",
" args_schema=DocumentInput,\n",
" name=file[\"name\"], \n",
" description=f\"useful when you want to answer questions about {file['name']}\",\n",
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)\n",
" )\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "eca02549",
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents import initialize_agent\n",
"from langchain.agents import AgentType"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c4d56c25",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
"Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{'input': 'did alphabet or tesla have more revenue?',\n",
" 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"llm = ChatOpenAI(\n",
" temperature=0,\n",
" model=\"gpt-3.5-turbo-0613\", \n",
")\n",
"\n",
"agent = initialize_agent(\n",
" agent=AgentType.OPENAI_FUNCTIONS,\n",
" tools=tools,\n",
" llm=llm,\n",
" verbose=True,\n",
")\n",
"\n",
"agent({\"input\": \"did alphabet or tesla have more revenue?\"})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6db4c853",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}