Skip to content

Commit d4e8e0f

Browse files
homanp and hwchase17 authored
Add example for question answering over documents with OpenAI Function Agent (#6448)
This PR adds an example of doing question answering over documents using OpenAI Function Agents. #### Who can review? @hwchase17 --------- Co-authored-by: Harrison Chase <[email protected]>
1 parent 68a675c commit d4e8e0f

File tree

1 file changed

+183
-0
lines changed

1 file changed

+183
-0
lines changed
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "ec1d7a9a",
6+
"metadata": {},
7+
"source": [
8+
"# Document Comparison\n",
9+
"\n",
10+
"This notebook shows how to use an agent to compare two documents.\n",
11+
"\n",
12+
"The high-level idea is that we will create a question-answering chain for each document, and then give those chains to an agent as tools so that it can query each document and compare the answers."
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 1,
18+
"id": "8632a37c",
19+
"metadata": {},
20+
"outputs": [
21+
{
22+
"name": "stderr",
23+
"output_type": "stream",
24+
"text": [
25+
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
26+
" warnings.warn(\n"
27+
]
28+
}
29+
],
30+
"source": [
31+
"from pydantic import BaseModel, Field\n",
32+
"\n",
33+
"from langchain.chat_models import ChatOpenAI\n",
34+
"from langchain.agents import Tool\n",
35+
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
36+
"from langchain.text_splitter import CharacterTextSplitter\n",
37+
"from langchain.vectorstores import FAISS\n",
38+
"from langchain.document_loaders import PyPDFLoader\n",
39+
"from langchain.chains import RetrievalQA"
40+
]
41+
},
42+
{
43+
"cell_type": "code",
44+
"execution_count": 2,
45+
"id": "64f19917",
46+
"metadata": {},
47+
"outputs": [],
48+
"source": [
49+
"class DocumentInput(BaseModel):\n",
50+
" question: str = Field()\n",
51+
"\n",
52+
"\n",
53+
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")\n",
54+
"\n",
55+
"tools = []\n",
56+
"files = [\n",
57+
" # https://abc.xyz/investor/static/pdf/2023Q1_alphabet_earnings_release.pdf\n",
58+
" {\n",
59+
" \"name\": \"alphabet-earnings\", \n",
60+
" \"path\": \"/Users/harrisonchase/Downloads/2023Q1_alphabet_earnings_release.pdf\",\n",
61+
" }, \n",
62+
" # https://digitalassets.tesla.com/tesla-contents/image/upload/IR/TSLA-Q1-2023-Update\n",
63+
" {\n",
64+
" \"name\": \"tesla-earnings\", \n",
65+
" \"path\": \"/Users/harrisonchase/Downloads/TSLA-Q1-2023-Update.pdf\"\n",
66+
" }\n",
67+
"]\n",
68+
"\n",
69+
"for file in files:\n",
70+
" loader = PyPDFLoader(file[\"path\"])\n",
71+
" pages = loader.load_and_split()\n",
72+
" text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
73+
" docs = text_splitter.split_documents(pages)\n",
74+
" embeddings = OpenAIEmbeddings()\n",
75+
" retriever = FAISS.from_documents(docs, embeddings).as_retriever()\n",
76+
" \n",
77+
" # Wrap retrievers in a Tool\n",
78+
" tools.append(\n",
79+
" Tool(\n",
80+
" args_schema=DocumentInput,\n",
81+
" name=file[\"name\"], \n",
82+
" description=f\"useful when you want to answer questions about {file['name']}\",\n",
83+
" func=RetrievalQA.from_chain_type(llm=llm, retriever=retriever)\n",
84+
" )\n",
85+
" )"
86+
]
87+
},
88+
{
89+
"cell_type": "code",
90+
"execution_count": 3,
91+
"id": "eca02549",
92+
"metadata": {},
93+
"outputs": [],
94+
"source": [
95+
"from langchain.agents import initialize_agent\n",
96+
"from langchain.agents import AgentType"
97+
]
98+
},
99+
{
100+
"cell_type": "code",
101+
"execution_count": 4,
102+
"id": "c4d56c25",
103+
"metadata": {},
104+
"outputs": [
105+
{
106+
"name": "stdout",
107+
"output_type": "stream",
108+
"text": [
109+
"\n",
110+
"\n",
111+
"\u001b[1m> Entering new chain...\u001b[0m\n",
112+
"\u001b[32;1m\u001b[1;3m\n",
113+
"Invoking: `alphabet-earnings` with `{'question': 'revenue'}`\n",
114+
"\n",
115+
"\n",
116+
"\u001b[0m\u001b[36;1m\u001b[1;3m{'query': 'revenue', 'result': 'The revenue for Alphabet Inc. in the first quarter of 2023 was $69,787 million.'}\u001b[0m\u001b[32;1m\u001b[1;3m\n",
117+
"Invoking: `tesla-earnings` with `{'question': 'revenue'}`\n",
118+
"\n",
119+
"\n",
120+
"\u001b[0m\u001b[33;1m\u001b[1;3m{'query': 'revenue', 'result': 'Total revenue for Q1-2023 was $23.3 billion.'}\u001b[0m\u001b[32;1m\u001b[1;3mAlphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.\u001b[0m\n",
121+
"\n",
122+
"\u001b[1m> Finished chain.\u001b[0m\n"
123+
]
124+
},
125+
{
126+
"data": {
127+
"text/plain": [
128+
"{'input': 'did alphabet or tesla have more revenue?',\n",
129+
" 'output': 'Alphabet Inc. had more revenue than Tesla. In the first quarter of 2023, Alphabet Inc. had a revenue of $69,787 million, while Tesla had a revenue of $23.3 billion.'}"
130+
]
131+
},
132+
"execution_count": 4,
133+
"metadata": {},
134+
"output_type": "execute_result"
135+
}
136+
],
137+
"source": [
138+
"llm = ChatOpenAI(\n",
139+
" temperature=0,\n",
140+
" model=\"gpt-3.5-turbo-0613\", \n",
141+
")\n",
142+
"\n",
143+
"agent = initialize_agent(\n",
144+
" agent=AgentType.OPENAI_FUNCTIONS,\n",
145+
" tools=tools,\n",
146+
" llm=llm,\n",
147+
" verbose=True,\n",
148+
")\n",
149+
"\n",
150+
"agent({\"input\": \"did alphabet or tesla have more revenue?\"})"
151+
]
152+
},
153+
{
154+
"cell_type": "code",
155+
"execution_count": null,
156+
"id": "6db4c853",
157+
"metadata": {},
158+
"outputs": [],
159+
"source": []
160+
}
161+
],
162+
"metadata": {
163+
"kernelspec": {
164+
"display_name": "Python 3 (ipykernel)",
165+
"language": "python",
166+
"name": "python3"
167+
},
168+
"language_info": {
169+
"codemirror_mode": {
170+
"name": "ipython",
171+
"version": 3
172+
},
173+
"file_extension": ".py",
174+
"mimetype": "text/x-python",
175+
"name": "python",
176+
"nbconvert_exporter": "python",
177+
"pygments_lexer": "ipython3",
178+
"version": "3.9.1"
179+
}
180+
},
181+
"nbformat": 4,
182+
"nbformat_minor": 5
183+
}

0 commit comments

Comments
 (0)