From 9d45d7521b09f21bda8c5ea902164ff5ebda7ced Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 14 Nov 2023 08:36:33 -0800 Subject: [PATCH 1/8] proposed format for standardized EF pages --- docs/embeddings/openai.md | 99 +++++++++++++++++++++++++-------------- src/css/custom.css | 51 +++++++++++++++++--- 2 files changed, 108 insertions(+), 42 deletions(-) diff --git a/docs/embeddings/openai.md b/docs/embeddings/openai.md index 8510205..986bb63 100644 --- a/docs/embeddings/openai.md +++ b/docs/embeddings/openai.md @@ -3,59 +3,86 @@ # OpenAI -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +Chroma provides a convenient wrapper around OpenAI's embedding API. This embedding function runs remotely on OpenAI's servers, and requires an API key. You can get an API key by signing up for an account at [OpenAI](https://openai.com/api/). -
Select a language
+
- - - - +| Models | Input | Dimensionality | Context Size| Default | +|--|--|--|--|--| +|`ada-002` | English | 1536 | 2048 | โœ… | -Chroma provides a convenient wrapper around OpenAI's embedding API. This embedding function runs remotely on OpenAI's servers, and requires an API key. You can get an API key by signing up for an account at [OpenAI](https://openai.com/api/). +## Basic Usage - - +### Python -This embedding function relies on the `openai` python package, which you can install with `pip install openai`. +```bash +pip install openai +``` ```python -openai_ef = embedding_functions.OpenAIEmbeddingFunction( - api_key="YOUR_API_KEY", - model_name="text-embedding-ada-002" - ) + +from chromadb.utils import embedding_functions + +embedder = embedding_functions.OpenAIEmbeddingFunction( + api_key="YOUR_API_KEY") + +collection = client.create_collection( + name="oai_ef", + embedding_function=embedder) ``` -To use the OpenAI embedding models on other platforms such as Azure, you can use the `api_base` and `api_type` parameters: +### Javascript + +```bash +yarn add openai +``` + +```javascript +import { ChromaClient, OpenAIEmbeddingFunction } from 'chromadb' + +const embedder = new OpenAIEmbeddingFunction({ + openai_api_key: "YOUR_API_KEY" +}) + +const collection = await client.createCollection({ + name: "oai_ef", + embeddingFunction: embedder +}) +``` + +## Advanced Usage + +### Call directly + +By passing the embedding function to a Collection, Chroma handles the embedding of documents and queries for you. However in some cases you may want to generate the embeddings outside and handle them yourself. 
+ +#### Python + ```python -openai_ef = embedding_functions.OpenAIEmbeddingFunction( - api_key="YOUR_API_KEY", - api_base="YOUR_API_BASE_PATH", - api_type="azure", - api_version="YOUR_API_VERSION", - model_name="text-embedding-ada-002" - ) +embeddings = embedder(["document1","document2"]) +# [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] ``` - - +#### Javascript ```javascript -const {OpenAIEmbeddingFunction} = require('chromadb'); -const embedder = new OpenAIEmbeddingFunction({openai_api_key: "apiKey"}) - -// use directly const embeddings = embedder.generate(["document1","document2"]) - -// pass documents to query for .add and .query -const collection = await client.createCollection({name: "name", embeddingFunction: embedder}) -const collection = await client.getCollection({name: "name", embeddingFunction: embedder}) +// [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] ``` - +### Using a different model - +You can pass in an optional `model_name` argument, which lets you choose which OpenAI embeddings model to use. By default, Chroma uses `text-embedding-ada-002`. You can see a list of all available models [here](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings). +### Run with Azure -You can pass in an optional `model_name` argument, which lets you choose which OpenAI embeddings model to use. By default, Chroma uses `text-embedding-ada-002`. You can see a list of all available models [here](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings). 
\ No newline at end of file +To use the OpenAI embedding models on other platforms such as Azure, you can use the `api_base` and `api_type` parameters: +```python +openai_ef = embedding_functions.OpenAIEmbeddingFunction( + api_key="YOUR_API_KEY", + api_base="YOUR_API_BASE_PATH", + api_type="azure", + api_version="YOUR_API_VERSION", + model_name="text-embedding-ada-002" + ) +``` \ No newline at end of file diff --git a/src/css/custom.css b/src/css/custom.css index b512705..d65c93b 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -406,7 +406,10 @@ article p a { font-size: 1.5rem !important; } h3 { - font-size: 1rem !important; + font-size: 1.2rem !important; + } + h4 { + font-size: 0.9rem !important; } article { max-width: 700px; @@ -483,12 +486,8 @@ article p a { div.special_table + table { border: none; - - /* border-collapse: separate; */ - /* border-spacing: 0px; */ } - div.special_table + table thead { background: rgba(120,120,120, 0.1); border-top-right-radius: 10px; @@ -519,6 +518,8 @@ div.special_table + table, th, td { border-width: 0px !important; } + + .custom-tag { display: inline; background-color: #f0f0f0; @@ -588,4 +589,42 @@ div.special_table + table, th, td { .main-wrapper { min-height: 100vh; -} \ No newline at end of file +} + + +div.data_table + table { + border: none; + padding-top: 20px; + padding-bottom: 20px; +} + +div.data_table + table thead { + background: rgba(120,120,120, 0.1); + border-top-right-radius: 10px; + overflow: hidden; +} + +div.data_table + table thead tr { + background: rgba(255, 255, 255, 0.1); + border-top: 0px; + border-bottom: 0px; + text-align: left; +} +div.data_table + table tr th { + background: rgba(255, 255, 255, 0); + color: #000; + font-weight: 600; + padding: 5px 20px; +} +div.data_table + table tr td { + padding: 5px 20px; + text-align: left; +} + +div.data_table + table tr:nth-child(even) { + background: rgba(255, 255, 255, 0); +} + +div.data_table + table, th, td { + border-width: 0px !important; +} 
From e6af460153bf9e26aad9481b611e659663c81160 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 14 Nov 2023 08:54:17 -0800 Subject: [PATCH 2/8] proposed new format for integrations --- docs/integrations/langchain.md | 102 ++++++++++++++++++-------------- docs/integrations/langchain2.md | 56 ++++++++++++++++++ 2 files changed, 112 insertions(+), 46 deletions(-) create mode 100644 docs/integrations/langchain2.md diff --git a/docs/integrations/langchain.md b/docs/integrations/langchain.md index c5d9eb1..b68c4d1 100644 --- a/docs/integrations/langchain.md +++ b/docs/integrations/langchain.md @@ -3,54 +3,64 @@ slug: /integrations/langchain title: ๐Ÿฆœ๏ธ๐Ÿ”— Langchain --- -## Langchain - Python +LangChain is a popular open-source framework for developing applications powered by language models. -- [LangChain + Chroma](https://blog.langchain.dev/langchain-chroma/) on the LangChain blog -- [Harrison's `chroma-langchain` demo repo](https://github.com/hwchase17/chroma-langchain) - - [question answering over documents](https://github.com/hwchase17/chroma-langchain/blob/master/qa.ipynb) - ([Replit version](https://replit.com/@swyx/LangChainChromaStarter#main.py)) - - [to use Chroma as a persistent database](https://github.com/hwchase17/chroma-langchain/blob/master/persistent-qa.ipynb) -- Tutorials - - [Chroma and LangChain tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) - The demo showcases how to pull data from the English Wikipedia using their API. The project also demonstrates how to vectorize data in chunks and get embeddings using OpenAI embeddings model. - - [Create a Voice-based ChatGPT Clone That Can Search on the Internet and local files](https://betterprogramming.pub/how-to-create-a-voice-based-chatgpt-clone-that-can-search-on-the-internet-24d7f570ea8) -- [LangChain's Chroma Documentation](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html?highlight=chroma#langchain.vectorstores.Chroma) +> Insert star counter... 
other stuff like that + +### Install + +`pip install langchain` / `yarn add langchain` + +### Main Benefits + +- 1 +- 2 +- 3 + +### Simple Example + +#### Python + +```python +import chromadb +from langchain.vectorstores import Chroma +from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings + +# Chroma code +persistent_client = chromadb.PersistentClient() +collection = persistent_client.get_or_create_collection("collection_name") +collection.add(ids=["1", "2", "3"], documents=["a", "b", "c"]) +# LangChain Code +embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2") -## Langchain - JS - -Here is an [example in LangChainJS](https://github.com/hwchase17/langchainjs/blob/main/examples/src/chains/chat_vector_db_chroma.ts) - -```javascript -import { OpenAI } from "langchain/llms/openai"; -import { ConversationalRetrievalQAChain } from "langchain/chains"; -import { Chroma } from "langchain/vectorstores/chroma"; -import { OpenAIEmbeddings } from "langchain/embeddings/openai"; -import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; -import * as fs from "fs"; - -// to run this first run a chroma server with `chroma run --path /path/to/data` - -export const run = async () => { - /* Initialize the LLM to use to answer the question */ - const model = new OpenAI(); - /* Load in the file we want to do question answering over */ - const text = fs.readFileSync("state_of_the_union.txt", "utf8"); - /* Split the text into chunks */ - const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); - const docs = await textSplitter.createDocuments([text]); - /* Create the vectorstore */ - const vectorStore = await Chroma.fromDocuments(docs, new OpenAIEmbeddings(), { - collectionName: "state_of_the_union", - }); - /* Create the chain */ - const chain = ConversationalRetrievalQAChain.fromLLM( - model, - vectorStore.asRetriever() - ); - /* Ask it a question */ - const question = "What did the president 
say about Justice Breyer?"; - const res = await chain.call({ question, chat_history: [] }); - console.log(res); -}; +langchain_chroma = Chroma( + client=persistent_client, + collection_name="collection_name", + embedding_function=embedding_function, +) +# Important! - the embedding function passed to langchain is their wrapper, not Chroma's + + +print("There are", langchain_chroma._collection.count(), "in the collection") ``` -- [LangChainJS Chroma Documentation](https://js.langchain.com/docs/modules/indexes/vector_stores/integrations/chroma) \ No newline at end of file +#### Javascript + +```js +// stuff goes here +``` + +### Resources + +- [LangChain + Chroma Announcement Post](https://blog.langchain.dev/langchain-chroma/) on the LangChain blog +- [LangChain's Chroma Documentation](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html?highlight=chroma#langchain.vectorstores.Chroma) + +#### Tutorials + + - [Chroma and LangChain tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) - The demo showcases how to pull data from the English Wikipedia using their API. The project also demonstrates how to vectorize data in chunks and get embeddings using OpenAI embeddings model. 
+ - [Create a Voice-based ChatGPT Clone That Can Search on the Internet and local files](https://betterprogramming.pub/how-to-create-a-voice-based-chatgpt-clone-that-can-search-on-the-internet-24d7f570ea8) +- [Harrison's `chroma-langchain` demo repo](https://github.com/hwchase17/chroma-langchain) + - [question answering over documents](https://github.com/hwchase17/chroma-langchain/blob/master/qa.ipynb) - ([Replit version](https://replit.com/@swyx/LangChainChromaStarter#main.py)) + - [to use Chroma as a persistent database](https://github.com/hwchase17/chroma-langchain/blob/master/persistent-qa.ipynb) + \ No newline at end of file diff --git a/docs/integrations/langchain2.md b/docs/integrations/langchain2.md new file mode 100644 index 0000000..c5d9eb1 --- /dev/null +++ b/docs/integrations/langchain2.md @@ -0,0 +1,56 @@ +--- +slug: /integrations/langchain +title: ๐Ÿฆœ๏ธ๐Ÿ”— Langchain +--- + +## Langchain - Python + +- [LangChain + Chroma](https://blog.langchain.dev/langchain-chroma/) on the LangChain blog +- [Harrison's `chroma-langchain` demo repo](https://github.com/hwchase17/chroma-langchain) + - [question answering over documents](https://github.com/hwchase17/chroma-langchain/blob/master/qa.ipynb) - ([Replit version](https://replit.com/@swyx/LangChainChromaStarter#main.py)) + - [to use Chroma as a persistent database](https://github.com/hwchase17/chroma-langchain/blob/master/persistent-qa.ipynb) +- Tutorials + - [Chroma and LangChain tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) - The demo showcases how to pull data from the English Wikipedia using their API. The project also demonstrates how to vectorize data in chunks and get embeddings using OpenAI embeddings model. 
+ - [Create a Voice-based ChatGPT Clone That Can Search on the Internet and local files](https://betterprogramming.pub/how-to-create-a-voice-based-chatgpt-clone-that-can-search-on-the-internet-24d7f570ea8) +- [LangChain's Chroma Documentation](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html?highlight=chroma#langchain.vectorstores.Chroma) + + +## Langchain - JS + +Here is an [example in LangChainJS](https://github.com/hwchase17/langchainjs/blob/main/examples/src/chains/chat_vector_db_chroma.ts) + +```javascript +import { OpenAI } from "langchain/llms/openai"; +import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { Chroma } from "langchain/vectorstores/chroma"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import * as fs from "fs"; + +// to run this first run a chroma server with `chroma run --path /path/to/data` + +export const run = async () => { + /* Initialize the LLM to use to answer the question */ + const model = new OpenAI(); + /* Load in the file we want to do question answering over */ + const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + /* Split the text into chunks */ + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + /* Create the vectorstore */ + const vectorStore = await Chroma.fromDocuments(docs, new OpenAIEmbeddings(), { + collectionName: "state_of_the_union", + }); + /* Create the chain */ + const chain = ConversationalRetrievalQAChain.fromLLM( + model, + vectorStore.asRetriever() + ); + /* Ask it a question */ + const question = "What did the president say about Justice Breyer?"; + const res = await chain.call({ question, chat_history: [] }); + console.log(res); +}; +``` + +- [LangChainJS Chroma Documentation](https://js.langchain.com/docs/modules/indexes/vector_stores/integrations/chroma) \ No newline at 
end of file From fa3b0fcfa6044def0710fe25e4d102bddfb209a0 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 19 Dec 2023 10:39:23 -0800 Subject: [PATCH 3/8] add format to cohere as well --- docs/embeddings/cohere.md | 96 ++++++++++++++++++++++++--------------- docs/embeddings/openai.md | 4 +- src/css/custom.css | 1 + 3 files changed, 63 insertions(+), 38 deletions(-) diff --git a/docs/embeddings/cohere.md b/docs/embeddings/cohere.md index 8831c2c..26805cf 100644 --- a/docs/embeddings/cohere.md +++ b/docs/embeddings/cohere.md @@ -3,56 +3,88 @@ # Cohere -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +Chroma also provides a convenient wrapper around Cohere's embedding API. This embedding function runs remotely on Cohereโ€™s servers, and requires an API key. You can get an API key by signing up for an account at [Cohere](https://dashboard.cohere.ai/welcome/register). -
Select a language
+
- - - - +| Models | Input | Dimensionality | Context Size| +|--|--|--|--| +|`embed-english-v3.0` | English | 1024 | 512 (recommended) | +|`embed-multilingual-v3.0` | [Full List](https://docs.cohere.com/docs/supported-languages) | 1024 | 512 (recommended) | +|`embed-english-light-v3.0` | English | 384 | 512 (recommended) | +|`embed-multilingual-light-v3.0` | [Full List](https://docs.cohere.com/docs/supported-languages) | 384 | 512 (recommended) | +|`embed-english-v2.0` | English | 4096 | 512 (recommended) | +|`embed-english-light-v2.0` | English | 1024 | 512 (recommended) | +|`embed-multilingual-v2.0` | [Full List](https://docs.cohere.com/docs/supported-languages) | 768 | 512 (recommended) | -Chroma also provides a convenient wrapper around Cohere's embedding API. This embedding function runs remotely on Cohereโ€™s servers, and requires an API key. You can get an API key by signing up for an account at [Cohere](https://dashboard.cohere.ai/welcome/register). - - +## Basic Usage -This embedding function relies on the `cohere` python package, which you can install with `pip install cohere`. 
+### Python + +```bash +pip install cohere +``` ```python -cohere_ef = embedding_functions.CohereEmbeddingFunction(api_key="YOUR_API_KEY", model_name="large") -cohere_ef(texts=["document1","document2"]) + +from chromadb.utils import embedding_functions + +embedder = embedding_functions.CohereEmbeddingFunction( + api_key="YOUR_API_KEY") + +collection = client.create_collection( + name="cohere_ef", + embedding_function=embedder) ``` - - +### Javascript + +```bash +yarn add cohere-ai +``` ```javascript -const {CohereEmbeddingFunction} = require('chromadb'); -const embedder = new CohereEmbeddingFunction("apiKey") +import { ChromaClient, CohereEmbeddingFunction } from 'chromadb' -// use directly -const embeddings = embedder.generate(["document1","document2"]) +const embedder = new CohereEmbeddingFunction({ + apiKey: "YOUR_API_KEY" +}) -// pass documents to query for .add and .query -const collection = await client.createCollection({name: "name", embeddingFunction: embedder}) -const collectionGet = await client.getCollection({name:"name", embeddingFunction: embedder}) +const collection = await client.createCollection({ + name: "cohere_ef", + embeddingFunction: embedder +}) ``` - +## Advanced Usage - +### Call directly +By passing the embedding function to a Collection, Chroma handles the embedding of documents and queries for you. However in some cases you may want to generate the embeddings outside and handle them yourself. +#### Python + +```python +embeddings = embedder(["document1","document2"]) +# [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] +``` + +#### Javascript + +```javascript +const embeddings = embedder.generate(["document1","document2"]) +// [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] +``` + +### Using a different model You can pass in an optional `model_name` argument, which lets you choose which Cohere embeddings model to use. 
By default, Chroma uses `large` model. You can see the available models under `Get embeddings` section [here](https://docs.cohere.ai/reference/embed). ### Multilingual model example - - +#### Python ```python cohere_ef = embedding_functions.CohereEmbeddingFunction( @@ -69,11 +101,10 @@ cohere_ef(texts=multilingual_texts) ``` - - +#### Javascript ```javascript -const {CohereEmbeddingFunction} = require('chromadb'); +import { CohereEmbeddingFunction } from 'chromadb' const embedder = new CohereEmbeddingFunction("apiKey") multilingual_texts = [ 'Hello from Cohere!', 'ู…ุฑุญุจู‹ุง ู…ู† ูƒูˆู‡ูŠุฑ!', @@ -86,11 +117,4 @@ const embeddings = embedder.generate(multilingual_texts) ``` - - - - - - - For more information on multilingual model you can read [here](https://docs.cohere.ai/docs/multilingual-language-models). \ No newline at end of file diff --git a/docs/embeddings/openai.md b/docs/embeddings/openai.md index 986bb63..a0aba31 100644 --- a/docs/embeddings/openai.md +++ b/docs/embeddings/openai.md @@ -7,9 +7,9 @@ Chroma provides a convenient wrapper around OpenAI's embedding API. This embeddi
-| Models | Input | Dimensionality | Context Size| Default | +| Models | Input | Dimensionality | Context Size| |--|--|--|--|--| -|`ada-002` | English | 1536 | 2048 | โœ… | +|`ada-002` | English | 1536 | 2048 | ## Basic Usage diff --git a/src/css/custom.css b/src/css/custom.css index d65c93b..1efcb3e 100644 --- a/src/css/custom.css +++ b/src/css/custom.css @@ -596,6 +596,7 @@ div.data_table + table { border: none; padding-top: 20px; padding-bottom: 20px; + zoom: 0.8; } div.data_table + table thead { From a7100999aeb9d768b631e3e540e9b17e75cc63cb Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Tue, 19 Dec 2023 14:45:49 -0800 Subject: [PATCH 4/8] more work --- docs/integrations/langchain.md | 49 +++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/docs/integrations/langchain.md b/docs/integrations/langchain.md index b68c4d1..e389ff1 100644 --- a/docs/integrations/langchain.md +++ b/docs/integrations/langchain.md @@ -1,11 +1,18 @@ --- slug: /integrations/langchain -title: ๐Ÿฆœ๏ธ๐Ÿ”— Langchain +title: ๐Ÿฆœ๏ธ๐Ÿ”— LangChain --- LangChain is a popular open-source framework for developing applications powered by language models. -> Insert star counter... 
other stuff like that + + +[MIT License](https://github.com/langchain-ai/langchain/blob/master/LICENSE)  • [Site](https://www.langchain.com/) + +| Languages | Docs | Github | +|---|---|--|--| +|Python | [Docs](https://python.langchain.com/docs/get_started/introduction) | [Code](https://github.com/langchain-ai/langchain) +|JS | [Docs](https://js.langchain.com/docs/get_started/introduction) | [Code](https://github.com/langchain-ai/langchainjs) ### Install @@ -13,9 +20,9 @@ LangChain is a popular open-source framework for developing applications powered ### Main Benefits -- 1 -- 2 -- 3 +- Common Patterns for chain-of-thought and prompt templating +- Many integrations and data loaders +- Deep integration to LangSmith monitoring (developed by the same team) ### Simple Example @@ -48,7 +55,37 @@ print("There are", langchain_chroma._collection.count(), "in the collection") #### Javascript ```js -// stuff goes here +import { OpenAI } from "langchain/llms/openai"; +import { ConversationalRetrievalQAChain } from "langchain/chains"; +import { Chroma } from "langchain/vectorstores/chroma"; +import { OpenAIEmbeddings } from "langchain/embeddings/openai"; +import { RecursiveCharacterTextSplitter } from "langchain/text_splitter"; +import * as fs from "fs"; + +// to run this first run a chroma server with `chroma run --path /path/to/data` + +export const run = async () => { + /* Initialize the LLM to use to answer the question */ + const model = new OpenAI(); + /* Load in the file we want to do question answering over */ + const text = fs.readFileSync("state_of_the_union.txt", "utf8"); + /* Split the text into chunks */ + const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000 }); + const docs = await textSplitter.createDocuments([text]); + /* Create the vectorstore */ + const vectorStore = await Chroma.fromDocuments(docs, new OpenAIEmbeddings(), { + collectionName: "state_of_the_union", + }); + /* Create the chain */ + const chain = 
ConversationalRetrievalQAChain.fromLLM( + model, + vectorStore.asRetriever() + ); + /* Ask it a question */ + const question = "What did the president say about Justice Breyer?"; + const res = await chain.call({ question, chat_history: [] }); + console.log(res); +}; ``` ### Resources From 15e5ea653f7d891611cbd5331b8a7e6ce9ae8ad7 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Fri, 22 Dec 2023 12:21:51 -0500 Subject: [PATCH 5/8] add page --- docs/integrations/add.md | 45 ++++++++++++++++++++++++++++++++++++++++ sidebars.js | 2 ++ 2 files changed, 47 insertions(+) create mode 100644 docs/integrations/add.md diff --git a/docs/integrations/add.md b/docs/integrations/add.md new file mode 100644 index 0000000..fa49bb8 --- /dev/null +++ b/docs/integrations/add.md @@ -0,0 +1,45 @@ +--- +slug: /integrations/add +title: โž• Add +--- + +Integrations help Chroma users build better applications, faster! + +### Helpful Integrations + +Chroma prioritizes integrations that are genuinely useful to users building LLM applications. + +Examples include: +- Orchestration +- Monitoring +- Application frameworks + +If you'd like to add an `Embedding` integration - please head over to that section of the docs. + +### Integration Page + +Please open a new PR in the [Docs Repo](https://github.com/chroma-core/docs). + +We strongly encourage having the following sections: +- Non-jargony 1 paragraph overview +- Github Star Count (if applicable) +- Open Source License (if applicable) +- Relavent links (site, docs, repo) +- Languages supported (Chroma has 1st party Python and JS support) +- How to install +- A simple getting-started example +- Resource and Tutorial links + +The [๐ŸŽˆ Streamlit](/integrations/streamlit) integration is a great reference! + +## Integration Overview and Sidebar + +You will also want to add your integration to the integration overview page and sidebar. 
+ +### Overview page + +Add your integration to the list here: https://github.com/chroma-core/docs/blob/main/docs/integrations/index.md + +### Sidebar + +Add your integration to the sidebar: https://github.com/chroma-core/docs/blob/main/sidebars.js \ No newline at end of file diff --git a/sidebars.js b/sidebars.js index 57130ed..4265080 100644 --- a/sidebars.js +++ b/sidebars.js @@ -57,6 +57,7 @@ const sidebars = { href: '/' }, 'integrations/index', + 'integrations/add', { type: 'category', label: 'Integrations', @@ -65,6 +66,7 @@ const sidebars = { items: [ 'integrations/langchain', 'integrations/llama-index', + ], }, ], From 4dca003b52e604775440fdda917159ac36b070f6 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Fri, 22 Dec 2023 12:33:56 -0500 Subject: [PATCH 6/8] add pages --- docs/embeddings.md | 2 ++ docs/embeddings/add.md | 41 ++++++++++++++++++++++++++++++++++ docs/integrations/add.md | 1 + docs/integrations/index.md | 2 ++ docs/integrations/streamlit.md | 3 ++- sidebars.js | 1 + 6 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 docs/embeddings/add.md diff --git a/docs/embeddings.md b/docs/embeddings.md index b1adb3e..cd391ad 100644 --- a/docs/embeddings.md +++ b/docs/embeddings.md @@ -23,6 +23,8 @@ Chroma provides lightweight wrappers around popular embedding providers, making We welcome pull requests to add new Embedding Functions to the community. +[โž• Add New](/embeddings/add) + *** ## Default: all-MiniLM-L6-v2 diff --git a/docs/embeddings/add.md b/docs/embeddings/add.md new file mode 100644 index 0000000..19528a9 --- /dev/null +++ b/docs/embeddings/add.md @@ -0,0 +1,41 @@ +--- +title: โž• Add +--- + +New embeddings integrations help Chroma users build better applications, faster! + +### Process + +To land a new embedding integration, you will want to add the integrations to the Python and JS clients. 
+- [Python embedding functions](https://github.com/chroma-core/chroma/blob/12785d71ea476ef3cbd28b419e7807bf7f2129d3/chromadb/utils/embedding_functions.py#L4) +- [JS embedding functions](https://github.com/chroma-core/chroma/tree/main/clients/js/src/embeddings) + +We will accept embedding functions with just one or the other in some circumstances, but we really like to keep parity between them. + +The process is: +1. Please open a new PR in the [Chroma Repo](https://github.com/chroma-core/chroma). +2. Please open a new PR in the [Docs Repo](https://github.com/chroma-core/docs). +3. Chroma will approve and land the new embedding functions and cut a new Python/JS release +4. The docs PR will merge and be deployed + +### Embedding Page + +We strongly encourage having the following sections: +- Overview +- Info table on available models and their details +- Simple getting-started usage for Python and JS +- Advanced usage section with examples and links to other resources + +The [OpenAI](/embeddings/openai) integration is a great reference! + +## Integration Overview and Sidebar + +You will also want to add your embedding functions to the embeddings overview page and sidebar. 
+ +### Overview page + +Add your embedding function to the list here: https://github.com/chroma-core/docs/blob/main/docs/embeddings/index.md + +### Sidebar + +Add your embedding function to the sidebar: https://github.com/chroma-core/docs/blob/main/sidebars.js \ No newline at end of file diff --git a/docs/integrations/add.md b/docs/integrations/add.md index fa49bb8..345a499 100644 --- a/docs/integrations/add.md +++ b/docs/integrations/add.md @@ -27,6 +27,7 @@ We strongly encourage having the following sections: - Relavent links (site, docs, repo) - Languages supported (Chroma has 1st party Python and JS support) - How to install +- Main benefits (optimize for clear, non-markety language) - A simple getting-started example - Resource and Tutorial links diff --git a/docs/integrations/index.md b/docs/integrations/index.md index 77e45dc..d4187e3 100644 --- a/docs/integrations/index.md +++ b/docs/integrations/index.md @@ -22,6 +22,8 @@ We welcome pull requests to add new Integrations to the community. *Coming soon* - integrations with LangSmith, JinaAI, and more. +[โž• Add New](/integrations/add) + *** ### ๐Ÿงฌ Embeddings diff --git a/docs/integrations/streamlit.md b/docs/integrations/streamlit.md index 93ad15a..03d9a33 100644 --- a/docs/integrations/streamlit.md +++ b/docs/integrations/streamlit.md @@ -9,7 +9,8 @@ Streamlit is an open-source Python library that makes it easy to create and shar [Apache 2.0 License](https://github.com/streamlit/streamlit/blob/develop/LICENSE)  • [Site](https://streamlit.io/) -| Languages | Docs | Github |
+| Languages | Docs | Github | +|--|--|--| | Python | [Docs](https://docs.streamlit.io/) | [Code](https://github.com/streamlit/streamlit) ### Install diff --git a/sidebars.js b/sidebars.js index 6c72934..074441a 100644 --- a/sidebars.js +++ b/sidebars.js @@ -79,6 +79,7 @@ const sidebars = { href: '/' }, 'embeddings', + 'embeddings/add', { type: 'category', label: 'Integrations', From 799f33950379b37eeac82f1a1534c57e414c2355 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Fri, 22 Dec 2023 12:50:01 -0500 Subject: [PATCH 7/8] cleanup google ef --- docs/embeddings/google-gemini.md | 89 ++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 16 deletions(-) diff --git a/docs/embeddings/google-gemini.md b/docs/embeddings/google-gemini.md index ec29984..4da835e 100644 --- a/docs/embeddings/google-gemini.md +++ b/docs/embeddings/google-gemini.md @@ -3,25 +3,24 @@ # Google Generative AI -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; +Chroma also provides a convenient wrapper around Google's embedding API. This embedding function runs remotely on Google servers, and requires an API key. You can get an API key by signing up for an account at [Google MakerSuite](https://makersuite.google.com/). -
Select a language
+
- - - - +| Models | Input | Dimensionality | +|--|--|--| +|`models/embedding-001` | English | 768 | -Chroma provides a convenient wrapper around Google's Generative AI embedding API. This embedding function runs remotely on Google's servers, and requires an API key. +## Basic Usage -You can get an API key by signing up for an account at [Google MakerSuite](https://makersuite.google.com/). - - - +### Python This embedding function relies on the `google-generativeai` python package, which you can install with `pip install google-generativeai`. +```bash +pip install google-generativeai +``` + ```python # import import chromadb @@ -40,11 +39,14 @@ You can view a more [complete example](https://github.com/chroma-core/chroma/tre For more info - please visit the [official Google python docs](https://ai.google.dev/tutorials/python_quickstart). - - +### Javascript This embedding function relies on the `@google/generative-ai` npm package, which you can install with `yarn add @google/generative-ai`. +```bash +yarn add @google/generative-ai +``` + ```javascript import { ChromaClient, GoogleGenerativeAiEmbeddingFunction } from 'chromadb' const embedder = new GoogleGenerativeAiEmbeddingFunction({googleApiKey: ""}) @@ -61,7 +63,62 @@ You can view a more [complete example using Node](https://github.com/chroma-core For more info - please visit the [official Google JS docs](https://ai.google.dev/tutorials/node_quickstart). - +## Advanced Usage + +### Call directly + +By passing the embedding function to a Collection, Chroma handles the embedding of documents and queries for you. However in some cases you may want to generate the embeddings outside and handle them yourself. 
+ +#### Python + +```python +embeddings = embedder(["document1","document2"]) +# [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] +``` + +#### Javascript + +```javascript +const embeddings = embedder.generate(["document1","document2"]) +// [[0.04565250128507614, 0.01611952856183052...], [0.030171213671565056, 0.007690359838306904...]] +``` + + + +### Task Type + +Google's Embedding endpoint also accepts a `task_type`/`taskType` parameter. This may boost performance for your specific usage. + +
+ +| Task Type| Description| +|--|--| +|RETRIEVAL_QUERY | Specifies the given text is a query in a search/retrieval setting.| +|RETRIEVAL_DOCUMENT |Specifies the given text is a document in a search/retrieval setting. Using this task type requires a title.| +|SEMANTIC_SIMILARITY| Specifies the given text will be used for Semantic Textual Similarity (STS).| +|CLASSIFICATION |Specifies that the embeddings will be used for classification.| +|CLUSTERING |Specifies that the embeddings will be used for clustering.| + +Here is a python demonstration of how to use `RETRIEVAL_QUERY` with `RETRIEVAL_DOCUMENT`. + +```python +# import +import chromadb +from chromadb.utils import embedding_functions + +google_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key="YOUR_API_KEY", task_type='RETRIEVAL_DOCUMENT') + +# pass documents to query for .add and .query +collection = client.create_collection(name="name", embedding_function=google_ef) + +# add your documents +collection.add(...) + +# create a new EF for Query and re-get your collection +google_ef2 = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key="YOUR_API_KEY", task_type='RETRIEVAL_QUERY') +collection = client.get_collection(name="name", embedding_function=google_ef2) -
+# query your documents +collection.query(...) +``` \ No newline at end of file From 2206bc01cead4304b2746849cd1241a36db7f389 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Fri, 22 Dec 2023 12:59:32 -0500 Subject: [PATCH 8/8] bump langchain --- docs/integrations/langchain.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/integrations/langchain.md b/docs/integrations/langchain.md index 8d38664..aaac452 100644 --- a/docs/integrations/langchain.md +++ b/docs/integrations/langchain.md @@ -28,6 +28,8 @@ LangChain is a popular open-source framework for developing applications powered #### Python +[Langchain Python Docs - Chroma Integration](https://python.langchain.com/docs/integrations/vectorstores/chroma) + ```python import chromadb from langchain.vectorstores import Chroma @@ -54,6 +56,8 @@ print("There are", langchain_chroma._collection.count(), "in the collection") #### Javascript +[Langchain JS Docs - Chroma Integration](https://js.langchain.com/docs/integrations/vectorstores/chroma) + ```js import { OpenAI } from "langchain/llms/openai"; import { ConversationalRetrievalQAChain } from "langchain/chains";