Skip to content

Commit 98612e4

Browse files
0xcha05 and hinthornw
authored and committed
Added filter and delete all option to delete function in Pinecone integration, updated base VectorStore's delete function (#6876)
### Description: Updated the delete function in the Pinecone integration to allow for deletion of vectors by specifying a filter condition, and to delete all vectors in a namespace. Made the ids parameter optional in the delete function in the base VectorStore class and allowed for additional keyword arguments. Updated the delete function in several classes (Redis, Chroma, Supabase, Deeplake, Elastic, Weaviate, and Cassandra) to match the changes made in the base VectorStore class. This involved making the ids parameter optional and allowing for additional keyword arguments.
1 parent 7dd27ca commit 98612e4

File tree

9 files changed

+58
-34
lines changed

9 files changed

+58
-34
lines changed

langchain/vectorstores/base.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -53,20 +53,19 @@ def add_texts(
5353
List of ids from adding the texts into the vectorstore.
5454
"""
5555

56-
def delete(self, ids: List[str]) -> Optional[bool]:
57-
"""Delete by vector ID.
56+
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
57+
"""Delete by vector ID or other criteria.
5858
5959
Args:
6060
ids: List of ids to delete.
61+
**kwargs: Other keyword arguments that subclasses might use.
6162
6263
Returns:
6364
Optional[bool]: True if deletion is successful,
6465
False otherwise, None if not implemented.
6566
"""
6667

67-
raise NotImplementedError(
68-
"delete_by_id method must be implemented by subclass."
69-
)
68+
raise NotImplementedError("delete method must be implemented by subclass.")
7069

7170
async def aadd_texts(
7271
self,

langchain/vectorstores/cassandra.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -91,8 +91,9 @@ def clear(self) -> None:
9191
def delete_by_document_id(self, document_id: str) -> None:
9292
return self.table.delete(document_id)
9393

94-
def delete(self, ids: List[str]) -> Optional[bool]:
95-
"""Delete by vector ID.
94+
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
95+
"""Delete by vector IDs.
96+
9697
9798
Args:
9899
ids: List of ids to delete.
@@ -101,6 +102,10 @@ def delete(self, ids: List[str]) -> Optional[bool]:
101102
Optional[bool]: True if deletion is successful,
102103
False otherwise, None if not implemented.
103104
"""
105+
106+
if ids is None:
107+
raise ValueError("No ids provided to delete.")
108+
104109
for document_id in ids:
105110
self.delete_by_document_id(document_id)
106111
return True

langchain/vectorstores/chroma.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -470,7 +470,7 @@ def from_documents(
470470
client=client,
471471
)
472472

473-
def delete(self, ids: List[str]) -> None:
473+
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
474474
"""Delete by vector IDs.
475475
476476
Args:

langchain/vectorstores/deeplake.py

Lines changed: 8 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -744,30 +744,23 @@ def from_texts(
744744
)
745745
return deeplake_dataset
746746

747-
def delete(
748-
self,
749-
ids: Any[List[str], None] = None,
750-
filter: Any[Dict[str, str], None] = None,
751-
delete_all: Any[bool, None] = None,
752-
) -> bool:
747+
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> bool:
753748
"""Delete the entities in the dataset.
754749
755750
Args:
756751
ids (Optional[List[str]], optional): The document_ids to delete.
757752
Defaults to None.
758-
filter (Optional[Dict[str, str]], optional): The filter to delete by.
759-
Defaults to None.
760-
delete_all (Optional[bool], optional): Whether to drop the dataset.
761-
Defaults to None.
753+
**kwargs: Other keyword arguments that subclasses might use.
754+
- filter (Optional[Dict[str, str]], optional): The filter to delete by.
755+
- delete_all (Optional[bool], optional): Whether to drop the dataset.
762756
763757
Returns:
764758
bool: Whether the delete operation was successful.
765759
"""
766-
self.vectorstore.delete(
767-
ids=ids,
768-
filter=filter,
769-
delete_all=delete_all,
770-
)
760+
filter = kwargs.get("filter")
761+
delete_all = kwargs.get("delete_all")
762+
763+
self.vectorstore.delete(ids=ids, filter=filter, delete_all=delete_all)
771764

772765
return True
773766

langchain/vectorstores/elastic_vector_search.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -317,13 +317,16 @@ def client_search(
317317
)
318318
return response
319319

320-
def delete(self, ids: List[str]) -> None:
320+
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
321321
"""Delete by vector IDs.
322322
323323
Args:
324324
ids: List of ids to delete.
325325
"""
326326

327+
if ids is None:
328+
raise ValueError("No ids provided to delete.")
329+
327330
# TODO: Check if this can be done in bulk
328331
for id in ids:
329332
self.client.delete(index=self.index_name, id=id)

langchain/vectorstores/pinecone.py

Lines changed: 24 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -354,16 +354,33 @@ def from_existing_index(
354354
pinecone.Index(index_name), embedding.embed_query, text_key, namespace
355355
)
356356

357-
def delete(self, ids: List[str], namespace: Optional[str] = None) -> None:
358-
"""Delete by vector IDs.
357+
def delete(
358+
self,
359+
ids: Optional[List[str]] = None,
360+
delete_all: Optional[bool] = None,
361+
namespace: Optional[str] = None,
362+
filter: Optional[dict] = None,
363+
**kwargs: Any,
364+
) -> None:
365+
"""Delete by vector IDs or filter.
359366
Args:
360367
ids: List of ids to delete.
368+
filter: Dictionary of conditions to filter vectors to delete.
361369
"""
362370

363-
# This is the maximum number of IDs that can be deleted
364371
if namespace is None:
365372
namespace = self._namespace
366-
chunk_size = 1000
367-
for i in range(0, len(ids), chunk_size):
368-
chunk = ids[i : i + chunk_size]
369-
self._index.delete(ids=chunk, namespace=namespace)
373+
374+
if delete_all:
375+
self._index.delete(delete_all=True, namespace=namespace, **kwargs)
376+
elif ids is not None:
377+
chunk_size = 1000
378+
for i in range(0, len(ids), chunk_size):
379+
chunk = ids[i : i + chunk_size]
380+
self._index.delete(ids=chunk, namespace=namespace, **kwargs)
381+
elif filter is not None:
382+
self._index.delete(filter=filter, namespace=namespace, **kwargs)
383+
else:
384+
raise ValueError("Either ids, delete_all, or filter must be provided.")
385+
386+
return None

langchain/vectorstores/redis.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -469,7 +469,7 @@ def from_texts(
469469

470470
@staticmethod
471471
def delete(
472-
ids: List[str],
472+
ids: Optional[List[str]] = None,
473473
**kwargs: Any,
474474
) -> bool:
475475
"""

langchain/vectorstores/supabase.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -346,12 +346,16 @@ def max_marginal_relevance_search(
346346
)
347347
return docs
348348

349-
def delete(self, ids: List[str]) -> None:
349+
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
350350
"""Delete by vector IDs.
351351
352352
Args:
353353
ids: List of ids to delete.
354354
"""
355+
356+
if ids is None:
357+
raise ValueError("No ids provided to delete.")
358+
355359
rows: List[dict[str, Any]] = [
356360
{
357361
"id": id,

langchain/vectorstores/weaviate.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -470,13 +470,16 @@ def from_texts(
470470
by_text=by_text,
471471
)
472472

473-
def delete(self, ids: List[str]) -> None:
473+
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
474474
"""Delete by vector IDs.
475475
476476
Args:
477477
ids: List of ids to delete.
478478
"""
479479

480+
if ids is None:
481+
raise ValueError("No ids provided to delete.")
482+
480483
# TODO: Check if this can be done in bulk
481484
for id in ids:
482485
self._client.data_object.delete(uuid=id)

0 commit comments

Comments
 (0)