From b8d4301c1c89a6e15403a621bfded22564b3636f Mon Sep 17 00:00:00 2001 From: Roo Code Date: Sat, 12 Jul 2025 07:13:42 +0000 Subject: [PATCH 1/3] fix: resolve vector dimension mismatch error when switching embedding models (#5616) - Enhanced QdrantVectorStore.initialize() with robust dimension mismatch handling - Added atomic collection recreation with step-by-step verification - Improved error reporting with detailed context for better user experience - Added comprehensive test coverage for all dimension mismatch scenarios - Fixes issue where switching from 2048-dim to 768-dim models would fail --- .../__tests__/qdrant-client.spec.ts | 221 ++++++++++++++++-- .../code-index/vector-store/qdrant-client.ts | 158 +++++++++---- 2 files changed, 314 insertions(+), 65 deletions(-) diff --git a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts index 8bd145ac409..e539c2edde4 100644 --- a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts +++ b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts @@ -10,7 +10,16 @@ vitest.mock("@qdrant/js-client-rest") vitest.mock("crypto") vitest.mock("../../../../utils/path") vitest.mock("../../../../i18n", () => ({ - t: (key: string) => key, // Just return the key for testing + t: (key: string, params?: any) => { + // Mock translation function that includes parameters for testing + if (key === "embeddings:vectorStore.vectorDimensionMismatch" && params?.errorMessage) { + return `Failed to update vector index for new model. Please try clearing the index and starting again. Details: ${params.errorMessage}` + } + if (key === "embeddings:vectorStore.qdrantConnectionFailed" && params?.qdrantUrl && params?.errorMessage) { + return `Failed to connect to Qdrant vector database. Please ensure Qdrant is running and accessible at ${params.qdrantUrl}. Error: ${params.errorMessage}` + } + return key // Just return the key for other cases + }, })) vitest.mock("path", () => ({ ...vitest.importActual("path"), @@ -564,16 +573,22 @@ describe("QdrantVectorStore", () => { }) it("should recreate collection if it exists but vectorSize mismatches and return true", async () => { const differentVectorSize = 768 - // Mock getCollection to return existing collection info with different vector size - mockQdrantClientInstance.getCollection.mockResolvedValue({ - config: { - params: { - vectors: { - size: differentVectorSize, // Mismatching vector size + // Mock getCollection to return existing collection info with different vector size first, + // then return 404 to confirm deletion + mockQdrantClientInstance.getCollection + .mockResolvedValueOnce({ + config: { + params: { + vectors: { + size: differentVectorSize, // Mismatching vector size + }, }, }, - }, - } as any) + } as any) + .mockRejectedValueOnce({ + response: { status: 404 }, + message: "Not found", + }) mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any) mockQdrantClientInstance.createCollection.mockResolvedValue(true as any) mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any) @@ -582,7 +597,7 @@ describe("QdrantVectorStore", () => { const result = await vectorStore.initialize() expect(result).toBe(true) - expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1) + expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2) // Once to check, once to verify deletion expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledWith(expectedCollectionName) expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1) expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledWith(expectedCollectionName) @@ -703,7 +718,7 @@ describe("QdrantVectorStore", () => { } expect(caughtError).toBeDefined() - expect(caughtError.message).toContain("embeddings:vectorStore.vectorDimensionMismatch") + expect(caughtError.message).toContain("Failed to update vector index for new model") expect(caughtError.cause).toBe(deleteError) expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1) @@ -719,15 +734,21 @@ describe("QdrantVectorStore", () => { it("should throw vectorDimensionMismatch error when createCollection fails during recreation", async () => { const differentVectorSize = 768 - mockQdrantClientInstance.getCollection.mockResolvedValue({ - config: { - params: { - vectors: { - size: differentVectorSize, + mockQdrantClientInstance.getCollection + .mockResolvedValueOnce({ + config: { + params: { + vectors: { + size: differentVectorSize, + }, }, }, - }, - } as any) + } as any) + // Second call should return 404 to confirm deletion + .mockRejectedValueOnce({ + response: { status: 404 }, + message: "Not found", + }) // Delete succeeds but create fails mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any) @@ -745,10 +766,10 @@ describe("QdrantVectorStore", () => { } expect(caughtError).toBeDefined() - expect(caughtError.message).toContain("embeddings:vectorStore.vectorDimensionMismatch") + expect(caughtError.message).toContain("Failed to update vector index for new model") expect(caughtError.cause).toBe(createError) - expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(1) + expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2) expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1) expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledTimes(1) expect(mockQdrantClientInstance.createPayloadIndex).not.toHaveBeenCalled() @@ -758,6 +779,166 @@ describe("QdrantVectorStore", () => { ;(console.error as any).mockRestore() ;(console.warn as any).mockRestore() }) + + it("should verify collection deletion before proceeding with recreation", async () => { + const differentVectorSize = 768 + mockQdrantClientInstance.getCollection + .mockResolvedValueOnce({ + config: { + params: { + vectors: { + size: differentVectorSize, + }, + }, + }, + } as any) + // Second call should return 404 to confirm deletion + .mockRejectedValueOnce({ + response: { status: 404 }, + message: "Not found", + }) + + mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any) + mockQdrantClientInstance.createCollection.mockResolvedValue(true as any) + mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any) + vitest.spyOn(console, "warn").mockImplementation(() => {}) + + const result = await vectorStore.initialize() + + expect(result).toBe(true) + // Should call getCollection twice: once to check existing, once to verify deletion + expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2) + expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1) + expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledTimes(1) + expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(5) + ;(console.warn as any).mockRestore() + }) + + it("should throw error if collection still exists after deletion attempt", async () => { + const differentVectorSize = 768 + mockQdrantClientInstance.getCollection + .mockResolvedValueOnce({ + config: { + params: { + vectors: { + size: differentVectorSize, + }, + }, + }, + } as any) + // Second call should still return the collection (deletion failed) + .mockResolvedValueOnce({ + config: { + params: { + vectors: { + size: differentVectorSize, + }, + }, + }, + } as any) + + mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any) + vitest.spyOn(console, "error").mockImplementation(() => {}) + vitest.spyOn(console, "warn").mockImplementation(() => {}) + + let caughtError: any + try { + await vectorStore.initialize() + } catch (error: any) { + caughtError = error + } + + expect(caughtError).toBeDefined() + expect(caughtError.message).toContain("Failed to update vector index for new model") + // The error message should contain the contextual error details + expect(caughtError.message).toContain("Deleted existing collection but failed verification step") + + expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2) + expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1) + expect(mockQdrantClientInstance.createCollection).not.toHaveBeenCalled() + expect(mockQdrantClientInstance.createPayloadIndex).not.toHaveBeenCalled() + ;(console.error as any).mockRestore() + ;(console.warn as any).mockRestore() + }) + + it("should handle dimension mismatch scenario from 2048 to 768 dimensions", async () => { + // Simulate the exact scenario from the issue: switching from 2048 to 768 dimensions + const oldVectorSize = 2048 + const newVectorSize = 768 + + // Create a new vector store with the new dimension + const newVectorStore = new QdrantVectorStore(mockWorkspacePath, mockQdrantUrl, newVectorSize, mockApiKey) + + mockQdrantClientInstance.getCollection + .mockResolvedValueOnce({ + config: { + params: { + vectors: { + size: oldVectorSize, // Existing collection has 2048 dimensions + }, + }, + }, + } as any) + // Second call should return 404 to confirm deletion + .mockRejectedValueOnce({ + response: { status: 404 }, + message: "Not found", + }) + + mockQdrantClientInstance.deleteCollection.mockResolvedValue(true as any) + mockQdrantClientInstance.createCollection.mockResolvedValue(true as any) + mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any) + vitest.spyOn(console, "warn").mockImplementation(() => {}) + + const result = await newVectorStore.initialize() + + expect(result).toBe(true) + expect(mockQdrantClientInstance.getCollection).toHaveBeenCalledTimes(2) + expect(mockQdrantClientInstance.deleteCollection).toHaveBeenCalledTimes(1) + expect(mockQdrantClientInstance.createCollection).toHaveBeenCalledWith(expectedCollectionName, { + vectors: { + size: newVectorSize, // Should create with new 768 dimensions + distance: "Cosine", + }, + }) + expect(mockQdrantClientInstance.createPayloadIndex).toHaveBeenCalledTimes(5) + ;(console.warn as any).mockRestore() + }) + + it("should provide detailed error context for different failure scenarios", async () => { + const differentVectorSize = 768 + mockQdrantClientInstance.getCollection.mockResolvedValue({ + config: { + params: { + vectors: { + size: differentVectorSize, + }, + }, + }, + } as any) + + // Test deletion failure with specific error message + const deleteError = new Error("Qdrant server unavailable") + mockQdrantClientInstance.deleteCollection.mockRejectedValue(deleteError) + vitest.spyOn(console, "error").mockImplementation(() => {}) + vitest.spyOn(console, "warn").mockImplementation(() => {}) + + let caughtError: any + try { + await vectorStore.initialize() + } catch (error: any) { + caughtError = error + } + + expect(caughtError).toBeDefined() + expect(caughtError.message).toContain("Failed to update vector index for new model") + // The error message should contain the contextual error details + expect(caughtError.message).toContain("Failed to delete existing collection with vector size") + expect(caughtError.message).toContain("Qdrant server unavailable") + expect(caughtError.cause).toBe(deleteError) + ;(console.error as any).mockRestore() + ;(console.warn as any).mockRestore() + }) }) it("should return true when collection exists", async () => { diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts index b23f5bca8a4..5121d65b97c 100644 --- a/src/services/code-index/vector-store/qdrant-client.ts +++ b/src/services/code-index/vector-store/qdrant-client.ts @@ -160,58 +160,32 @@ export class QdrantVectorStore implements IVectorStore { created = true } else { // Collection exists, check vector size - const existingVectorSize = collectionInfo.config?.params?.vectors?.size + const vectorsConfig = collectionInfo.config?.params?.vectors + let existingVectorSize: number + + if (typeof vectorsConfig === "number") { + existingVectorSize = vectorsConfig + } else if ( + vectorsConfig && + typeof vectorsConfig === "object" && + "size" in vectorsConfig && + typeof vectorsConfig.size === "number" + ) { + existingVectorSize = vectorsConfig.size + } else { + existingVectorSize = 0 // Fallback for unknown configuration + } + if (existingVectorSize === this.vectorSize) { created = false // Exists and correct } else { - // Exists but wrong vector size, recreate - try { - console.warn( - `[QdrantVectorStore] Collection ${this.collectionName} exists with vector size ${existingVectorSize}, but expected ${this.vectorSize}. Recreating collection.`, - ) - await this.client.deleteCollection(this.collectionName) - await this.client.createCollection(this.collectionName, { - vectors: { - size: this.vectorSize, - distance: this.DISTANCE_METRIC, - }, - }) - created = true - } catch (recreationError) { - const errorMessage = - recreationError instanceof Error ? recreationError.message : String(recreationError) - console.error( - `[QdrantVectorStore] CRITICAL: Failed to recreate collection ${this.collectionName} for new vector size. Error: ${errorMessage}`, - ) - const dimensionMismatchError = new Error( - t("embeddings:vectorStore.vectorDimensionMismatch", { - errorMessage, - }), - ) - // Use error.cause to preserve the original error context - dimensionMismatchError.cause = recreationError - throw dimensionMismatchError - } + // Exists but wrong vector size, recreate with enhanced error handling + created = await this._recreateCollectionWithNewDimension(existingVectorSize) } } // Create payload indexes - for (let i = 0; i <= 4; i++) { - try { - await this.client.createPayloadIndex(this.collectionName, { - field_name: `pathSegments.${i}`, - field_schema: "keyword", - }) - } catch (indexError: any) { - const errorMessage = (indexError?.message || "").toLowerCase() - if (!errorMessage.includes("already exists")) { - console.warn( - `[QdrantVectorStore] Could not create payload index for pathSegments.${i} on ${this.collectionName}. Details:`, - indexError?.message || indexError, - ) - } - } - } + await this._createPayloadIndexes() return created } catch (error: any) { const errorMessage = error?.message || error @@ -232,6 +206,100 @@ export class QdrantVectorStore implements IVectorStore { } } + /** + * Recreates the collection with a new vector dimension, handling failures gracefully. + * @param existingVectorSize The current vector size of the existing collection + * @returns Promise resolving to boolean indicating if a new collection was created + */ + private async _recreateCollectionWithNewDimension(existingVectorSize: number): Promise { + console.warn( + `[QdrantVectorStore] Collection ${this.collectionName} exists with vector size ${existingVectorSize}, but expected ${this.vectorSize}. Recreating collection.`, + ) + + let deletionSucceeded = false + let recreationAttempted = false + + try { + // Step 1: Attempt to delete the existing collection + console.log(`[QdrantVectorStore] Deleting existing collection ${this.collectionName}...`) + await this.client.deleteCollection(this.collectionName) + deletionSucceeded = true + console.log(`[QdrantVectorStore] Successfully deleted collection ${this.collectionName}`) + + // Step 2: Wait a brief moment to ensure deletion is processed + await new Promise((resolve) => setTimeout(resolve, 100)) + + // Step 3: Verify the collection is actually deleted + const verificationInfo = await this.getCollectionInfo() + if (verificationInfo !== null) { + throw new Error("Collection still exists after deletion attempt") + } + + // Step 4: Create the new collection with correct dimensions + console.log( + `[QdrantVectorStore] Creating new collection ${this.collectionName} with vector size ${this.vectorSize}...`, + ) + recreationAttempted = true + await this.client.createCollection(this.collectionName, { + vectors: { + size: this.vectorSize, + distance: this.DISTANCE_METRIC, + }, + }) + console.log(`[QdrantVectorStore] Successfully created new collection ${this.collectionName}`) + return true + } catch (recreationError) { + const errorMessage = recreationError instanceof Error ? recreationError.message : String(recreationError) + + // Provide detailed error context based on what stage failed + let contextualErrorMessage: string + if (!deletionSucceeded) { + contextualErrorMessage = `Failed to delete existing collection with vector size ${existingVectorSize}. ${errorMessage}` + } else if (!recreationAttempted) { + contextualErrorMessage = `Deleted existing collection but failed verification step. ${errorMessage}` + } else { + contextualErrorMessage = `Deleted existing collection but failed to create new collection with vector size ${this.vectorSize}. ${errorMessage}` + } + + console.error( + `[QdrantVectorStore] CRITICAL: Failed to recreate collection ${this.collectionName} for dimension change (${existingVectorSize} -> ${this.vectorSize}). ${contextualErrorMessage}`, + ) + + // Create a comprehensive error message for the user + const dimensionMismatchError = new Error( + t("embeddings:vectorStore.vectorDimensionMismatch", { + errorMessage: contextualErrorMessage, + }), + ) + + // Preserve the original error context + dimensionMismatchError.cause = recreationError + throw dimensionMismatchError + } + } + + /** + * Creates payload indexes for the collection, handling errors gracefully. + */ + private async _createPayloadIndexes(): Promise { + for (let i = 0; i <= 4; i++) { + try { + await this.client.createPayloadIndex(this.collectionName, { + field_name: `pathSegments.${i}`, + field_schema: "keyword", + }) + } catch (indexError: any) { + const errorMessage = (indexError?.message || "").toLowerCase() + if (!errorMessage.includes("already exists")) { + console.warn( + `[QdrantVectorStore] Could not create payload index for pathSegments.${i} on ${this.collectionName}. Details:`, + indexError?.message || indexError, + ) + } + } + } + } + /** * Upserts points into the vector store * @param points Array of points to upsert From e29f6335c4890e90bff596681a956d85a80a3db3 Mon Sep 17 00:00:00 2001 From: Daniel Riccio Date: Mon, 14 Jul 2025 10:50:10 -0500 Subject: [PATCH 2/3] feat: add additional configuration options for codebase index embedder --- src/core/webview/ClineProvider.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 12fd9dd3495..628329e6a1e 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1542,6 +1542,10 @@ export class ClineProvider codebaseIndexEmbedderProvider: codebaseIndexConfig?.codebaseIndexEmbedderProvider ?? "openai", codebaseIndexEmbedderBaseUrl: codebaseIndexConfig?.codebaseIndexEmbedderBaseUrl ?? "", codebaseIndexEmbedderModelId: codebaseIndexConfig?.codebaseIndexEmbedderModelId ?? "", + codebaseIndexEmbedderModelDimension: codebaseIndexConfig?.codebaseIndexEmbedderModelDimension, + codebaseIndexOpenAiCompatibleBaseUrl: codebaseIndexConfig?.codebaseIndexOpenAiCompatibleBaseUrl, + codebaseIndexSearchMaxResults: codebaseIndexConfig?.codebaseIndexSearchMaxResults, + codebaseIndexSearchMinScore: codebaseIndexConfig?.codebaseIndexSearchMinScore, }, mdmCompliant: this.checkMdmCompliance(), profileThresholds: profileThresholds ?? {}, @@ -1703,6 +1707,12 @@ export class ClineProvider stateValues.codebaseIndexConfig?.codebaseIndexEmbedderProvider ?? "openai", codebaseIndexEmbedderBaseUrl: stateValues.codebaseIndexConfig?.codebaseIndexEmbedderBaseUrl ?? "", codebaseIndexEmbedderModelId: stateValues.codebaseIndexConfig?.codebaseIndexEmbedderModelId ?? "", + codebaseIndexEmbedderModelDimension: + stateValues.codebaseIndexConfig?.codebaseIndexEmbedderModelDimension, + codebaseIndexOpenAiCompatibleBaseUrl: + stateValues.codebaseIndexConfig?.codebaseIndexOpenAiCompatibleBaseUrl, + codebaseIndexSearchMaxResults: stateValues.codebaseIndexConfig?.codebaseIndexSearchMaxResults, + codebaseIndexSearchMinScore: stateValues.codebaseIndexConfig?.codebaseIndexSearchMinScore, }, profileThresholds: stateValues.profileThresholds ?? {}, } From 15ec56ae41b570e824faa68967f87abde6885398 Mon Sep 17 00:00:00 2001 From: Daniel <57051444+daniel-lxs@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:10:22 -0500 Subject: [PATCH 3/3] Update src/core/webview/ClineProvider.ts Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com> --- src/core/webview/ClineProvider.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 628329e6a1e..8fa9ceccfa6 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -1542,7 +1542,7 @@ export class ClineProvider codebaseIndexEmbedderProvider: codebaseIndexConfig?.codebaseIndexEmbedderProvider ?? "openai", codebaseIndexEmbedderBaseUrl: codebaseIndexConfig?.codebaseIndexEmbedderBaseUrl ?? "", codebaseIndexEmbedderModelId: codebaseIndexConfig?.codebaseIndexEmbedderModelId ?? "", - codebaseIndexEmbedderModelDimension: codebaseIndexConfig?.codebaseIndexEmbedderModelDimension, + codebaseIndexEmbedderModelDimension: codebaseIndexConfig?.codebaseIndexEmbedderModelDimension ?? 1536, codebaseIndexOpenAiCompatibleBaseUrl: codebaseIndexConfig?.codebaseIndexOpenAiCompatibleBaseUrl, codebaseIndexSearchMaxResults: codebaseIndexConfig?.codebaseIndexSearchMaxResults, codebaseIndexSearchMinScore: codebaseIndexConfig?.codebaseIndexSearchMinScore,