diff --git a/docs/reference/inference/chat-completion-inference.asciidoc b/docs/reference/inference/chat-completion-inference.asciidoc index 1d7d05b0f7d82..88699cca67af4 100644 --- a/docs/reference/inference/chat-completion-inference.asciidoc +++ b/docs/reference/inference/chat-completion-inference.asciidoc @@ -13,9 +13,9 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo [[chat-completion-inference-api-request]] ==== {api-request-title} -`POST /_inference/<inference_id>/_unified` +`POST /_inference/<inference_id>/_stream` -`POST /_inference/chat_completion/<inference_id>/_unified` +`POST /_inference/chat_completion/<inference_id>/_stream` [discrete] @@ -37,7 +37,7 @@ It only works with the `chat_completion` task type for `openai` and `elastic` {i [NOTE] ==== -* The `chat_completion` task type is only available within the _unified API and only supports streaming. +* The `chat_completion` task type is only available within the _stream API and only supports streaming. * The Chat completion {infer} API and the Stream {infer} API differ in their response structure and capabilities. The Chat completion {infer} API provides more comprehensive customization options through more fields and function calling support. If you use the `openai` service or the `elastic` service, use the Chat completion {infer} API. diff --git a/docs/reference/inference/elastic-infer-service.asciidoc b/docs/reference/inference/elastic-infer-service.asciidoc index 24ae7e20deec6..0ed820e1d3324 100644 --- a/docs/reference/inference/elastic-infer-service.asciidoc +++ b/docs/reference/inference/elastic-infer-service.asciidoc @@ -39,7 +39,7 @@ Available task types: [NOTE] ==== -The `chat_completion` task type only supports streaming and only through the `_unified` API. +The `chat_completion` task type only supports streaming and only through the `_stream` API. 
include::inference-shared.asciidoc[tag=chat-completion-docs] ==== @@ -121,4 +121,4 @@ PUT /_inference/chat_completion/chat-completion-endpoint } } ------------------------------------------------------------ -// TEST[skip:TBD] \ No newline at end of file +// TEST[skip:TBD] diff --git a/docs/reference/inference/service-openai.asciidoc b/docs/reference/inference/service-openai.asciidoc index 511632736a35b..d2c0dd460f9e7 100644 --- a/docs/reference/inference/service-openai.asciidoc +++ b/docs/reference/inference/service-openai.asciidoc @@ -38,7 +38,7 @@ Available task types: [NOTE] ==== -The `chat_completion` task type only supports streaming and only through the `_unified` API. +The `chat_completion` task type only supports streaming and only through the `_stream` API. include::inference-shared.asciidoc[tag=chat-completion-docs] ==== diff --git a/docs/reference/search/search-your-data/cohere-es.asciidoc b/docs/reference/search/search-your-data/cohere-es.asciidoc index 3029cfd9f098c..748ed2e0d4051 100644 --- a/docs/reference/search/search-your-data/cohere-es.asciidoc +++ b/docs/reference/search/search-your-data/cohere-es.asciidoc @@ -267,7 +267,7 @@ for hit in response["hits"]["hits"]: [[cohere-es-rerank-results]] ===== Rerank search results -To combine the results more effectively, use +To combine the results more effectively, use https://docs.cohere.com/docs/rerank-2[Cohere's Rerank v3] model through the {infer} API to provide a more precise semantic reranking of the results. @@ -297,7 +297,7 @@ Rerank the results using the new {infer} endpoint. [source,py] -------------------------------------------------- # Pass the query and the search results to the service -response = client.inference.inference( +response = client.inference.rerank( inference_id="cohere_rerank", body={ "query": query, @@ -322,7 +322,7 @@ for document in ranked_documents[0:10]: -------------------------------------------------- The response is a list of documents in descending order of relevance. 
Each -document has a corresponding index that reflects the order of the documents when +document has a corresponding index that reflects the order of the documents when they were sent to the {infer} endpoint. @@ -335,7 +335,7 @@ With the ranked results, you can build a RAG system on the top of what you previ Pass in the retrieved documents and the query to receive a grounded response using Cohere's newest generative model https://docs.cohere.com/docs/command-r-plus[Command R+]. -Then pass in the query and the documents to the Chat API, and print out the response. +Then pass in the query and the documents to the Chat API, and print out the response. [source,py] -------------------------------------------------- diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json new file mode 100644 index 0000000000000..98854625d0471 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json @@ -0,0 +1,37 @@ +{ + "inference.chat_completion_unified": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html", + "description": "Perform chat completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "text/event-stream" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/chat_completion/{inference_id}/_stream", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json new file mode 100644 index 
0000000000000..6c753e59e3434 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json @@ -0,0 +1,37 @@ +{ + "inference.completion": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform completion inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/completion/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json index 14e7519c3796e..8887d9d0a1ebe 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json @@ -1,47 +1,49 @@ { - "inference.get":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html", - "description":"Get an inference endpoint" + "inference.get": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html", + "description": "Get an inference endpoint" }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "application/json"] + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ] }, - "url":{ - "paths":[ + "url": { + "paths": [ { - "path":"/_inference", - "methods":[ + "path": "/_inference", + "methods": [ "GET" ] }, { - "path":"/_inference/{inference_id}", - "methods":[ + "path": "/_inference/{inference_id}", + "methods": [ "GET" ], - "parts":{ - 
"inference_id":{ - "type":"string", - "description":"The inference Id" + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" } } }, { - "path":"/_inference/{task_type}/{inference_id}", - "methods":[ + "path": "/_inference/{task_type}/{inference_id}", + "methods": [ "GET" ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" + "parts": { + "task_type": { + "type": "string", + "description": "The task type" }, - "inference_id":{ - "type":"string", - "description":"The inference Id" + "inference_id": { + "type": "string", + "description": "The inference Id" } } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json deleted file mode 100644 index eb4c1268c28ca..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "inference.inference":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", - "description":"Perform inference" - }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "application/json"], - "content_type": ["application/json"] - }, - "url":{ - "paths":[ - { - "path":"/_inference/{inference_id}", - "methods":[ - "POST" - ], - "parts":{ - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - }, - { - "path":"/_inference/{task_type}/{inference_id}", - "methods":[ - "POST" - ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" - }, - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - } - ] - }, - "body":{ - "description":"The inference payload" - } - } -} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json index 
411392fe39908..4879007724450 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json @@ -1,49 +1,53 @@ { - "inference.put":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html", - "description":"Configure an inference endpoint for use in the Inference API" + "inference.put": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html", + "description": "Configure an inference endpoint for use in the Inference API" }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "application/json"], - "content_type": ["application/json"] + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] }, - "url":{ - "paths":[ + "url": { + "paths": [ { - "path":"/_inference/{inference_id}", - "methods":[ + "path": "/_inference/{inference_id}", + "methods": [ "PUT" ], - "parts":{ - "inference_id":{ - "type":"string", - "description":"The inference Id" + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" } } }, { - "path":"/_inference/{task_type}/{inference_id}", - "methods":[ + "path": "/_inference/{task_type}/{inference_id}", + "methods": [ "PUT" ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" + "parts": { + "task_type": { + "type": "string", + "description": "The task type" }, - "inference_id":{ - "type":"string", - "description":"The inference Id" + "inference_id": { + "type": "string", + "description": "The inference Id" } } } ] }, - "body":{ - "description":"The inference endpoint's task and service settings" + "body": { + "description": "The inference endpoint's task and service settings" } } } diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json new file mode 100644 index 0000000000000..c08a51a8b9b98 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json @@ -0,0 +1,37 @@ +{ + "inference.rerank": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform reranking inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/rerank/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json new file mode 100644 index 0000000000000..90ebb6e6dc4c2 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json @@ -0,0 +1,37 @@ +{ + "inference.sparse_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform sparse embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/sparse_embedding/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json new file mode 100644 index 0000000000000..a1d770c46305b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json @@ -0,0 +1,37 @@ +{ + "inference.stream_completion": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", + "description": "Perform streaming inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "text/event-stream" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/completion/{inference_id}/_stream", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json deleted file mode 100644 index 493306e10d5c7..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "inference.stream_inference":{ - "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html", - "description":"Perform streaming inference" - }, - "stability":"stable", - "visibility":"public", - "headers":{ - "accept": [ "text/event-stream"], - "content_type": ["application/json"] - }, - "url":{ - "paths":[ - { - "path":"/_inference/{inference_id}/_stream", - "methods":[ - "POST" - ], - "parts":{ - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - }, - { - "path":"/_inference/{task_type}/{inference_id}/_stream", - 
"methods":[ - "POST" - ], - "parts":{ - "task_type":{ - "type":"string", - "description":"The task type" - }, - "inference_id":{ - "type":"string", - "description":"The inference Id" - } - } - } - ] - }, - "body":{ - "description":"The inference payload" - } - } -} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json new file mode 100644 index 0000000000000..309a1d80b7416 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json @@ -0,0 +1,37 @@ +{ + "inference.text_embedding": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html", + "description": "Perform text embedding inference" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": [ + "application/json" + ], + "content_type": [ + "application/json" + ] + }, + "url": { + "paths": [ + { + "path": "/_inference/text_embedding/{inference_id}", + "methods": [ + "POST" + ], + "parts": { + "inference_id": { + "type": "string", + "description": "The inference Id" + } + } + } + ] + }, + "body": { + "description": "The inference payload" + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json deleted file mode 100644 index 84182d19f8825..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "inference.unified_inference": { - "documentation": { - "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html", - "description": "Perform inference using the Unified Schema" - }, - "stability": "stable", - "visibility": "public", - "headers": { - "accept": ["text/event-stream"], - "content_type": 
["application/json"] - }, - "url": { - "paths": [ - { - "path": "/_inference/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - }, - { - "path": "/_inference/{task_type}/{inference_id}/_unified", - "methods": ["POST"], - "parts": { - "task_type": { - "type": "string", - "description": "The task type" - }, - "inference_id": { - "type": "string", - "description": "The inference Id" - } - } - } - ] - }, - "body": { - "description": "The inference payload" - } - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java index 928da95d9c2f0..58c952b9c556a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java @@ -7,6 +7,8 @@ package org.elasticsearch.xpack.inference.external.http.sender; +import org.elasticsearch.inference.TaskType; + import java.util.List; import java.util.Objects; @@ -15,7 +17,7 @@ * The main difference between this class and {@link UnifiedChatInput} is this should only be used for * {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the * {@link org.elasticsearch.inference.InferenceService#infer} code path. These are requests sent to the - * API without using the _unified route. + * API without using the {@link TaskType#CHAT_COMPLETION} task type. 
 */ public class ChatCompletionInput extends InferenceInputs { private final List<String> input; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java index fceec7c431182..f4f0511a4cc1b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java @@ -10,6 +10,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.inference.UnifiedCompletionRequest; import java.util.List; @@ -20,7 +21,7 @@ * The main difference between this class and {@link ChatCompletionInput} is this should only be used for * {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the * {@link org.elasticsearch.inference.InferenceService#unifiedCompletionInfer(Model, UnifiedCompletionRequest, TimeValue, ActionListener)} - * code path. These are requests sent to the API with the _unified route. + * code path. These are requests sent to the API with the _stream route and {@link TaskType#CHAT_COMPLETION}. 
*/ public class UnifiedChatInput extends InferenceInputs { private final UnifiedCompletionRequest request; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java index 7f43676dfb5f0..6d2a26b1f0966 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java @@ -24,22 +24,22 @@ public final class Paths { static final String INFERENCE_SERVICES_PATH = "_inference/_services"; static final String TASK_TYPE_INFERENCE_SERVICES_PATH = "_inference/_services/{" + TASK_TYPE + "}"; - static final String STREAM_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/_stream"; + public static final String STREAM_SUFFIX = "_stream"; + static final String STREAM_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/" + STREAM_SUFFIX; static final String STREAM_TASK_TYPE_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/{" + INFERENCE_ID - + "}/_stream"; + + "}/" + + STREAM_SUFFIX; // TODO remove the _unified path - public static final String UNIFIED_SUFFIX = "_unified"; - static final String UNIFIED_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/" + UNIFIED_SUFFIX; + static final String UNIFIED_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/_unified"; static final String UNIFIED_TASK_TYPE_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/{" + INFERENCE_ID - + "}/" - + UNIFIED_SUFFIX; + + "}/_unified"; private Paths() { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java index 1ddae3cc8df95..13d641101a1cf 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java @@ -42,7 +42,7 @@ import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.ENABLED; import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.MAX_NUMBER_OF_ALLOCATIONS; import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.MIN_NUMBER_OF_ALLOCATIONS; -import static org.elasticsearch.xpack.inference.rest.Paths.UNIFIED_SUFFIX; +import static org.elasticsearch.xpack.inference.rest.Paths.STREAM_SUFFIX; import static org.elasticsearch.xpack.inference.services.ServiceFields.SIMILARITY; public final class ServiceUtils { @@ -796,7 +796,7 @@ public static String useChatCompletionUrlMessage(Model model) { model.getTaskType(), model.getTaskType(), model.getInferenceEntityId(), - UNIFIED_SUFFIX + STREAM_SUFFIX ); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java index 9dc23c890c14d..6248bf215d2d2 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java @@ -67,7 +67,7 @@ public void testStreamIsTrue() { """; RestRequest inferenceRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST) - .withPath("_inference/completion/test/_unified") + .withPath("_inference/chat_completion/test/_unified") .withContent(new BytesArray(requestBody), XContentType.JSON) .build(); diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index 5d66486731f5e..743a3fb666ecd 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -436,7 +436,7 @@ public void testInfer_ThrowsErrorWhenTaskTypeIsNotValid_ChatCompletion() throws "Inference entity [model_id] does not support task type [chat_completion] " + "for inference, the task type must be one of [sparse_embedding]. " + "The task type for the inference entity is chat_completion, " - + "please use the _inference/chat_completion/model_id/_unified URL." + + "please use the _inference/chat_completion/model_id/_stream URL." ) ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java index 34539042c1f0b..687f5430904e4 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java @@ -937,7 +937,7 @@ public void testInfer_ThrowsErrorWhenTaskTypeIsNotValid_ChatCompletion() throws "Inference entity [model_id] does not support task type [chat_completion] " + "for inference, the task type must be one of [text_embedding, completion]. " + "The task type for the inference entity is chat_completion, " - + "please use the _inference/chat_completion/model_id/_unified URL." + + "please use the _inference/chat_completion/model_id/_stream URL." 
) ); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml index cdc69001d33ef..62a49422079b8 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml @@ -25,18 +25,3 @@ } } - match: { error.reason: "Unknown task_type [bad]" } - ---- -"Test inference with bad task type": - - do: - catch: bad_request - inference.inference: - task_type: bad - inference_id: elser_model - body: > - { - "input": "important text" - } - - match: { error.reason: "Unknown task_type [bad]" } - -