[ML] [Inference API] Adding client API spec files for task type in URL #121078

Closed
6 changes: 3 additions & 3 deletions docs/reference/inference/chat-completion-inference.asciidoc
@@ -13,9 +13,9 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo
[[chat-completion-inference-api-request]]
==== {api-request-title}

-`POST /_inference/<inference_id>/_unified`
-`POST /_inference/<inference_id>/_stream`

+`POST /_inference/chat_completion/<inference_id>/_unified`
+`POST /_inference/chat_completion/<inference_id>/_stream`


[discrete]
@@ -37,7 +37,7 @@ It only works with the `chat_completion` task type for `openai` and `elastic` {i

[NOTE]
====
-* The `chat_completion` task type is only available within the _unified API and only supports streaming.
+* The `chat_completion` task type is only available within the _stream API and only supports streaming.
* The Chat completion {infer} API and the Stream {infer} API differ in their response structure and capabilities.
The Chat completion {infer} API provides more comprehensive customization options through more fields and function calling support.
If you use the `openai` service or the `elastic` service, use the Chat completion {infer} API.
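For orientation, a minimal sketch of calling the new task-type-prefixed route from Python: the cluster address, API key, endpoint ID `chat-bot`, and message body are all hypothetical, and `perform_request` is the Python client's low-level escape hatch rather than a dedicated chat-completion helper.

[source,py]
--------------------------------------------------
from elasticsearch import Elasticsearch

# Hypothetical cluster address, credentials, and inference endpoint ID.
client = Elasticsearch("http://localhost:9200", api_key="...")

# POST /_inference/chat_completion/<inference_id>/_stream
# chat_completion only supports streaming, so responses arrive as
# server-sent events (hence accept: text/event-stream).
response = client.perform_request(
    "POST",
    "/_inference/chat_completion/chat-bot/_stream",
    headers={
        "accept": "text/event-stream",
        "content-type": "application/json",
    },
    body={"messages": [{"role": "user", "content": "Say hello."}]},
)
--------------------------------------------------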
4 changes: 2 additions & 2 deletions docs/reference/inference/elastic-infer-service.asciidoc
@@ -39,7 +39,7 @@ Available task types:

[NOTE]
====
-The `chat_completion` task type only supports streaming and only through the `_unified` API.
+The `chat_completion` task type only supports streaming and only through the `_stream` API.

include::inference-shared.asciidoc[tag=chat-completion-docs]
====
@@ -121,4 +121,4 @@ PUT /_inference/chat_completion/chat-completion-endpoint
}
}
------------------------------------------------------------
-// TEST[skip:TBD]
+// TEST[skip:TBD]
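The hunk above shows only the tail of the endpoint-creation example. A hedged sketch of the same request in Python, reusing the `client` from the earlier sketch (the `model_id` value is illustrative, not a documented default):

[source,py]
--------------------------------------------------
# PUT /_inference/chat_completion/chat-completion-endpoint
# Creates a chat_completion endpoint with the task type in the URL.
client.perform_request(
    "PUT",
    "/_inference/chat_completion/chat-completion-endpoint",
    headers={"accept": "application/json", "content-type": "application/json"},
    body={
        "service": "elastic",
        "service_settings": {"model_id": "rainbow-sprinkles"},  # illustrative
    },
)
--------------------------------------------------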
2 changes: 1 addition & 1 deletion docs/reference/inference/service-openai.asciidoc
@@ -38,7 +38,7 @@ Available task types:

[NOTE]
====
-The `chat_completion` task type only supports streaming and only through the `_unified` API.
+The `chat_completion` task type only supports streaming and only through the `_stream` API.

include::inference-shared.asciidoc[tag=chat-completion-docs]
====
8 changes: 4 additions & 4 deletions docs/reference/search/search-your-data/cohere-es.asciidoc
@@ -267,7 +267,7 @@ for hit in response["hits"]["hits"]:
[[cohere-es-rerank-results]]
===== Rerank search results

-To combine the results more effectively, use
+To combine the results more effectively, use
https://docs.cohere.com/docs/rerank-2[Cohere's Rerank v3] model through the
{infer} API to provide a more precise semantic reranking of the results.

@@ -297,7 +297,7 @@ Rerank the results using the new {infer} endpoint.
[source,py]
--------------------------------------------------
# Pass the query and the search results to the service
-response = client.inference.inference(
+response = client.inference.rerank(
inference_id="cohere_rerank",
body={
"query": query,
@@ -322,7 +322,7 @@ for document in ranked_documents[0:10]:
--------------------------------------------------
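The change above swaps the generic `inference()` helper for the task-specific `rerank()` helper, mirroring the move of the task type into the URL. A hedged before/after sketch, assuming the `query` string and `documents` list built earlier in the tutorial:

[source,py]
--------------------------------------------------
# Before: generic helper; the task type was resolved server-side.
# response = client.inference.inference(inference_id="cohere_rerank", body=...)

# After: task-specific helper, matching POST /_inference/rerank/{inference_id}.
response = client.inference.rerank(
    inference_id="cohere_rerank",
    body={
        "query": query,      # the user query from the tutorial
        "input": documents,  # candidate texts, in send order
    },
)
--------------------------------------------------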

The response is a list of documents in descending order of relevance. Each
-document has a corresponding index that reflects the order of the documents when
+document has a corresponding index that reflects the order of the documents when
they were sent to the {infer} endpoint.


@@ -335,7 +335,7 @@ With the ranked results, you can build a RAG system on the top of what you previ

Pass in the retrieved documents and the query to receive a grounded response using Cohere's newest generative model https://docs.cohere.com/docs/command-r-plus[Command R+].

-Then pass in the query and the documents to the Chat API, and print out the response.
+Then pass in the query and the documents to the Chat API, and print out the response.

[source,py]
--------------------------------------------------
37 changes: 37 additions & 0 deletions rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json
@@ -0,0 +1,37 @@
{
  "inference.chat_completion_unified": {
    "documentation": {
      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html",
      "description": "Perform chat completion inference"
    },
    "stability": "stable",
    "visibility": "public",
    "headers": {
      "accept": [
        "text/event-stream"
      ],
      "content_type": [
        "application/json"
      ]
    },
    "url": {
      "paths": [
        {
          "path": "/_inference/chat_completion/{inference_id}/_stream",
          "methods": [
            "POST"
          ],
          "parts": {
            "inference_id": {
              "type": "string",
              "description": "The inference Id"
            }
          }
        }
      ]
    },
    "body": {
      "description": "The inference payload"
    }
  }
}
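The `parts` object names every placeholder in the `path` template. As a sketch of the substitution a generated client performs (the helper below is hypothetical, not part of any shipped client):

[source,py]
--------------------------------------------------
# Hypothetical helper: expand a spec path template using its "parts".
def expand_path(template: str, **parts: str) -> str:
    for name, value in parts.items():
        template = template.replace("{" + name + "}", value)
    return template

print(expand_path("/_inference/chat_completion/{inference_id}/_stream",
                  inference_id="chat-bot"))
# -> /_inference/chat_completion/chat-bot/_stream
--------------------------------------------------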
37 changes: 37 additions & 0 deletions rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json
@@ -0,0 +1,37 @@
{
  "inference.completion": {
    "documentation": {
      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
      "description": "Perform completion inference"
    },
    "stability": "stable",
    "visibility": "public",
    "headers": {
      "accept": [
        "application/json"
      ],
      "content_type": [
        "application/json"
      ]
    },
    "url": {
      "paths": [
        {
          "path": "/_inference/completion/{inference_id}",
          "methods": [
            "POST"
          ],
          "parts": {
            "inference_id": {
              "type": "string",
              "description": "The inference Id"
            }
          }
        }
      ]
    },
    "body": {
      "description": "The inference payload"
    }
  }
}
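Unlike the chat-completion spec, this one accepts plain `application/json`, so the route returns a single non-streaming response. A minimal sketch against it, with a hypothetical endpoint ID and the `client` from the earlier sketch:

[source,py]
--------------------------------------------------
# POST /_inference/completion/{inference_id} -> one JSON response.
resp = client.perform_request(
    "POST",
    "/_inference/completion/my-completion-endpoint",  # hypothetical ID
    headers={"accept": "application/json", "content-type": "application/json"},
    body={"input": "In one sentence, what is Elasticsearch?"},
)
--------------------------------------------------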
rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json
@@ -1,47 +1,49 @@
 {
-  "inference.get":{
-    "documentation":{
-      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html",
-      "description":"Get an inference endpoint"
+  "inference.get": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html",
+      "description": "Get an inference endpoint"
     },
-    "stability":"stable",
-    "visibility":"public",
-    "headers":{
-      "accept": [ "application/json"]
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ]
     },
-    "url":{
-      "paths":[
+    "url": {
+      "paths": [
         {
-          "path":"/_inference",
-          "methods":[
+          "path": "/_inference",
+          "methods": [
             "GET"
           ]
         },
         {
-          "path":"/_inference/{inference_id}",
-          "methods":[
+          "path": "/_inference/{inference_id}",
+          "methods": [
             "GET"
           ],
-          "parts":{
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
             }
           }
         },
         {
-          "path":"/_inference/{task_type}/{inference_id}",
-          "methods":[
+          "path": "/_inference/{task_type}/{inference_id}",
+          "methods": [
             "GET"
           ],
-          "parts":{
-            "task_type":{
-              "type":"string",
-              "description":"The task type"
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
             },
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
             }
           }
         }
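The reformatted spec keeps all three GET routes unchanged; a sketch of each (the endpoint IDs and task type are illustrative):

[source,py]
--------------------------------------------------
# GET /_inference - list every inference endpoint.
client.perform_request("GET", "/_inference")

# GET /_inference/{inference_id} - fetch one endpoint by ID.
client.perform_request("GET", "/_inference/cohere_rerank")

# GET /_inference/{task_type}/{inference_id} - same, scoped to a task type.
client.perform_request("GET", "/_inference/rerank/cohere_rerank")
--------------------------------------------------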

This file was deleted.

rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json
@@ -1,49 +1,53 @@
 {
-  "inference.put":{
-    "documentation":{
-      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html",
-      "description":"Configure an inference endpoint for use in the Inference API"
+  "inference.put": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html",
+      "description": "Configure an inference endpoint for use in the Inference API"
     },
-    "stability":"stable",
-    "visibility":"public",
-    "headers":{
-      "accept": [ "application/json"],
-      "content_type": ["application/json"]
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ],
+      "content_type": [
+        "application/json"
+      ]
     },
-    "url":{
-      "paths":[
+    "url": {
+      "paths": [
         {
-          "path":"/_inference/{inference_id}",
-          "methods":[
+          "path": "/_inference/{inference_id}",
+          "methods": [
             "PUT"
           ],
-          "parts":{
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
             }
           }
         },
         {
-          "path":"/_inference/{task_type}/{inference_id}",
-          "methods":[
+          "path": "/_inference/{task_type}/{inference_id}",
+          "methods": [
             "PUT"
           ],
-          "parts":{
-            "task_type":{
-              "type":"string",
-              "description":"The task type"
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
            },
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
            }
          }
        }
      ]
    },
-    "body":{
-      "description":"The inference endpoint's task and service settings"
+    "body": {
+      "description": "The inference endpoint's task and service settings"
    }
  }
}