diff --git a/docs/reference/inference/chat-completion-inference.asciidoc b/docs/reference/inference/chat-completion-inference.asciidoc
index 1d7d05b0f7d82..88699cca67af4 100644
--- a/docs/reference/inference/chat-completion-inference.asciidoc
+++ b/docs/reference/inference/chat-completion-inference.asciidoc
@@ -13,9 +13,9 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo
 [[chat-completion-inference-api-request]]
 ==== {api-request-title}
 
-`POST /_inference/<inference_id>/_unified`
+`POST /_inference/<inference_id>/_stream`
 
-`POST /_inference/chat_completion/<inference_id>/_unified`
+`POST /_inference/chat_completion/<inference_id>/_stream`
 
 
 [discrete]
@@ -37,7 +37,7 @@ It only works with the `chat_completion` task type for `openai` and `elastic` {i
 
 [NOTE]
 ====
-* The `chat_completion` task type is only available within the _unified API and only supports streaming.
+* The `chat_completion` task type is only available within the `_stream` API and only supports streaming.
 * The Chat completion {infer} API and the Stream {infer} API differ in their response structure and capabilities.
 The Chat completion {infer} API provides more comprehensive customization options through more fields and function calling support.
 If you use the `openai` service or the `elastic` service, use the Chat completion {infer} API.
diff --git a/docs/reference/inference/elastic-infer-service.asciidoc b/docs/reference/inference/elastic-infer-service.asciidoc
index 24ae7e20deec6..0ed820e1d3324 100644
--- a/docs/reference/inference/elastic-infer-service.asciidoc
+++ b/docs/reference/inference/elastic-infer-service.asciidoc
@@ -39,7 +39,7 @@ Available task types:
 
 [NOTE]
 ====
-The `chat_completion` task type only supports streaming and only through the `_unified` API.
+The `chat_completion` task type only supports streaming and only through the `_stream` API.
 
 include::inference-shared.asciidoc[tag=chat-completion-docs]
 ====
@@ -121,4 +121,4 @@ PUT /_inference/chat_completion/chat-completion-endpoint
     }
 }
 ------------------------------------------------------------
-// TEST[skip:TBD]
\ No newline at end of file
+// TEST[skip:TBD]
diff --git a/docs/reference/inference/service-openai.asciidoc b/docs/reference/inference/service-openai.asciidoc
index 511632736a35b..d2c0dd460f9e7 100644
--- a/docs/reference/inference/service-openai.asciidoc
+++ b/docs/reference/inference/service-openai.asciidoc
@@ -38,7 +38,7 @@ Available task types:
 
 [NOTE]
 ====
-The `chat_completion` task type only supports streaming and only through the `_unified` API.
+The `chat_completion` task type only supports streaming and only through the `_stream` API.
 
 include::inference-shared.asciidoc[tag=chat-completion-docs]
 ====
diff --git a/docs/reference/search/search-your-data/cohere-es.asciidoc b/docs/reference/search/search-your-data/cohere-es.asciidoc
index 3029cfd9f098c..748ed2e0d4051 100644
--- a/docs/reference/search/search-your-data/cohere-es.asciidoc
+++ b/docs/reference/search/search-your-data/cohere-es.asciidoc
@@ -267,7 +267,7 @@ for hit in response["hits"]["hits"]:
 [[cohere-es-rerank-results]]
 ===== Rerank search results
 
-To combine the results more effectively, use 
+To combine the results more effectively, use
 https://docs.cohere.com/docs/rerank-2[Cohere's Rerank v3] model through the
 {infer} API to provide a more precise semantic reranking of the results.
 
@@ -297,7 +297,7 @@ Rerank the results using the new {infer} endpoint.
 [source,py]
 --------------------------------------------------
 # Pass the query and the search results to the service
-response = client.inference.inference(
+response = client.inference.rerank(
     inference_id="cohere_rerank",
     body={
         "query": query,
@@ -322,7 +322,7 @@ for document in ranked_documents[0:10]:
 --------------------------------------------------
 
 The response is a list of documents in descending order of relevance. Each
-document has a corresponding index that reflects the order of the documents when 
+document has a corresponding index that reflects the order of the documents when
 they were sent to the {infer} endpoint.
 
 
@@ -335,7 +335,7 @@ With the ranked results, you can build a RAG system on the top of what you previ
 
 Pass in the retrieved documents and the query to receive a grounded response using Cohere's newest generative model https://docs.cohere.com/docs/command-r-plus[Command R+].
 
-Then pass in the query and the documents to the Chat API, and print out the response. 
+Then pass in the query and the documents to the Chat API, and print out the response.
 
 [source,py]
 --------------------------------------------------
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json
new file mode 100644
index 0000000000000..98854625d0471
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.chat_completion_unified.json
@@ -0,0 +1,37 @@
+{
+  "inference.chat_completion_unified": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/chat-completion-inference.html",
+      "description": "Perform chat completion inference"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "text/event-stream"
+      ],
+      "content_type": [
+        "application/json"
+      ]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/chat_completion/{inference_id}/_stream",
+          "methods": [
+            "POST"
+          ],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json
new file mode 100644
index 0000000000000..6c753e59e3434
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.completion.json
@@ -0,0 +1,37 @@
+{
+  "inference.completion": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+      "description": "Perform completion inference"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ],
+      "content_type": [
+        "application/json"
+      ]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/completion/{inference_id}",
+          "methods": [
+            "POST"
+          ],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json
index 14e7519c3796e..8887d9d0a1ebe 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.get.json
@@ -1,47 +1,49 @@
 {
-  "inference.get":{
-    "documentation":{
-      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html",
-      "description":"Get an inference endpoint"
+  "inference.get": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/get-inference-api.html",
+      "description": "Get an inference endpoint"
     },
-    "stability":"stable",
-    "visibility":"public",
-    "headers":{
-      "accept": [ "application/json"]
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ]
     },
-    "url":{
-      "paths":[
+    "url": {
+      "paths": [
         {
-          "path":"/_inference",
-          "methods":[
+          "path": "/_inference",
+          "methods": [
             "GET"
           ]
         },
         {
-          "path":"/_inference/{inference_id}",
-          "methods":[
+          "path": "/_inference/{inference_id}",
+          "methods": [
             "GET"
           ],
-          "parts":{
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
             }
           }
         },
         {
-          "path":"/_inference/{task_type}/{inference_id}",
-          "methods":[
+          "path": "/_inference/{task_type}/{inference_id}",
+          "methods": [
             "GET"
           ],
-          "parts":{
-            "task_type":{
-              "type":"string",
-              "description":"The task type"
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
             },
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
             }
           }
         }
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json
deleted file mode 100644
index eb4c1268c28ca..0000000000000
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.inference.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
-  "inference.inference":{
-    "documentation":{
-      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
-      "description":"Perform inference"
-    },
-    "stability":"stable",
-    "visibility":"public",
-    "headers":{
-      "accept": [ "application/json"],
-      "content_type": ["application/json"]
-    },
-    "url":{
-      "paths":[
-        {
-          "path":"/_inference/{inference_id}",
-          "methods":[
-            "POST"
-          ],
-          "parts":{
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
-            }
-          }
-        },
-        {
-          "path":"/_inference/{task_type}/{inference_id}",
-          "methods":[
-            "POST"
-          ],
-          "parts":{
-            "task_type":{
-              "type":"string",
-              "description":"The task type"
-            },
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
-            }
-          }
-        }
-      ]
-    },
-    "body":{
-      "description":"The inference payload"
-    }
-  }
-}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json
index 411392fe39908..4879007724450 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put.json
@@ -1,49 +1,53 @@
 {
-  "inference.put":{
-    "documentation":{
-      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html",
-      "description":"Configure an inference endpoint for use in the Inference API"
+  "inference.put": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/put-inference-api.html",
+      "description": "Configure an inference endpoint for use in the Inference API"
     },
-    "stability":"stable",
-    "visibility":"public",
-    "headers":{
-      "accept": [ "application/json"],
-      "content_type": ["application/json"]
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ],
+      "content_type": [
+        "application/json"
+      ]
     },
-    "url":{
-      "paths":[
+    "url": {
+      "paths": [
         {
-          "path":"/_inference/{inference_id}",
-          "methods":[
+          "path": "/_inference/{inference_id}",
+          "methods": [
             "PUT"
           ],
-          "parts":{
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
             }
           }
         },
         {
-          "path":"/_inference/{task_type}/{inference_id}",
-          "methods":[
+          "path": "/_inference/{task_type}/{inference_id}",
+          "methods": [
             "PUT"
           ],
-          "parts":{
-            "task_type":{
-              "type":"string",
-              "description":"The task type"
+          "parts": {
+            "task_type": {
+              "type": "string",
+              "description": "The task type"
             },
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
             }
           }
         }
       ]
     },
-    "body":{
-      "description":"The inference endpoint's task and service settings"
+    "body": {
+      "description": "The inference endpoint's task and service settings"
     }
   }
 }
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json
new file mode 100644
index 0000000000000..c08a51a8b9b98
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.rerank.json
@@ -0,0 +1,37 @@
+{
+  "inference.rerank": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+      "description": "Perform reranking inference"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ],
+      "content_type": [
+        "application/json"
+      ]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/rerank/{inference_id}",
+          "methods": [
+            "POST"
+          ],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json
new file mode 100644
index 0000000000000..90ebb6e6dc4c2
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.sparse_embedding.json
@@ -0,0 +1,37 @@
+{
+  "inference.sparse_embedding": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+      "description": "Perform sparse embedding inference"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ],
+      "content_type": [
+        "application/json"
+      ]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/sparse_embedding/{inference_id}",
+          "methods": [
+            "POST"
+          ],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json
new file mode 100644
index 0000000000000..a1d770c46305b
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_completion.json
@@ -0,0 +1,37 @@
+{
+  "inference.stream_completion": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html",
+      "description": "Perform streaming inference"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "text/event-stream"
+      ],
+      "content_type": [
+        "application/json"
+      ]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/completion/{inference_id}/_stream",
+          "methods": [
+            "POST"
+          ],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json
deleted file mode 100644
index 493306e10d5c7..0000000000000
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.stream_inference.json
+++ /dev/null
@@ -1,49 +0,0 @@
-{
-  "inference.stream_inference":{
-    "documentation":{
-      "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/post-stream-inference-api.html",
-      "description":"Perform streaming inference"
-    },
-    "stability":"stable",
-    "visibility":"public",
-    "headers":{
-      "accept": [ "text/event-stream"],
-      "content_type": ["application/json"]
-    },
-    "url":{
-      "paths":[
-        {
-          "path":"/_inference/{inference_id}/_stream",
-          "methods":[
-            "POST"
-          ],
-          "parts":{
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
-            }
-          }
-        },
-        {
-          "path":"/_inference/{task_type}/{inference_id}/_stream",
-          "methods":[
-            "POST"
-          ],
-          "parts":{
-            "task_type":{
-              "type":"string",
-              "description":"The task type"
-            },
-            "inference_id":{
-              "type":"string",
-              "description":"The inference Id"
-            }
-          }
-        }
-      ]
-    },
-    "body":{
-      "description":"The inference payload"
-    }
-  }
-}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json
new file mode 100644
index 0000000000000..309a1d80b7416
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.text_embedding.json
@@ -0,0 +1,37 @@
+{
+  "inference.text_embedding": {
+    "documentation": {
+      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/post-inference-api.html",
+      "description": "Perform text embedding inference"
+    },
+    "stability": "stable",
+    "visibility": "public",
+    "headers": {
+      "accept": [
+        "application/json"
+      ],
+      "content_type": [
+        "application/json"
+      ]
+    },
+    "url": {
+      "paths": [
+        {
+          "path": "/_inference/text_embedding/{inference_id}",
+          "methods": [
+            "POST"
+          ],
+          "parts": {
+            "inference_id": {
+              "type": "string",
+              "description": "The inference Id"
+            }
+          }
+        }
+      ]
+    },
+    "body": {
+      "description": "The inference payload"
+    }
+  }
+}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json
deleted file mode 100644
index 84182d19f8825..0000000000000
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.unified_inference.json
+++ /dev/null
@@ -1,45 +0,0 @@
-{
-  "inference.unified_inference": {
-    "documentation": {
-      "url": "https://www.elastic.co/guide/en/elasticsearch/reference/master/unified-inference-api.html",
-      "description": "Perform inference using the Unified Schema"
-    },
-    "stability": "stable",
-    "visibility": "public",
-    "headers": {
-      "accept": ["text/event-stream"],
-      "content_type": ["application/json"]
-    },
-    "url": {
-      "paths": [
-        {
-          "path": "/_inference/{inference_id}/_unified",
-          "methods": ["POST"],
-          "parts": {
-            "inference_id": {
-              "type": "string",
-              "description": "The inference Id"
-            }
-          }
-        },
-        {
-          "path": "/_inference/{task_type}/{inference_id}/_unified",
-          "methods": ["POST"],
-          "parts": {
-            "task_type": {
-              "type": "string",
-              "description": "The task type"
-            },
-            "inference_id": {
-              "type": "string",
-              "description": "The inference Id"
-            }
-          }
-        }
-      ]
-    },
-    "body": {
-      "description": "The inference payload"
-    }
-  }
-}
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java
index 928da95d9c2f0..58c952b9c556a 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/ChatCompletionInput.java
@@ -7,6 +7,8 @@
 
 package org.elasticsearch.xpack.inference.external.http.sender;
 
+import org.elasticsearch.inference.TaskType;
+
 import java.util.List;
 import java.util.Objects;
 
@@ -15,7 +17,7 @@
  * The main difference between this class and {@link UnifiedChatInput} is this should only be used for
  * {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the
  * {@link org.elasticsearch.inference.InferenceService#infer} code path. These are requests sent to the
- * API without using the <code>_unified</code> route.
+ * API without using the {@link TaskType#CHAT_COMPLETION} task type.
  */
 public class ChatCompletionInput extends InferenceInputs {
     private final List<String> input;
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java
index fceec7c431182..f4f0511a4cc1b 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/UnifiedChatInput.java
@@ -10,6 +10,7 @@
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.inference.Model;
+import org.elasticsearch.inference.TaskType;
 import org.elasticsearch.inference.UnifiedCompletionRequest;
 
 import java.util.List;
@@ -20,7 +21,7 @@
  * The main difference between this class and {@link ChatCompletionInput} is this should only be used for
  * {@link org.elasticsearch.inference.TaskType#COMPLETION} originating through the
  * {@link org.elasticsearch.inference.InferenceService#unifiedCompletionInfer(Model, UnifiedCompletionRequest, TimeValue, ActionListener)}
- * code path. These are requests sent to the API with the <code>_unified</code> route.
+ * code path. These are requests sent to the API with the <code>_stream</code> route and the {@link TaskType#CHAT_COMPLETION} task type.
  */
 public class UnifiedChatInput extends InferenceInputs {
     private final UnifiedCompletionRequest request;
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java
index 7f43676dfb5f0..6d2a26b1f0966 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/Paths.java
@@ -24,22 +24,22 @@ public final class Paths {
     static final String INFERENCE_SERVICES_PATH = "_inference/_services";
     static final String TASK_TYPE_INFERENCE_SERVICES_PATH = "_inference/_services/{" + TASK_TYPE + "}";
 
-    static final String STREAM_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/_stream";
+    public static final String STREAM_SUFFIX = "_stream";
+    static final String STREAM_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/" + STREAM_SUFFIX;
     static final String STREAM_TASK_TYPE_INFERENCE_ID_PATH = "_inference/{"
         + TASK_TYPE_OR_INFERENCE_ID
         + "}/{"
         + INFERENCE_ID
-        + "}/_stream";
+        + "}/"
+        + STREAM_SUFFIX;
 
     // TODO remove the _unified path
-    public static final String UNIFIED_SUFFIX = "_unified";
-    static final String UNIFIED_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/" + UNIFIED_SUFFIX;
+    static final String UNIFIED_INFERENCE_ID_PATH = "_inference/{" + TASK_TYPE_OR_INFERENCE_ID + "}/_unified";
     static final String UNIFIED_TASK_TYPE_INFERENCE_ID_PATH = "_inference/{"
         + TASK_TYPE_OR_INFERENCE_ID
         + "}/{"
         + INFERENCE_ID
-        + "}/"
-        + UNIFIED_SUFFIX;
+        + "}/_unified";
 
     private Paths() {
 
diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java
index 1ddae3cc8df95..13d641101a1cf 100644
--- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java
+++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java
@@ -42,7 +42,7 @@
 import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.ENABLED;
 import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.MAX_NUMBER_OF_ALLOCATIONS;
 import static org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings.MIN_NUMBER_OF_ALLOCATIONS;
-import static org.elasticsearch.xpack.inference.rest.Paths.UNIFIED_SUFFIX;
+import static org.elasticsearch.xpack.inference.rest.Paths.STREAM_SUFFIX;
 import static org.elasticsearch.xpack.inference.services.ServiceFields.SIMILARITY;
 
 public final class ServiceUtils {
@@ -796,7 +796,7 @@ public static String useChatCompletionUrlMessage(Model model) {
             model.getTaskType(),
             model.getTaskType(),
             model.getInferenceEntityId(),
-            UNIFIED_SUFFIX
+            STREAM_SUFFIX
         );
     }
 
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java
index 9dc23c890c14d..6248bf215d2d2 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/rest/RestUnifiedCompletionInferenceActionTests.java
@@ -67,7 +67,7 @@ public void testStreamIsTrue() {
             """;
 
         RestRequest inferenceRequest = new FakeRestRequest.Builder(xContentRegistry()).withMethod(RestRequest.Method.POST)
-            .withPath("_inference/completion/test/_unified")
+            .withPath("_inference/chat_completion/test/_unified")
             .withContent(new BytesArray(requestBody), XContentType.JSON)
             .build();
 
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java
index 5d66486731f5e..743a3fb666ecd 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java
@@ -436,7 +436,7 @@ public void testInfer_ThrowsErrorWhenTaskTypeIsNotValid_ChatCompletion() throws
                     "Inference entity [model_id] does not support task type [chat_completion] "
                         + "for inference, the task type must be one of [sparse_embedding]. "
                         + "The task type for the inference entity is chat_completion, "
-                        + "please use the _inference/chat_completion/model_id/_unified URL."
+                        + "please use the _inference/chat_completion/model_id/_stream URL."
                 )
             );
 
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java
index 34539042c1f0b..687f5430904e4 100644
--- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java
+++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java
@@ -937,7 +937,7 @@ public void testInfer_ThrowsErrorWhenTaskTypeIsNotValid_ChatCompletion() throws
                     "Inference entity [model_id] does not support task type [chat_completion] "
                         + "for inference, the task type must be one of [text_embedding, completion]. "
                         + "The task type for the inference entity is chat_completion, "
-                        + "please use the _inference/chat_completion/model_id/_unified URL."
+                        + "please use the _inference/chat_completion/model_id/_stream URL."
                 )
             );
 
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml
index cdc69001d33ef..62a49422079b8 100644
--- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml
+++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/inference/inference_crud.yml
@@ -25,18 +25,3 @@
             }
           }
   - match: { error.reason: "Unknown task_type [bad]" }
-
----
-"Test inference with bad task type":
-  - do:
-      catch: bad_request
-      inference.inference:
-        task_type: bad
-        inference_id: elser_model
-        body: >
-          {
-            "input": "important text"
-          }
-  - match: { error.reason: "Unknown task_type [bad]" }
-
-