GH-4235: Add support for OpenAI service_tier in OpenAiChatOptions

rafaelrddc · sobychacko · commit ad2e1bcda55e · 2025-08-27T13:40:58.000-04:00
Fixes #4235 Signed-off-by: Rafael Cunha <12313126+rafaelrddc@users.noreply.github.com>
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/OpenAiChatOptions.java
@@ -233,6 +233,11 @@ public class OpenAiChatOptions implements ToolCallingChatOptions {
 	 */
 	private @JsonProperty("web_search_options") WebSearchOptions webSearchOptions;
 
+	/**
+	 * Specifies the <a href="https://platform.openai.com/docs/api-reference/responses/create#responses_create-service_tier">processing type</a> used for serving the request. See {@link OpenAiApi.ServiceTier} for the known values.
+	 */
+	private @JsonProperty("service_tier") String serviceTier;
+
 	/**
 	 * Collection of {@link ToolCallback}s to be used for tool calling in the chat completion requests.
 	 */
@@ -301,6 +306,7 @@ public static OpenAiChatOptions fromOptions(OpenAiChatOptions fromOptions) {
 			.reasoningEffort(fromOptions.getReasoningEffort())
 			.webSearchOptions(fromOptions.getWebSearchOptions())
 			.verbosity(fromOptions.getVerbosity())
+			.serviceTier(fromOptions.getServiceTier())
 			.build();
 	}
 
@@ -605,6 +611,14 @@ public void setVerbosity(String verbosity) {
 		this.verbosity = verbosity;
 	}
 
+	public String getServiceTier() {
+		return this.serviceTier;
+	}
+
+	public void setServiceTier(String serviceTier) {
+		this.serviceTier = serviceTier;
+	}
+
 	@Override
 	public OpenAiChatOptions copy() {
 		return OpenAiChatOptions.fromOptions(this);
@@ -617,7 +631,7 @@ public int hashCode() {
 				this.streamOptions, this.seed, this.stop, this.temperature, this.topP, this.tools, this.toolChoice,
 				this.user, this.parallelToolCalls, this.toolCallbacks, this.toolNames, this.httpHeaders,
 				this.internalToolExecutionEnabled, this.toolContext, this.outputModalities, this.outputAudio,
-				this.store, this.metadata, this.reasoningEffort, this.webSearchOptions);
+				this.store, this.metadata, this.reasoningEffort, this.webSearchOptions, this.serviceTier);
 	}
 
 	@Override
@@ -651,7 +665,8 @@ public boolean equals(Object o) {
 				&& Objects.equals(this.metadata, other.metadata)
 				&& Objects.equals(this.reasoningEffort, other.reasoningEffort)
 				&& Objects.equals(this.webSearchOptions, other.webSearchOptions)
-				&& Objects.equals(this.verbosity, other.verbosity);
+				&& Objects.equals(this.verbosity, other.verbosity)
+				&& Objects.equals(this.serviceTier, other.serviceTier);
 	}
 
 	@Override
@@ -909,6 +924,16 @@ public Builder verbosity(String verbosity) {
 			return this;
 		}
 
+		public Builder serviceTier(String serviceTier) {
+			this.options.serviceTier = serviceTier;
+			return this;
+		}
+
+		public Builder serviceTier(OpenAiApi.ServiceTier serviceTier) {
+			this.options.serviceTier = (serviceTier != null) ? serviceTier.getValue() : null; // null clears the tier
+			return this;
+		}
+
 		public OpenAiChatOptions build() {
 			return this.options;
 		}
diff --git a/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java b/models/spring-ai-openai/src/main/java/org/springframework/ai/openai/api/OpenAiApi.java
@@ -1345,6 +1345,41 @@ public record Approximate(@JsonProperty("city") String city, @JsonProperty("coun
 
 	} // @formatter:on
 
+	/**
+	 * Specifies the processing type used for serving the request.
+	 */
+	public enum ServiceTier {
+
+		/**
+		 * The request is processed with the service tier configured in the Project
+		 * settings.
+		 */
+		AUTO("auto"),
+		/**
+		 * The request is processed with the standard pricing.
+		 */
+		DEFAULT("default"),
+		/**
+		 * The request is processed with the flex pricing.
+		 */
+		FLEX("flex"),
+		/**
+		 * The request is processed with the priority pricing.
+		 */
+		PRIORITY("priority");
+
+		private final String value;
+
+		ServiceTier(String value) {
+			this.value = value;
+		}
+
+		public String getValue() {
+			return this.value;
+		}
+
+	}
+
 	/**
 	 * Message comprising the conversation.
 	 *
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/OpenAiChatOptionsTests.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/OpenAiChatOptionsTests.java
@@ -26,6 +26,7 @@
 import org.springframework.ai.openai.api.OpenAiApi;
 import org.springframework.ai.openai.api.OpenAiApi.ChatCompletionRequest.AudioParameters;
 import org.springframework.ai.openai.api.OpenAiApi.ChatCompletionRequest.StreamOptions;
+import org.springframework.ai.openai.api.OpenAiApi.ServiceTier;
 import org.springframework.ai.openai.api.ResponseFormat;
 
 import static org.assertj.core.api.Assertions.assertThat;
@@ -83,17 +84,19 @@ void testBuilderWithAllFields() {
 			.internalToolExecutionEnabled(false)
 			.httpHeaders(Map.of("header1", "value1"))
 			.toolContext(toolContext)
+			.serviceTier(ServiceTier.PRIORITY)
 			.build();
 
 		assertThat(options)
 			.extracting("model", "frequencyPenalty", "logitBias", "logprobs", "topLogprobs", "maxTokens",
 					"maxCompletionTokens", "n", "outputModalities", "outputAudio", "presencePenalty", "responseFormat",
 					"streamOptions", "seed", "stop", "temperature", "topP", "tools", "toolChoice", "user",
 					"parallelToolCalls", "store", "metadata", "reasoningEffort", "internalToolExecutionEnabled",
-					"httpHeaders", "toolContext")
+					"httpHeaders", "toolContext", "serviceTier")
 			.containsExactly("test-model", 0.5, logitBias, true, 5, null, 50, 2, outputModalities, outputAudio, 0.8,
 					responseFormat, streamOptions, 12345, stopSequences, 0.7, 0.9, tools, toolChoice, "test-user", true,
-					false, metadata, "medium", false, Map.of("header1", "value1"), toolContext);
+					false, metadata, "medium", false, Map.of("header1", "value1"), toolContext,
+					ServiceTier.PRIORITY.getValue());
 
 		assertThat(options.getStreamUsage()).isTrue();
 		assertThat(options.getStreamOptions()).isEqualTo(StreamOptions.INCLUDE_USAGE);
@@ -141,6 +144,7 @@ void testCopy() {
 			.reasoningEffort("low")
 			.internalToolExecutionEnabled(true)
 			.httpHeaders(Map.of("header1", "value1"))
+			.serviceTier(ServiceTier.DEFAULT)
 			.build();
 
 		OpenAiChatOptions copiedOptions = originalOptions.copy();
@@ -189,6 +193,7 @@ void testSetters() {
 		options.setReasoningEffort("high");
 		options.setInternalToolExecutionEnabled(false);
 		options.setHttpHeaders(Map.of("header2", "value2"));
+		options.setServiceTier(ServiceTier.DEFAULT.getValue());
 
 		assertThat(options.getModel()).isEqualTo("test-model");
 		assertThat(options.getFrequencyPenalty()).isEqualTo(0.5);
@@ -223,6 +228,7 @@ void testSetters() {
 		options.setStopSequences(List.of("s1", "s2"));
 		assertThat(options.getStopSequences()).isEqualTo(List.of("s1", "s2"));
 		assertThat(options.getStop()).isEqualTo(List.of("s1", "s2"));
+		assertThat(options.getServiceTier()).isEqualTo("default");
 	}
 
 	@Test
@@ -258,6 +264,7 @@ void testDefaultValues() {
 		assertThat(options.getToolContext()).isEqualTo(new HashMap<>());
 		assertThat(options.getStreamUsage()).isFalse();
 		assertThat(options.getStopSequences()).isNull();
+		assertThat(options.getServiceTier()).isNull();
 	}
 
 	@Test
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiApiIT.java
@@ -218,4 +218,22 @@ void chatCompletionEntityWithGpt5ChatAndTemperatureShouldSucceed(OpenAiApi.ChatM
 		assertThat(response.getBody().model()).containsIgnoringCase(modelName.getValue());
 	}
 
+	@ParameterizedTest(name = "{0} : {displayName}")
+	@EnumSource(names = { "DEFAULT", "PRIORITY" })
+	void chatCompletionEntityWithServiceTier(OpenAiApi.ServiceTier serviceTier) {
+		ChatCompletionMessage chatCompletionMessage = new ChatCompletionMessage(
+				"What is the answer to the ultimate question of life, the universe, and everything?", Role.USER);
+
+		ChatCompletionRequest request = new ChatCompletionRequest(List.of(chatCompletionMessage), // messages
+				OpenAiApi.ChatModel.GPT_4_O.value, null, null, null, null, null, null, null, null, null, null, null,
+				null, null, null, serviceTier.getValue(), null, false, null, 1.0, null, null, null, null, null, null,
+				null, null);
+
+		ResponseEntity<ChatCompletion> response = this.openAiApi.chatCompletionEntity(request);
+
+		assertThat(response).isNotNull();
+		assertThat(response.getBody()).isNotNull();
+		assertThat(response.getBody().serviceTier()).containsIgnoringCase(serviceTier.getValue());
+	}
+
 }
diff --git a/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiStreamFunctionCallingHelperTest.java b/models/spring-ai-openai/src/test/java/org/springframework/ai/openai/api/OpenAiStreamFunctionCallingHelperTest.java
@@ -23,6 +23,7 @@
 import static org.assertj.core.api.Assertions.assertThat;
 import org.junit.jupiter.api.Test;
 import org.mockito.Mockito;
+import org.springframework.ai.openai.api.OpenAiApi.ServiceTier;
 
 /**
  * Unit tests for {@link OpenAiStreamFunctionCallingHelper}
@@ -36,7 +37,7 @@ public class OpenAiStreamFunctionCallingHelperTest {
 	@Test
 	public void merge_whenInputIsValid() {
 		var expectedResult = new OpenAiApi.ChatCompletionChunk("id", Collections.emptyList(),
-				System.currentTimeMillis(), "model", "serviceTier", "fingerPrint", "object", null);
+				System.currentTimeMillis(), "model", "default", "fingerPrint", "object", null);
 		var previous = new OpenAiApi.ChatCompletionChunk(null, null, expectedResult.created(), expectedResult.model(),
 				expectedResult.serviceTier(), null, null, null);
 		var current = new OpenAiApi.ChatCompletionChunk(expectedResult.id(), null, null, null, null,
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/chat/openai-chat.adoc
@@ -177,6 +177,7 @@ The `JSON_SCHEMA` type enables link:https://platform.openai.com/docs/guides/stru
 | spring.ai.openai.chat.options.parallel-tool-calls | Whether to enable link:https://platform.openai.com/docs/guides/function-calling/parallel-function-calling[parallel function calling] during tool use. | true
 | spring.ai.openai.chat.options.http-headers | Optional HTTP headers to be added to the chat completion request. To override the `api-key` you need to use an `Authorization` header key, and you have to prefix the key value with the `Bearer` prefix. | -
 | spring.ai.openai.chat.options.proxy-tool-calls | If true, the Spring AI will not handle the function calls internally, but will proxy them to the client. Then is the client's responsibility to handle the function calls, dispatch them to the appropriate function, and return the results. If false (the default), the Spring AI will handle the function calls internally. Applicable only for chat models with function calling support | false
+| spring.ai.openai.chat.options.service-tier | Specifies the link:https://platform.openai.com/docs/api-reference/responses/create#responses_create-service_tier[processing type] used for serving the request. Known values: `auto`, `default`, `flex`, `priority`. | -
 |====
 
 [NOTE]