@@ -31,24 +31,21 @@ export const endpointOAIParametersSchema = z.object({
3131 defaultHeaders : z . record ( z . string ( ) ) . optional ( ) ,
3232 defaultQuery : z . record ( z . string ( ) ) . optional ( ) ,
3333 extraBody : z . record ( z . any ( ) ) . optional ( ) ,
34- multimodal : z
35- . object ( {
36- image : createImageProcessorOptionsValidator ( {
37- supportedMimeTypes : [
38- "image/png" ,
39- "image/jpeg" ,
40- "image/webp" ,
41- "image/avif" ,
42- "image/tiff" ,
43- "image/gif" ,
44- ] ,
45- preferredMimeType : "image/webp" ,
46- maxSizeInMB : Infinity ,
47- maxWidth : 4096 ,
48- maxHeight : 4096 ,
49- } ) ,
50- } )
51- . default ( { } ) ,
34+ multimodal : z
35+ . object ( {
36+ image : createImageProcessorOptionsValidator ( {
37+ supportedMimeTypes : [
38+ // Restrict to the most widely-supported formats
39+ "image/png" ,
40+ "image/jpeg" ,
41+ ] ,
42+ preferredMimeType : "image/jpeg" ,
43+ maxSizeInMB : 3 ,
44+ maxWidth : 2048 ,
45+ maxHeight : 2048 ,
46+ } ) ,
47+ } )
48+ . default ( { } ) ,
5249 /* enable use of max_completion_tokens in place of max_tokens */
5350 useCompletionTokens : z . boolean ( ) . default ( false ) ,
5451 streamingSupported : z . boolean ( ) . default ( true ) ,
@@ -118,11 +115,15 @@ export async function endpointOai(
118115
119116 return openAICompletionToTextGenerationStream ( openAICompletion ) ;
120117 } ;
121- } else if ( completion === "chat_completions" ) {
122- return async ( { messages, preprompt, generateSettings, conversationId } ) => {
118+ } else if ( completion === "chat_completions" ) {
119+ return async ( { messages, preprompt, generateSettings, conversationId, isMultimodal } ) => {
123120 // Format messages for the chat API, handling multimodal content if supported
124- let messagesOpenAI : OpenAI . Chat . Completions . ChatCompletionMessageParam [ ] =
125- await prepareMessages ( messages , imageProcessor , model . multimodal ) ;
121+ let messagesOpenAI : OpenAI . Chat . Completions . ChatCompletionMessageParam [ ] =
122+ await prepareMessages (
123+ messages ,
124+ imageProcessor ,
125+ isMultimodal ?? model . multimodal
126+ ) ;
126127
127128 // Check if a system message already exists as the first message
128129 const hasSystemMessage = messagesOpenAI . length > 0 && messagesOpenAI [ 0 ] ?. role === "system" ;
@@ -214,36 +215,34 @@ async function prepareMessages(
214215 imageProcessor : ReturnType < typeof makeImageProcessor > ,
215216 isMultimodal : boolean
216217) : Promise < OpenAI . Chat . Completions . ChatCompletionMessageParam [ ] > {
217- return Promise . all (
218- messages . map ( async ( message ) => {
219- if ( message . from === "user" && isMultimodal ) {
220- return {
221- role : message . from ,
222- content : [
223- ...( await prepareFiles ( imageProcessor , message . files ?? [ ] ) ) ,
224- { type : "text" , text : message . content } ,
225- ] ,
226- } ;
227- }
228- return {
229- role : message . from ,
230- content : message . content ,
231- } ;
232- } )
233- ) ;
218+ return Promise . all (
219+ messages . map ( async ( message ) => {
220+ if ( message . from === "user" && isMultimodal ) {
221+ const parts = [
222+ { type : "text" as const , text : message . content } ,
223+ ...( await prepareFiles ( imageProcessor , message . files ?? [ ] ) ) ,
224+ ] ;
225+ return { role : message . from , content : parts } ;
226+ }
227+ return { role : message . from , content : message . content } ;
228+ } )
229+ ) ;
234230}
235231
/**
 * Converts the image attachments of a message into OpenAI chat
 * `image_url` content parts.
 *
 * Files whose MIME type does not start with `image/` are dropped. Each
 * remaining file is passed through `imageProcessor`, and the processed result
 * is embedded as a base64 data URL.
 *
 * @param imageProcessor - Function applied to every image file; its resolved
 *   value is read for `mime` and `image` (the latter via `toString("base64")`).
 * @param files - Files attached to the message; may be empty.
 * @returns One `image_url` part per image file, in the same order as `files`.
 */
async function prepareFiles(
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	files: MessageFile[]
): Promise<OpenAI.Chat.Completions.ChatCompletionContentPartImage[]> {
	const imageFiles = files.filter((file) => file.mime.startsWith("image/"));

	// Call the processor with the file only: passing it straight to `map`
	// would also forward the element index and the whole array as extra
	// arguments.
	const processedFiles = await Promise.all(imageFiles.map((file) => imageProcessor(file)));

	return processedFiles.map((file) => ({
		type: "image_url" as const,
		image_url: {
			// The data URL must not contain whitespace around the `;base64,` marker
			// or after the payload.
			url: `data:${file.mime};base64,${file.image.toString("base64")}`,
			// Improves compatibility with some OpenAI-compatible servers
			// that expect an explicit detail setting.
			detail: "auto" as const,
		},
	}));
}
0 commit comments