@@ -206,15 +206,61 @@ def infer(self, input_list: List[Dict[str, Any]], **kwargs) -> List[T]:
         stm_token_usage = self.stm(self.workflow_instance_id).get('token_usage', defaultdict(int))
 
         def process_stream(self, stream_output):
-            for chunk in stream_output:
-                if chunk.usage is not None:
-                    for key, value in chunk.usage.dict().items():
-                        if key in ["prompt_tokens", "completion_tokens", 'total_tokens']:
-                            if value is not None:
-                                stm_token_usage[key] += value
-                    self.stm(self.workflow_instance_id)['token_usage'] = stm_token_usage
-
-                yield chunk
+
+            # local deepseek
+            if hasattr(next(stream_output).choices[0].delta, "reasoning_content"):
+                reasoning_flag = False
+                answering_flag = False
+                for chunk in stream_output:
+                    # output reasoning
+                    if chunk.choices[0].delta.reasoning_content is not None:
+                        if not reasoning_flag:
+                            chunk.choices[0].delta.content = "Reasoning:" + chunk.choices[0].delta.reasoning_content
+                            reasoning_flag = True
+                        else:
+                            chunk.choices[0].delta.content = chunk.choices[0].delta.reasoning_content
+                    # output answering
+                    elif not answering_flag:
+                        chunk.choices[0].delta.content = "Answer:" + chunk.choices[0].delta.content
+                        answering_flag = True
+                    if chunk.usage is not None:
+                        for key, value in chunk.usage.dict().items():
+                            if key in ["prompt_tokens", "completion_tokens", 'total_tokens']:
+                                if value is not None:
+                                    stm_token_usage[key] += value
+                        self.stm(self.workflow_instance_id)['token_usage'] = stm_token_usage
+                    yield chunk
+
+            # ollama deepseek
+            elif "deepseek" in self.llm.model_id:
+                reasoning_flag = False
+                answering_flag = False
+                for chunk in stream_output:
+                    if not reasoning_flag:
+                        chunk.choices[0].delta.content = "Reasoning:" + chunk.choices[0].delta.content
+                        reasoning_flag = True
+                    elif not answering_flag:
+                        if chunk.choices[0].delta.content == "</think>":
+                            chunk.choices[0].delta.content = "Answer:"
+                            answering_flag = True
+                    if chunk.usage is not None:
+                        for key, value in chunk.usage.dict().items():
+                            if key in ["prompt_tokens", "completion_tokens", 'total_tokens']:
+                                if value is not None:
+                                    stm_token_usage[key] += value
+                        self.stm(self.workflow_instance_id)['token_usage'] = stm_token_usage
+                    yield chunk
+
+            # models without reasoning
+            else:
+                for chunk in stream_output:
+                    if chunk.usage is not None:
+                        for key, value in chunk.usage.dict().items():
+                            if key in ["prompt_tokens", "completion_tokens", 'total_tokens']:
+                                if value is not None:
+                                    stm_token_usage[key] += value
+                        self.stm(self.workflow_instance_id)['token_usage'] = stm_token_usage
+                    yield chunk
 
         for prompt in prompts:
             output = self.llm.generate(prompt, **kwargs)
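
For reference, the behaviour added by the first branch ("local deepseek") can be exercised in isolation. Below is a minimal, self-contained sketch: the `Delta`, `Choice`, and `Chunk` classes, the fake stream, and the plain `token_usage` dict are stand-ins for OpenAI-style streaming chunks and the STM store, not the project's actual types, and the prefixing logic is a simplified rendition of the diff above. It shows how the first reasoning chunk gets a "Reasoning:" prefix, the first non-reasoning chunk gets an "Answer:" prefix, and usage counters are accumulated as chunks are re-yielded.

```python
from dataclasses import dataclass
from collections import defaultdict
from typing import Optional, Dict, Iterator


# Stand-ins for OpenAI-style streaming chunk objects (hypothetical, for illustration only).
@dataclass
class Delta:
    content: Optional[str] = None
    reasoning_content: Optional[str] = None


@dataclass
class Choice:
    delta: Delta


@dataclass
class Chunk:
    choices: list
    usage: Optional[Dict[str, int]] = None  # simplified: plain dict instead of a pydantic usage model


def process_stream(stream_output: Iterator[Chunk], token_usage: Dict[str, int]) -> Iterator[Chunk]:
    """Prefix the reasoning/answer phases and accumulate token usage, as in the 'local deepseek' branch."""
    reasoning_flag = False
    answering_flag = False
    for chunk in stream_output:
        delta = chunk.choices[0].delta
        if delta.reasoning_content is not None:
            # First reasoning chunk gets a "Reasoning:" prefix; later ones pass through unchanged.
            delta.content = ("Reasoning:" if not reasoning_flag else "") + delta.reasoning_content
            reasoning_flag = True
        elif not answering_flag:
            # First non-reasoning chunk marks the start of the answer.
            delta.content = "Answer:" + (delta.content or "")
            answering_flag = True
        if chunk.usage is not None:
            for key in ("prompt_tokens", "completion_tokens", "total_tokens"):
                if chunk.usage.get(key) is not None:
                    token_usage[key] += chunk.usage[key]
        yield chunk


# Fake stream: two reasoning chunks, then one answer chunk carrying usage stats.
fake_stream = iter([
    Chunk([Choice(Delta(reasoning_content="think step 1 "))]),
    Chunk([Choice(Delta(reasoning_content="think step 2 "))]),
    Chunk([Choice(Delta(content="42"))],
          usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}),
])

usage = defaultdict(int)
text = "".join(c.choices[0].delta.content for c in process_stream(fake_stream, usage))
print(text)         # Reasoning:think step 1 think step 2 Answer:42
print(dict(usage))  # {'prompt_tokens': 10, 'completion_tokens': 5, 'total_tokens': 15}
```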