@@ -96,7 +96,7 @@ void MLACacheFormatter::format(TransferSession& session)
9696{
9797 NVTX3_SCOPED_RANGE (MLACacheFormatter_format);
9898 auto const & llmRequest = session.getLlmRequest ();
99- TLLM_LOG_INFO (
99+ TLLM_LOG_DEBUG (
100100 mpi::MpiComm::world ().getRank (), " Start sending KV cache for request ID: %ld." , llmRequest.mRequestId );
101101 auto const & selfConfig = session.getSelfState ().getCacheState ().value ();
102102 auto const & destConfig = session.getOtherState ().getCacheState ().value ();
@@ -137,7 +137,7 @@ void MLACacheFormatter::format(TransferSession& session)
137137 && destConfig.getParallelConfig ().mPipelineParallelism == selfConfig.getParallelConfig ().mPipelineParallelism )
138138 {
139139
140- TLLM_LOG_INFO (" Try using zero-copy for the KV cache." );
140+ TLLM_LOG_DEBUG (" Try using zero-copy for the KV cache." );
141141 NVTX3_SCOPED_RANGE (sendBufferFun);
142142
143143 TLLM_CUDA_CHECK (cudaSetDevice (deviceId));
@@ -149,7 +149,7 @@ void MLACacheFormatter::format(TransferSession& session)
149149 }
150150 }
151151
152- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (), " End the sending of KV cache for the request ID: %ld." ,
152+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (), " End the sending of KV cache for the request ID: %ld." ,
153153 llmRequest.mRequestId );
154154
155155 return ;
@@ -251,7 +251,7 @@ void MLACacheFormatter::format(TransferSession& session)
251251 {
252252 if (!common::getEnvEnableReceiveKVCacheParallel ())
253253 {
254- TLLM_LOG_INFO (" Disable parallel receiving of the KV cache." );
254+ TLLM_LOG_DEBUG (" Disable parallel receiving of the KV cache." );
255255 for (size_t i = 0 ; i < connections.size (); i++)
256256 {
257257 sendBufferFun (deviceId, i);
@@ -289,7 +289,7 @@ void MLACacheFormatter::format(TransferSession& session)
289289 }
290290 mCacheTransBufferManager ->freeBufferIndexForSend (cacheBufferId);
291291
292- TLLM_LOG_INFO (
292+ TLLM_LOG_DEBUG (
293293 mpi::MpiComm::world ().getRank (), " End the sending of KV cache for the request ID: %ld." , llmRequest.mRequestId );
294294}
295295
@@ -299,7 +299,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
299299 auto const & llmRequest = session.getLlmRequest ();
300300 TLLM_CHECK_WITH_INFO (llmRequest.mSamplingConfig .beamWidth == 1 , " Currently only supports beam width 1." );
301301 auto const ctxReqId = llmRequest.getContextPhaseParams ().value ().getReqId ();
302- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (),
302+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (),
303303 " Start receiving KV cache for request ID: %ld, context request ID: %ld." , llmRequest.mRequestId , ctxReqId);
304304 auto const & selfConfig = session.getSelfState ().getCacheState ().value ();
305305 auto const & destConfig = session.getOtherState ().getCacheState ().value ();
@@ -335,7 +335,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
335335 && destConfig.getParallelConfig ().mPipelineParallelism == selfConfig.getParallelConfig ().mPipelineParallelism )
336336 {
337337 // recv
338- TLLM_LOG_INFO (" Try zcopy for KV cache" );
338+ TLLM_LOG_DEBUG (" Try zcopy for KV cache" );
339339 NVTX3_SCOPED_RANGE (recvBufferFun);
340340 TLLM_CUDA_CHECK (cudaSetDevice (deviceId));
341341 TLLM_CHECK (pickUpConnections.size () == 1 );
@@ -347,7 +347,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
347347 session.recv (pickUpConnections[i], block->data (), block->getSizeInBytes ());
348348 }
349349 }
350- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (),
350+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (),
351351 " End receiving KV cache for request ID: %ld, context request ID: %ld." , llmRequest.mRequestId ,
352352 llmRequest.getContextPhaseParams ().value ().getReqId ());
353353 return ;
@@ -509,7 +509,7 @@ void MLACacheFormatter::unformat(TransferSession& session)
509509 mCacheTransBufferManager ->freeBufferIndexForRecv (cacheBufferId);
510510 }
511511
512- TLLM_LOG_INFO (mpi::MpiComm::world ().getRank (),
512+ TLLM_LOG_DEBUG (mpi::MpiComm::world ().getRank (),
513513 " End receiving KV cache for request ID: %ld, context request ID: %ld." , llmRequest.mRequestId ,
514514 llmRequest.getContextPhaseParams ().value ().getReqId ());
515515}
0 commit comments