Commit c1425d1 (parent: e1a17a9)

change schema of efficient_attention_forward_ck to have optional tensor return

2 files changed, +3 −3 lines


xformers/csrc/attention/attention.cpp (1 addition, 1 deletion)

@@ -26,7 +26,7 @@ TORCH_LIBRARY_FRAGMENT(xformers, m) {
       "xformers::efficient_attention_forward_ck(Tensor query, "
       "Tensor key, Tensor value, Tensor? attn_bias, Tensor? seqstart_q, "
       "Tensor? seqstart_k, int? max_seqlen_q, float dropout_p, "
-      "bool compute_logsumexp, int custom_mask_type, float? scale, Tensor? seqlen_k, int? window_size, Tensor? block_tables, int? page_size) -> (Tensor, Tensor, int, int)"));
+      "bool compute_logsumexp, int custom_mask_type, float? scale, Tensor? seqlen_k, int? window_size, Tensor? block_tables, int? page_size) -> (Tensor, Tensor?, int, int)"));
   m.def(TORCH_SELECTIVE_SCHEMA(
       "xformers::efficient_attention_forward_decoder_ck(Tensor query, "
       "Tensor key, Tensor value, Tensor? seq_positions, float scale) -> Tensor"));

xformers/csrc/attention/hip_fmha/attention_forward_generic_ck_tiled.cpp (2 additions, 2 deletions)

@@ -48,7 +48,7 @@ namespace {
    (Mode BMHK) With all the heads having the same seqlen
    (Mode 1MHK) `batch=1` with all tokens across batches concatenated
 */
-std::tuple<at::Tensor, at::Tensor, int64_t, int64_t>
+std::tuple<at::Tensor, std::optional<at::Tensor>, int64_t, int64_t>
 efficient_attention_forward_ck(
     const at::Tensor& query, // [b, seqlen, num_heads_q, K]
     const at::Tensor& key, // [b, seqlen, num_heads_kv, K]

@@ -473,7 +473,7 @@ efficient_attention_forward_ck(
    (Mode BMHK) With all the heads having the same seqlen
    (Mode 1MHK) `batch=1` with all tokens across batches concatenated
 */
-std::tuple<at::Tensor, at::Tensor, int64_t, int64_t>
+std::tuple<at::Tensor, std::optional<at::Tensor>, int64_t, int64_t>
 efficient_attention_forward_ck_meta(
     const at::Tensor& query, // [b, seqlen, num_heads_q, K]
     const at::Tensor& key, // [b, seqlen, num_heads_kv, K]
