Skip to content

Commit d6233d6

Browse files
committed
save-load : handle -np 1
ggml-ci
1 parent d90b20d commit d6233d6

File tree

2 files changed

+10
-0
lines changed

2 files changed

+10
-0
lines changed

examples/embedding/embedding.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,11 @@ int main(int argc, char ** argv) {
8181

8282
params.embedding = true;
8383

84+
// if the number of prompts that would be encoded is known in advance, it's more efficient to specify the
85+
// --parallel argument accordingly. for convenience, if not specified, we fall back to unified KV cache
86+
// in order to support any number of prompts
8487
if (params.n_parallel == 1) {
88+
LOG_INF("%s: n_parallel == 1 -> unified KV cache is enabled\n", __func__);
8589
params.kv_unified = true;
8690
}
8791

examples/save-load-state/save-load-state.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,12 @@ int main(int argc, char ** argv) {
1515
return 1;
1616
}
1717

18+
if (params.n_parallel == 1) {
19+
// the example uses 2 sequences, so when n_parallel == 1, we need to enable unified kv cache
20+
printf("%s: n_parallel == 1, enabling unified kv cache\n", __func__);
21+
params.kv_unified = true;
22+
}
23+
1824
common_init();
1925

2026
if (params.n_predict < 0) {

0 commit comments

Comments
 (0)