Skip to content

Commit 4f64377

Browse files
committed
fix: add 'clearMergeCache'
1 parent 15d13b1 commit 4f64377

8 files changed

Lines changed: 27 additions & 0 deletions

File tree

README.md

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -393,6 +393,14 @@ setMergeCacheSize(5000)
393393
setMergeCacheSize(0)
394394
```
395395

396+
The cache is persisted between encoding calls. To explicitly clear the cache (e.g. to free up memory), use the `clearMergeCache` function:
397+
398+
```ts
399+
import { clearMergeCache } from 'gpt-tokenizer'
400+
401+
clearMergeCache()
402+
```
403+
396404
## Testing and Validation
397405

398406
`gpt-tokenizer` includes a set of test cases in the [TestPlans.txt](./data/TestPlans.txt) file to ensure its compatibility with OpenAI's Python `tiktoken` library. These test cases validate the functionality and behavior of `gpt-tokenizer`, providing a reliable reference for developers.

src/BytePairEncodingCore.ts

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -104,6 +104,10 @@ export class BytePairEncodingCore {
104104
}
105105
}
106106

107+
clearMergeCache(): void {
108+
this.mergeCache?.clear()
109+
}
110+
107111
*encodeNativeGenerator(
108112
text: string,
109113
allowedSpecial?: Set<string>,

src/GptEncoding.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,7 @@ export class GptEncoding {
128128
this.encodeChatGenerator = this.encodeChatGenerator.bind(this)
129129
this.countTokens = this.countTokens.bind(this)
130130
this.setMergeCacheSize = this.setMergeCacheSize.bind(this)
131+
this.clearMergeCache = this.clearMergeCache.bind(this)
131132
this.modelName = modelName
132133
}
133134

@@ -366,6 +367,10 @@ export class GptEncoding {
366367
this.bytePairEncodingCoreProcessor.setMergeCacheSize(size)
367368
}
368369

370+
clearMergeCache(): void {
371+
this.bytePairEncodingCoreProcessor.clearMergeCache()
372+
}
373+
369374
decode(inputTokensToDecode: Iterable<number>): string {
370375
return this.bytePairEncodingCoreProcessor.decodeNative(inputTokensToDecode)
371376
}

src/encoding/cl100k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,10 @@ const {
1818
encodeChatGenerator,
1919
vocabularySize,
2020
setMergeCacheSize,
21+
clearMergeCache,
2122
} = api
2223
export {
24+
clearMergeCache,
2325
countTokens,
2426
decode,
2527
decodeAsyncGenerator,

src/encoding/o200k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,10 @@ const {
1818
encodeChatGenerator,
1919
vocabularySize,
2020
setMergeCacheSize,
21+
clearMergeCache,
2122
} = api
2223
export {
24+
clearMergeCache,
2325
countTokens,
2426
decode,
2527
decodeAsyncGenerator,

src/encoding/p50k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@ const {
1616
countTokens,
1717
vocabularySize,
1818
setMergeCacheSize,
19+
clearMergeCache,
1920
} = api
2021
export {
22+
clearMergeCache,
2123
countTokens,
2224
decode,
2325
decodeAsyncGenerator,

src/encoding/p50k_edit.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@ const {
1616
countTokens,
1717
vocabularySize,
1818
setMergeCacheSize,
19+
clearMergeCache,
1920
} = api
2021
export {
22+
clearMergeCache,
2123
countTokens,
2224
decode,
2325
decodeAsyncGenerator,

src/encoding/r50k_base.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@ const {
1616
countTokens,
1717
vocabularySize,
1818
setMergeCacheSize,
19+
clearMergeCache,
1920
} = api
2021
export {
22+
clearMergeCache,
2123
countTokens,
2224
decode,
2325
decodeAsyncGenerator,

0 commit comments

Comments
 (0)