niieani · niieani · Oct 9, 2025 · Oct 9, 2025
diff --git a/README.md b/README.md
@@ -86,6 +86,7 @@ import {
   encodeGenerator,
   decodeGenerator,
   decodeAsyncGenerator,
+  ALL_SPECIAL_TOKENS,
 } from 'gpt-tokenizer'
 // note: depending on the model, import from the respective file, e.g.:
 // import {...} from 'gpt-tokenizer/model/gpt-4o'
@@ -103,6 +104,11 @@ const decodedText = decode(tokens)
 // returns false if the limit is exceeded, otherwise returns the actual number of tokens (truthy value)
 const withinTokenLimit = isWithinTokenLimit(text, tokenLimit)
 
+// Allow special tokens when needed
+const withinTokenLimitWithSpecial = isWithinTokenLimit(text, tokenLimit, {
+  allowedSpecial: ALL_SPECIAL_TOKENS,
+})
+
 // Example chat:
 const chat = [
   { role: 'system', content: 'You are a helpful assistant.' },
@@ -115,6 +121,10 @@ const chatTokens = encodeChat(chat)
 // Check if chat is within the token limit
 const chatWithinTokenLimit = isWithinTokenLimit(chat, tokenLimit)
 
+const chatWithinTokenLimitWithSpecial = isWithinTokenLimit(chat, tokenLimit, {
+  allowedSpecial: ALL_SPECIAL_TOKENS,
+})
+
 // Encode text using generator
 for (const tokenChunk of encodeGenerator(text)) {
   console.log(tokenChunk)
@@ -227,18 +237,22 @@ const tokens = [18435, 198, 23132, 328]
 const text = decode(tokens)
 ```
 
-### `isWithinTokenLimit(text: string, tokenLimit: number): false | number`
+### `isWithinTokenLimit(input: string | Iterable<ChatMessage>, tokenLimit: number, encodeOptions?: EncodeOptions): false | number`
 
-Checks if the text is within the token limit. Returns `false` if the limit is exceeded, otherwise returns the number of tokens. Use this method to quickly check if a given text is within the token limit imposed by GPT models, without encoding the entire text.
+Checks if the input is within the token limit. Returns `false` if the limit is exceeded, otherwise returns the number of tokens. Use this method to quickly check if a given text or chat is within the token limit imposed by GPT models, without encoding the entire input. The optional `encodeOptions` parameter lets you configure special token handling; by default, all special tokens are disallowed, and input containing one throws a "Disallowed special token found" error.
 
 Example:
 
 ```typescript
-import { isWithinTokenLimit } from 'gpt-tokenizer'
+import { isWithinTokenLimit, ALL_SPECIAL_TOKENS } from 'gpt-tokenizer'
 
 const text = 'Hello, world!'
 const tokenLimit = 10
 const withinTokenLimit = isWithinTokenLimit(text, tokenLimit)
+
+const withinTokenLimitWithSpecial = isWithinTokenLimit(text, tokenLimit, {
+  allowedSpecial: ALL_SPECIAL_TOKENS,
+})
 ```
 
 ### `countTokens(text: string | Iterable<ChatMessage>, encodeOptions?: EncodeOptions): number`
@@ -255,9 +269,9 @@ const text = 'Hello, world!'
 const tokenCount = countTokens(text)
 ```
 
-### `encodeChat(chat: ChatMessage[], model?: ModelName): number[]`
+### `encodeChat(chat: ChatMessage[], model?: ModelName, encodeOptions?: EncodeOptions): number[]`
 
-Encodes the given chat into a sequence of tokens.
+Encodes the given chat into a sequence of tokens. The optional `encodeOptions` parameter lets you configure how special tokens appearing in the message content are handled.
 
 If you didn't import the model version directly, or if `model` wasn't provided during initialization, it must be provided here to correctly tokenize the chat for a given model. Use this method when you need to transform a chat into the token format that the GPT models can process.
 
@@ -335,6 +349,7 @@ async function processTokens(asyncTokensIterator) {
 Estimates the cost of processing a given number of tokens using the model's pricing data. This function calculates costs for different API usage types (main API, batch API) and cached tokens when available.
 
 The function returns a `PriceData` object with the following structure:
+
 - `main`: Main API pricing with `input`, `output`, `cached_input`, and `cached_output` costs
 - `batch`: Batch API pricing with the same cost categories
 
@@ -362,7 +377,7 @@ Note that not all models support all of these tokens.
 
 By default, **all special tokens are disallowed**.
 
-The `encode`, `encodeGenerator` and `countTokens` functions accept an `EncodeOptions` parameter to customize special token handling:
+The `encode`, `encodeGenerator`, `encodeChat`, `encodeChatGenerator`, `countTokens`, and `isWithinTokenLimit` functions accept an `EncodeOptions` parameter to customize special token handling:
 
 ### Custom Allowed Sets
 

diff --git a/src/GptEncoding.test.ts b/src/GptEncoding.test.ts
@@ -101,6 +101,7 @@ describe.each(encodingNames)('%s', (encodingName: EncodingName) => {
     decodeAsyncGenerator,
     encode,
     isWithinTokenLimit,
+    countTokens,
   } = encoding
 
   describe('encode and decode', () => {
@@ -174,6 +175,32 @@ describe.each(encodingNames)('%s', (encodingName: EncodingName) => {
       expect(decode(encoded)).toEqual(str)
     })
 
+    test('isWithinTokenLimit handles special tokens when allowed', () => {
+      const str = `hello ${EndOfText} world`
+      expect(() => isWithinTokenLimit(str, 100)).toThrowError(
+        /Disallowed special token found/,
+      )
+
+      const allowedCount = isWithinTokenLimit(str, 100, {
+        allowedSpecial: ALL_SPECIAL_TOKENS,
+      })
+
+      expect(allowedCount).toBe(
+        encode(str, { allowedSpecial: ALL_SPECIAL_TOKENS }).length,
+      )
+    })
+
+    test('countTokens handles special tokens when allowed', () => {
+      const str = `hello ${EndOfText} world`
+      expect(() => countTokens(str)).toThrowError(
+        /Disallowed special token found/,
+      )
+
+      expect(countTokens(str, { allowedSpecial: ALL_SPECIAL_TOKENS })).toBe(
+        encode(str, { allowedSpecial: ALL_SPECIAL_TOKENS }).length,
+      )
+    })
+
     async function* getHelloWorldTokensAsync() {
       const str = 'hello 👋 world 🌍'
       for (const token of result[str]) {
@@ -290,6 +317,46 @@ describe.each(chatModelNames)('%s', async (modelName) => {
       )
       expect(isWithinTokenLimit).toBe(expectedEncodedLength)
     })
+
+    test('isWithinTokenLimit allows special tokens in chat when configured', () => {
+      const chatWithSpecial = [
+        { role: 'user', content: `Hello ${EndOfText} world` },
+      ] satisfies ChatMessage[]
+
+      expect(() =>
+        encoding.isWithinTokenLimit(chatWithSpecial, 100),
+      ).toThrowError(/Disallowed special token found/)
+
+      const allowedCount = encoding.isWithinTokenLimit(chatWithSpecial, 100, {
+        allowedSpecial: ALL_SPECIAL_TOKENS,
+      })
+
+      const encoded = encoding.encodeChat(chatWithSpecial, undefined, {
+        allowedSpecial: ALL_SPECIAL_TOKENS,
+      })
+
+      expect(allowedCount).toBe(encoded.length)
+    })
+
+    test('countTokens allows special tokens in chat when configured', () => {
+      const chatWithSpecial = [
+        { role: 'user', content: `Hello ${EndOfText} world` },
+      ] satisfies ChatMessage[]
+
+      expect(() => encoding.countTokens(chatWithSpecial)).toThrowError(
+        /Disallowed special token found/,
+      )
+
+      expect(
+        encoding.countTokens(chatWithSpecial, {
+          allowedSpecial: ALL_SPECIAL_TOKENS,
+        }),
+      ).toBe(
+        encoding.encodeChat(chatWithSpecial, undefined, {
+          allowedSpecial: ALL_SPECIAL_TOKENS,
+        }).length,
+      )
+    })
   })
 })
 

diff --git a/src/GptEncoding.ts b/src/GptEncoding.ts
@@ -263,10 +263,12 @@ export class GptEncoding {
    * Returns tokens assuming the 'gpt-3.5-turbo-0301' / 'gpt-4-0314' format.
    * Based on OpenAI's guidelines: https://github.com/openai/openai-python/blob/main/chatml.md
    * Also mentioned in section 6 of this document: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+   * @param encodeOptions Options controlling how special tokens are handled.
    */
   *encodeChatGenerator(
     chat: Iterable<ChatMessage>,
     model = this.modelName,
+    encodeOptions?: EncodeOptions,
   ): Generator<number[], void, undefined> {
     if (!model) {
       throw new Error(
@@ -304,14 +306,14 @@ export class GptEncoding {
       if (encodedRoleSeparator.length > 0) {
         yield encodedRoleSeparator
       }
-      yield* this.encodeGenerator(content)
+      yield* this.encodeGenerator(content, encodeOptions)
       yield [chatEndToken]
       yield encodedMessageSeparator
     }
 
     // every reply is primed with <|start|>assistant<|message|>
     yield [chatStartToken]
-    yield* this.encodeGenerator('assistant')
+    yield* this.encodeGenerator('assistant', encodeOptions)
     if (encodedRoleSeparator.length > 0) {
       yield encodedRoleSeparator
     }
@@ -323,22 +325,32 @@ export class GptEncoding {
    * Returns tokens assuming the 'gpt-3.5-turbo-0301' / 'gpt-4-0314' format.
    * Based on OpenAI's guidelines: https://github.com/openai/openai-python/blob/main/chatml.md
    * Also mentioned in section 6 of this document: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+   * @param encodeOptions Options controlling how special tokens are handled.
    */
-  encodeChat(chat: readonly ChatMessage[], model = this.modelName): number[] {
-    return [...this.encodeChatGenerator(chat, model)].flat()
+  encodeChat(
+    chat: readonly ChatMessage[],
+    model = this.modelName,
+    encodeOptions?: EncodeOptions,
+  ): number[] {
+    return [...this.encodeChatGenerator(chat, model, encodeOptions)].flat()
   }
 
   /**
+   * Checks whether the provided input stays within the provided token limit.
+   * @param input The string or chat messages to evaluate.
+   * @param tokenLimit The maximum allowed number of tokens.
+   * @param encodeOptions Options controlling how special tokens are handled.
    * @returns {false | number} false if token limit is exceeded, otherwise the number of tokens
    */
   isWithinTokenLimit(
     input: string | Iterable<ChatMessage>,
     tokenLimit: number,
+    encodeOptions?: EncodeOptions,
   ): false | number {
     const tokenGenerator =
       typeof input === 'string'
-        ? this.encodeGenerator(input)
-        : this.encodeChatGenerator(input)
+        ? this.encodeGenerator(input, encodeOptions)
+        : this.encodeChatGenerator(input, undefined, encodeOptions)
     let count = 0
     for (const tokens of tokenGenerator) {
       count += tokens.length
@@ -351,6 +363,8 @@ export class GptEncoding {
 
   /**
    * Counts the number of tokens in the input.
+   * @param input The string or chat messages to evaluate.
+   * @param encodeOptions Options controlling how special tokens are handled.
    * @returns {number} The number of tokens.
    */
   countTokens(
@@ -375,7 +389,11 @@ export class GptEncoding {
       )
     }
 
-    const tokenGenerator = this.encodeChatGenerator(input)
+    const tokenGenerator = this.encodeChatGenerator(
+      input,
+      undefined,
+      encodeOptions,
+    )
     let count = 0
     for (const tokens of tokenGenerator) {
       count += tokens.length