
Commit 3eb02a8

HuggingFace LogitsProcessor to also accept a list of compiled grammars (#275)
My use case is that I need to follow a different grammar for each row in a batch. I couldn't find a way to do this natively with the current API, so I started hacking and came up with the changes in this PR. I'm not sure about the broader context, so I would appreciate a comment on whether this approach makes sense.

```python
class CountryPoland(ClassSchema):
    country: Literal["Poland"]

class CountryGermany(ClassSchema):
    country: Literal["Germany"]

tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=config.vocab_size)
grammar_compiler = xgr.GrammarCompiler(tokenizer_info)
compiled_grammars = [
    grammar_compiler.compile_json_schema(CountryPoland),
    grammar_compiler.compile_json_schema(CountryGermany),
]

logit_processor = LogitsProcessor(compiled_grammars)
logit_processor = LogitsProcessorList([logit_processor])
model.generate(**model_inputs, max_new_tokens=512, logits_processor=logit_processor)
```

[Linked issue](#276).
1 parent d15a616 · commit 3eb02a8

File tree

1 file changed: +17 −7 lines changed

  • python/xgrammar/contrib


python/xgrammar/contrib/hf.py

Lines changed: 17 additions & 7 deletions

```diff
@@ -3,7 +3,7 @@
 transformers.LogitsProcessor, which is to be fed to `model.generate()`.
 """
 
-from typing import List
+from typing import List, Union
 
 import torch
 import transformers
@@ -40,17 +40,19 @@ class LogitsProcessor(transformers.LogitsProcessor):
     - Note that this implementation may contain extra overhead.
     """
 
-    def __init__(self, compiled_grammar: xgr.CompiledGrammar):
+    def __init__(self, compiled_grammar: Union[xgr.CompiledGrammar, List[xgr.CompiledGrammar]]):
         """Initialize the LogitsProcessor.
 
         Parameters
         ----------
-        compiled_grammar : xgr.CompiledGrammar
-            A grammar compiled according to the given grammar and the model's tokenizer_info.
+        compiled_grammar : xgr.CompiledGrammar | List[xgr.CompiledGrammar]
+            One or more grammars compiled according to the given grammar and the model's tokenizer_info.
         """
         self.matchers: List[xgr.GrammarMatcher] = []
-        self.compiled_grammar = compiled_grammar
-        self.full_vocab_size = self.compiled_grammar.tokenizer_info.vocab_size
+        self.compiled_grammars: List[xgr.CompiledGrammar] = (
+            compiled_grammar if isinstance(compiled_grammar, list) else [compiled_grammar]
+        )
+        self.full_vocab_size = self.compiled_grammars[0].tokenizer_info.vocab_size
         self.token_bitmask = None
         self.prefilled = False
         self.batch_size = 0
@@ -65,8 +67,16 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
         # Lazily initialize GrammarMatchers and bitmask
         if len(self.matchers) == 0:
             self.batch_size = input_ids.shape[0]
+            self.compiled_grammars = (
+                self.compiled_grammars
+                if len(self.compiled_grammars) > 1
+                else self.compiled_grammars * self.batch_size
+            )
+            assert (
+                len(self.compiled_grammars) == self.batch_size
+            ), "The number of compiled grammars must be equal to the batch size."
             self.matchers = [
-                xgr.GrammarMatcher(self.compiled_grammar) for _ in range(self.batch_size)
+                xgr.GrammarMatcher(self.compiled_grammars[i]) for i in range(self.batch_size)
             ]
             self.token_bitmask = xgr.allocate_token_bitmask(self.batch_size, self.full_vocab_size)
```
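For readers skimming the diff, here is a minimal usage sketch of the two calling conventions the patch supports. It is not part of the commit: `tokenizer`, `config`, `model`, and `model_inputs` are assumed to come from a standard transformers setup, the JSON Schema strings are made up for illustration, and the `from xgrammar.contrib.hf import LogitsProcessor` path is inferred from the file location above.

```python
# Sketch only (not from the commit): `tokenizer`, `config`, `model`, and
# `model_inputs` are assumed to exist from a transformers setup, and the
# import path is inferred from python/xgrammar/contrib/hf.py.
import xgrammar as xgr
from transformers import LogitsProcessorList

from xgrammar.contrib.hf import LogitsProcessor

tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer, vocab_size=config.vocab_size)
compiler = xgr.GrammarCompiler(tokenizer_info)

# Hypothetical schemas pinning "country" to a different value per batch row.
schema_poland = '{"type": "object", "properties": {"country": {"const": "Poland"}}}'
schema_germany = '{"type": "object", "properties": {"country": {"const": "Germany"}}}'

# New behavior: one compiled grammar per row; the assert added in __call__
# requires the list length to equal the batch size.
per_row = LogitsProcessor([
    compiler.compile_json_schema(schema_poland),
    compiler.compile_json_schema(schema_germany),
])

# Unchanged behavior: a single grammar is wrapped in a list in __init__ and
# replicated across the batch on the first __call__.
shared = LogitsProcessor(compiler.compile_json_schema(schema_poland))

model.generate(
    **model_inputs, max_new_tokens=512, logits_processor=LogitsProcessorList([per_row])
)
```

Because the replication happens lazily, existing single-grammar callers keep working for any batch size; only the per-row case needs the new list form.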