diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py index 04ad51aae0a8..0ccbe6549349 100644 --- a/vllm/v1/engine/detokenizer.py +++ b/vllm/v1/engine/detokenizer.py @@ -234,7 +234,7 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer): try: token = self.stream.step(self.tokenizer, next_token_id) except Exception as e: - if str(e) != INVALID_PREFIX_ERR_MSG: + if not str(e).startswith(INVALID_PREFIX_ERR_MSG): raise e # Recover from edge case where tokenizer can produce non-monotonic, # invalid UTF-8 output, which breaks the internal state of @@ -243,7 +243,8 @@ class FastIncrementalDetokenizer(BaseIncrementalDetokenizer): logger.warning( "Encountered invalid prefix detokenization error" " for request %s, resetting decode stream.", self.request_id) - self.stream = DecodeStream(self.skip_special_tokens) + self.stream = DecodeStream( + skip_special_tokens=self.skip_special_tokens) token = self.stream.step(self.tokenizer, next_token_id) return token