diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 14f78f927df43..37df18b231057 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2270,6 +2270,8 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): bad_words_token_ids={}, logitsprocs=LogitsProcessors(), token_ids=None, + num_tokens=None, + num_prompt_tokens=None, ) try: sampler_output = self.sampler(logits=logits,