From de75b0bb701c89c1b2bffe09b23f86446c951f73 Mon Sep 17 00:00:00 2001 From: Andrey Khalyavin Date: Wed, 26 Nov 2025 02:45:58 +0300 Subject: [PATCH] [BugFix] Fix initialization of draft model. (#29319) Signed-off-by: Andrey Khalyavin Signed-off-by: Tyler Michael Smith Co-authored-by: Tyler Michael Smith --- vllm/v1/worker/gpu_model_runner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index bb44c5ad84cc1..9f3c34b15e2a8 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3460,6 +3460,10 @@ class GPUModelRunner( scope="local", ) prepare_communication_buffer_for_model(self.model) + if (drafter := getattr(self, "drafter", None)) and ( + drafter_model := getattr(drafter, "model", None) + ): + prepare_communication_buffer_for_model(drafter_model) mm_config = self.model_config.multimodal_config self.is_multimodal_pruning_enabled = ( supports_multimodal_pruning(self.get_model())