diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 177542ed96c8..469bd0d435b6 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -4680,10 +4680,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             kv_transfer_group.set_host_xfer_buffer_ops(copy_kv_blocks)
 
         if self.dcp_world_size > 1:
-            layer_names = self.attn_groups[0][0].layer_names
-            layers = get_layers_from_vllm_config(
-                self.vllm_config, AttentionLayerBase, layer_names
-            )
+            layers = get_layers_from_vllm_config(self.vllm_config, AttentionLayerBase)
             for layer in layers.values():
                 assert layer.impl.need_to_return_lse_for_decode, (
                     "DCP requires attention impls to return"
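
For context, a minimal self-contained sketch of the behavior the new code asserts. The diff drops the `layer_names` argument (previously taken from the first attention group), so the check is presumably applied to every `AttentionLayerBase` layer returned for the config, each of which must be able to return the log-sum-exp (LSE) during decode. The classes and names below are hypothetical stand-ins for illustration, not vLLM's types.

```python
# Sketch only (not vLLM code): mirrors the DCP check from the new hunk using
# stand-in classes. Attribute names follow the diff; everything else is assumed.
from dataclasses import dataclass


@dataclass
class FakeAttnImpl:
    need_to_return_lse_for_decode: bool


@dataclass
class FakeAttnLayer:
    impl: FakeAttnImpl


def check_dcp_support(layers: dict[str, FakeAttnLayer]) -> None:
    # Mirrors the new loop: every attention layer must be able to return the
    # LSE during decode so DCP can merge partial attention results across ranks.
    for name, layer in layers.items():
        assert layer.impl.need_to_return_lse_for_decode, (
            f"DCP requires attention impls to return LSE for decode; {name} does not"
        )


if __name__ == "__main__":
    all_layers = {
        "model.layers.0.attn": FakeAttnLayer(FakeAttnImpl(True)),
        "model.layers.1.attn": FakeAttnLayer(FakeAttnImpl(True)),
    }
    check_dcp_support(all_layers)  # passes only if every layer supports LSE for decode
```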