[Cleanup] Remove no-longer-used SpeculativeConfig.enable_chunked_prefill (#27826)

Signed-off-by: Nick Hill <nhill@redhat.com>
Nick Hill 2025-10-31 10:57:45 -07:00 committed by GitHub
parent fc16f1c477
commit 9e5bd3076e
3 changed files with 1 addition and 16 deletions
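The removed `enable_chunked_prefill` field existed only so `SpeculativeConfig` could raise an error when chunked prefill was combined with speculative decoding; nothing consults it anymore, so the flag (and the related `disable_log_stats` plumbing) can be dropped. For orientation, a minimal usage sketch of configuring both features together through the public `LLM` entrypoint; the model name and speculative settings below are placeholders, not part of this commit:

    from vllm import LLM, SamplingParams

    # Placeholder model and n-gram speculative settings; chunked prefill is
    # enabled alongside speculative decoding with no extra compatibility flag.
    llm = LLM(
        model="facebook/opt-125m",
        enable_chunked_prefill=True,
        speculative_config={
            "method": "ngram",
            "num_speculative_tokens": 5,
            "prompt_lookup_max": 4,
        },
    )
    outputs = llm.generate(["Chunked prefill plus speculation:"],
                           SamplingParams(max_tokens=16))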


@@ -78,10 +78,6 @@ class SpeculativeConfig:
 draft_tensor_parallel_size: int | None = Field(default=None, ge=1)
 """The degree of the tensor parallelism for the draft model. Can only be 1
 or the same as the target model's tensor parallel size."""
-disable_logprobs: bool = True
-"""If set to True, token log probabilities are not returned during
-speculative decoding. If set to False, token log probabilities are returned
-according to the log probability settings in SamplingParams."""

 # Draft model configuration
 quantization: me_quant.QuantizationMethods | None = None
@@ -126,12 +122,6 @@ class SpeculativeConfig:
 """The configuration of the target model."""
 target_parallel_config: SkipValidation[ParallelConfig] = None # type: ignore
 """The parallel configuration for the target model."""
-enable_chunked_prefill: SkipValidation[bool] = None # type: ignore
-"""Whether vLLM is configured to use chunked prefill or not. Used for
-raising an error since it's not yet compatible with speculative decode."""
-disable_log_stats: SkipValidation[bool] = None # type: ignore
-"""Whether to disable the periodic printing of stage times in speculative
-decoding."""

 # params generated in the post-init stage
 draft_model_config: SkipValidation[ModelConfig] = None # type: ignore
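For context on the fields that remain: user-supplied keys in `speculative_config` are expanded straight onto these dataclass fields via `SpeculativeConfig(**self.speculative_config)` (see the EngineArgs hunks below), while `target_model_config` and `target_parallel_config` are injected internally. A hedged sketch of a user-side dict for a draft model, respecting the `draft_tensor_parallel_size` constraint documented above; the draft model name is a placeholder:

    # Hypothetical speculative_config dict; draft_tensor_parallel_size must be
    # 1 or match the target model's tensor parallel size.
    speculative_config = {
        "model": "org/draft-model",
        "num_speculative_tokens": 4,
        "draft_tensor_parallel_size": 1,
    }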


@@ -1246,8 +1246,6 @@ class EngineArgs:
 self,
 target_model_config: ModelConfig,
 target_parallel_config: ParallelConfig,
-enable_chunked_prefill: bool,
-disable_log_stats: bool,
 ) -> SpeculativeConfig | None:
 """Initializes and returns a SpeculativeConfig object based on
 `speculative_config`.
@@ -1267,8 +1265,6 @@ class EngineArgs:
 {
 "target_model_config": target_model_config,
 "target_parallel_config": target_parallel_config,
-"enable_chunked_prefill": enable_chunked_prefill,
-"disable_log_stats": disable_log_stats,
 }
 )
 return SpeculativeConfig(**self.speculative_config)
@@ -1561,8 +1557,6 @@ class EngineArgs:
 speculative_config = self.create_speculative_config(
 target_model_config=model_config,
 target_parallel_config=parallel_config,
-enable_chunked_prefill=self.enable_chunked_prefill,
-disable_log_stats=self.disable_log_stats,
 )
 # make sure num_lookahead_slots is set appropriately depending on
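With the two parameters gone, code that builds a `SpeculativeConfig` directly (for example in tests) only needs to inject the target model and parallel configs. A hedged sketch, assuming `ModelConfig`, `ParallelConfig`, and `SpeculativeConfig` are importable from `vllm.config` and that default `ParallelConfig()` values are acceptable; the target model name and speculative settings are placeholders:

    from vllm.config import ModelConfig, ParallelConfig, SpeculativeConfig

    target_model_config = ModelConfig(model="facebook/opt-125m")  # placeholder target
    target_parallel_config = ParallelConfig()

    spec_config = SpeculativeConfig(
        method="ngram",                 # draft-free n-gram speculation
        num_speculative_tokens=3,
        prompt_lookup_max=4,
        target_model_config=target_model_config,
        target_parallel_config=target_parallel_config,
        # enable_chunked_prefill / disable_log_stats no longer exist as fields
    )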


@@ -241,6 +241,7 @@ async def build_async_engine_client_from_engine_args(
)
# Don't keep the dummy data in memory
assert async_llm is not None
await async_llm.reset_mm_cache()
yield async_llm
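The final hunk sits inside an async context manager that constructs the `AsyncLLM`, clears the multimodal cache so profiling dummy data is not kept around, yields the client, and shuts it down on exit. A simplified sketch of that pattern, assuming `AsyncLLM.from_vllm_config`, `create_engine_config`, and `shutdown` behave as used below (only `reset_mm_cache` appears in the diff); the real `build_async_engine_client_from_engine_args` takes more arguments and handles more cases:

    from contextlib import asynccontextmanager

    from vllm.engine.arg_utils import AsyncEngineArgs
    from vllm.v1.engine.async_llm import AsyncLLM

    @asynccontextmanager
    async def simple_async_llm_client(engine_args: AsyncEngineArgs):
        """Toy stand-in for build_async_engine_client_from_engine_args."""
        async_llm: AsyncLLM | None = None
        try:
            async_llm = AsyncLLM.from_vllm_config(engine_args.create_engine_config())
            # Don't keep the dummy (profiling) data in the multimodal cache.
            await async_llm.reset_mm_cache()
            yield async_llm
        finally:
            if async_llm is not None:
                async_llm.shutdown()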