mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-22 19:34:33 +08:00
[Cleanup] Remove no-longer-used SpeculativeConfig.enable_chunked_prefill (#27826)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
parent
fc16f1c477
commit
9e5bd3076e
@ -78,10 +78,6 @@ class SpeculativeConfig:
|
||||
draft_tensor_parallel_size: int | None = Field(default=None, ge=1)
|
||||
"""The degree of the tensor parallelism for the draft model. Can only be 1
|
||||
or the same as the target model's tensor parallel size."""
|
||||
disable_logprobs: bool = True
|
||||
"""If set to True, token log probabilities are not returned during
|
||||
speculative decoding. If set to False, token log probabilities are returned
|
||||
according to the log probability settings in SamplingParams."""
|
||||
|
||||
# Draft model configuration
|
||||
quantization: me_quant.QuantizationMethods | None = None
|
||||
@ -126,12 +122,6 @@ class SpeculativeConfig:
|
||||
"""The configuration of the target model."""
|
||||
target_parallel_config: SkipValidation[ParallelConfig] = None # type: ignore
|
||||
"""The parallel configuration for the target model."""
|
||||
enable_chunked_prefill: SkipValidation[bool] = None # type: ignore
|
||||
"""Whether vLLM is configured to use chunked prefill or not. Used for
|
||||
raising an error since it's not yet compatible with speculative decode."""
|
||||
disable_log_stats: SkipValidation[bool] = None # type: ignore
|
||||
"""Whether to disable the periodic printing of stage times in speculative
|
||||
decoding."""
|
||||
|
||||
# params generated in the post-init stage
|
||||
draft_model_config: SkipValidation[ModelConfig] = None # type: ignore
|
||||
|
||||
@ -1246,8 +1246,6 @@ class EngineArgs:
|
||||
self,
|
||||
target_model_config: ModelConfig,
|
||||
target_parallel_config: ParallelConfig,
|
||||
enable_chunked_prefill: bool,
|
||||
disable_log_stats: bool,
|
||||
) -> SpeculativeConfig | None:
|
||||
"""Initializes and returns a SpeculativeConfig object based on
|
||||
`speculative_config`.
|
||||
@ -1267,8 +1265,6 @@ class EngineArgs:
|
||||
{
|
||||
"target_model_config": target_model_config,
|
||||
"target_parallel_config": target_parallel_config,
|
||||
"enable_chunked_prefill": enable_chunked_prefill,
|
||||
"disable_log_stats": disable_log_stats,
|
||||
}
|
||||
)
|
||||
return SpeculativeConfig(**self.speculative_config)
|
||||
@ -1561,8 +1557,6 @@ class EngineArgs:
|
||||
speculative_config = self.create_speculative_config(
|
||||
target_model_config=model_config,
|
||||
target_parallel_config=parallel_config,
|
||||
enable_chunked_prefill=self.enable_chunked_prefill,
|
||||
disable_log_stats=self.disable_log_stats,
|
||||
)
|
||||
|
||||
# make sure num_lookahead_slots is set appropriately depending on
|
||||
|
||||
@ -241,6 +241,7 @@ async def build_async_engine_client_from_engine_args(
|
||||
)
|
||||
|
||||
# Don't keep the dummy data in memory
|
||||
assert async_llm is not None
|
||||
await async_llm.reset_mm_cache()
|
||||
|
||||
yield async_llm
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user