[4.5/N] bugfix for quant config in speculative decode (#10007)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao 2024-11-04 15:11:59 -08:00 committed by GitHub
parent d93478b399
commit 2094062b4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -61,6 +61,10 @@ def create_spec_worker(*args, **kwargs) -> "SpecDecodeWorker":
draft_worker_config = copy.deepcopy(vllm_config)
draft_worker_config.model_config = speculative_config.draft_model_config
draft_worker_config.quant_config = VllmConfig._get_quantization_config(
draft_worker_config.model_config,
vllm_config.load_config,
)
draft_worker_config.parallel_config = speculative_config.draft_parallel_config # noqa
# TODO allow draft-model specific load config.