mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-22 00:25:55 +08:00
[4.5/N] bugfix for quant config in speculative decode (#10007)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
parent
d93478b399
commit
2094062b4e
@@ -61,6 +61,10 @@ def create_spec_worker(*args, **kwargs) -> "SpecDecodeWorker":
|
|||||||
|
|
||||||
draft_worker_config = copy.deepcopy(vllm_config)
|
draft_worker_config = copy.deepcopy(vllm_config)
|
||||||
draft_worker_config.model_config = speculative_config.draft_model_config
|
draft_worker_config.model_config = speculative_config.draft_model_config
|
||||||
|
draft_worker_config.quant_config = VllmConfig._get_quantization_config(
|
||||||
|
draft_worker_config.model_config,
|
||||||
|
vllm_config.load_config,
|
||||||
|
)
|
||||||
draft_worker_config.parallel_config = speculative_config.draft_parallel_config # noqa
|
draft_worker_config.parallel_config = speculative_config.draft_parallel_config # noqa
|
||||||
# TODO allow draft-model specific load config.
|
# TODO allow draft-model specific load config.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user