From e2ed238885be6af358be1851cd43105b7d036c49 Mon Sep 17 00:00:00 2001
From: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Date: Sun, 14 Dec 2025 19:33:41 -0500
Subject: [PATCH] Revert "[Fix]Load kv-cache dtype from hf_quant_config.json automatically" (#30653)

---
 vllm/utils/torch_utils.py | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/vllm/utils/torch_utils.py b/vllm/utils/torch_utils.py
index edcb79fbc9cd7..c97efce312b56 100644
--- a/vllm/utils/torch_utils.py
+++ b/vllm/utils/torch_utils.py
@@ -194,33 +194,12 @@ def get_kv_cache_torch_dtype(
     return torch_dtype
 
 
-def get_kv_cache_quant_algo_dtype(quant_cfg: dict[str, Any]) -> torch.dtype | None:
-    quant_method = quant_cfg.get("quant_method", "")
-    if quant_method.startswith("modelopt"):
-        quantization_inner = quant_cfg.get("quantization", quant_cfg)
-        # Check if quant config is specified and use kv cache quant algo
-        kv_algo = quantization_inner.get("kv_cache_quant_algo") or quant_cfg.get(
-            "kv_cache_quant_algo"
-        )
-        if isinstance(kv_algo, str):
-            return STR_DTYPE_TO_TORCH_DTYPE[kv_algo.lower()]
-    return None
-
-
 def kv_cache_dtype_str_to_dtype(
     kv_cache_dtype: str, model_config: ModelConfig
 ) -> torch.dtype:
-    # Model config may not be specified for unit tests, default to float16
-    dtype = model_config.dtype if model_config else torch.half
     if kv_cache_dtype == "auto":
-        hf_cfg = getattr(model_config, "hf_config", None)
-        if hf_cfg is not None:
-            quant_cfg = getattr(hf_cfg, "quantization_config", None)
-            if quant_cfg is not None:
-                kv_algo_dtype = get_kv_cache_quant_algo_dtype(quant_cfg)
-                return kv_algo_dtype if kv_algo_dtype is not None else dtype
-        return dtype
-
+        # Model config may not be specified for unit tests, default to float16
+        return model_config.dtype if model_config else torch.half
     return STR_DTYPE_TO_TORCH_DTYPE[kv_cache_dtype]
 
 