From cdc72e3c80b7029c49de9667150f68481f386956 Mon Sep 17 00:00:00 2001 From: Hui Liu <96135754+hliuca@users.noreply.github.com> Date: Tue, 8 Oct 2024 23:43:06 -0700 Subject: [PATCH] [Model] Remap FP8 kv_scale in CommandR and DBRX (#9174) --- vllm/model_executor/models/commandr.py | 8 +++++++- vllm/model_executor/models/dbrx.py | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py index a0b8ff3a85c9..578cd2f04861 100644 --- a/vllm/model_executor/models/commandr.py +++ b/vllm/model_executor/models/commandr.py @@ -41,7 +41,8 @@ from vllm.model_executor.layers.sampler import Sampler, SamplerOutput from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( - default_weight_loader, row_parallel_weight_loader) + default_weight_loader, maybe_remap_kv_scale_name, + row_parallel_weight_loader) from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_weight_attrs from vllm.sequence import IntermediateTensors @@ -426,6 +427,11 @@ class CohereForCausalLM(nn.Module, SupportsLoRA, SupportsPP): # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue + # Remapping the name of FP8 kv-scale. + name = maybe_remap_kv_scale_name(name, params_dict) + if name is None: + continue + if is_pp_missing_parameter(name, self): continue param = params_dict[name] diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py index b0b07e9c03a9..aae7ab7370b7 100644 --- a/vllm/model_executor/models/dbrx.py +++ b/vllm/model_executor/models/dbrx.py @@ -18,7 +18,8 @@ from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.sampler import Sampler, SamplerOutput from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) -from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.model_loader.weight_utils import ( + default_weight_loader, maybe_remap_kv_scale_name) from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs.dbrx import DbrxConfig @@ -425,6 +426,11 @@ class DbrxForCausalLM(nn.Module, SupportsPP): weight_loader(param, loaded_weight, weight_name) break else: + # Remapping the name of FP8 kv-scale. + name = maybe_remap_kv_scale_name(name, params_dict) + if name is None: + continue + if is_pp_missing_parameter(name, self): continue param = params_dict[name]