From 1867c258bda3bc6adb07090c508fd85e3ceed547 Mon Sep 17 00:00:00 2001
From: Eldar Kurtic
Date: Sat, 1 Feb 2025 06:07:46 +0100
Subject: [PATCH] Fix target matching for fused layers with compressed-tensors
 (#12617)

Without this PR
---------------

Quantizing models with llm-compressor and a recipe that explicitly lists
names of layers produces a model that is not loadable by vLLM (i.e.
`vllm serve ` fails with `raise ValueError(f"Unable to find matching
target for {module} in the ...`).

Example recipe:

```
recipe = """
quantization_stage:
  run_type: oneshot
  quantization_modifiers:
    GPTQModifier:
      ignore: ["lm_head"]
      config_groups:
        group_0:
          weights:
            num_bits: 4
            type: "int"
            symmetric: true
            strategy: "group"
            group_size: 128
          targets: [
            "model.layers.0.mlp.down_proj",
            "model.layers.2.mlp.down_proj",
            "model.layers.3.mlp.down_proj",
            "model.layers.4.mlp.down_proj",
            "model.layers.5.mlp.down_proj",
            "model.layers.6.mlp.down_proj",
            "model.layers.7.mlp.down_proj",
            "model.layers.8.mlp.down_proj",
            "model.layers.9.mlp.down_proj",
            "model.layers.10.mlp.down_proj",
            "model.layers.11.mlp.down_proj",
            "model.layers.12.mlp.down_proj",
            "model.layers.13.mlp.down_proj",
            "model.layers.14.mlp.down_proj",
            "model.layers.15.mlp.down_proj",
            "model.layers.16.mlp.down_proj",
            "model.layers.17.mlp.down_proj",
            "model.layers.19.mlp.down_proj",
            "model.layers.21.mlp.down_proj",
            "model.layers.22.mlp.down_proj",
            ...
          ]
"""
```

To reproduce the vLLM error:

```bash
vllm serve nm-testing/eldar-test
```

With this PR
------------

Models are loaded correctly without any errors.

---
 .../quantization/compressed_tensors/utils.py | 41 ++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
index 8fcbda377428..5bab5a02d83b 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/utils.py
@@ -103,7 +103,8 @@ def find_matched_target(layer_name: Optional[str], module: Module,
 
     matched_target = (_find_first_match(layer_name, targets)
                       or _find_first_match(module.__class__.__name__, targets,
-                                           True))
+                                           True)
+                      or _match_fused_layer(layer_name, targets))
 
     if matched_target is None:
         raise ValueError(f"Unable to find matching target for {module} in the "
@@ -152,3 +153,41 @@ def _is_equal_or_regex_match(value: str,
     elif target == value:
         return True
     return False
+
+
+def _match_fused_layer(layer_name: str,
+                       target_layers: Iterable[str]) -> Optional[str]:
+    """
+    Match a fused layer name to its corresponding individual layer in
+    target_layers.
+
+    Examples:
+        layer_name = "model.layers.0.self_attn.qkv_proj"
+        target_layers = ["model.layers.0.self_attn.q_proj",
+                         "model.layers.0.self_attn.k_proj",
+                         "model.layers.0.self_attn.v_proj"]
+    """
+    # Split into parent path and layer type
+    # e.g., "model.layers.0.self_attn" and "qkv_proj"
+    parent_path = ".".join(layer_name.split(".")[:-1])
+    layer_type = layer_name.split(".")[-1]
+
+    if layer_type not in FUSED_LAYER_NAME_MAPPING:
+        return None
+
+    possible_layer_types = FUSED_LAYER_NAME_MAPPING[layer_type]
+
+    # Look for a target layer that:
+    # 1. Has the same parent path
+    # 2. Ends with one of the possible individual layer types
+    for target in target_layers:
+        is_same_parent = parent_path in target
+        is_matching_type = any(type_suffix in target
+                               for type_suffix in possible_layer_types)
+
+        if is_same_parent and is_matching_type and all(
+                '.'.join([parent_path, type_suffix]) in target_layers
+                for type_suffix in possible_layer_types):
+            return target
+
+    return None
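
A minimal standalone sketch of the fall-back this patch adds, for
reference. The `FUSED_LAYER_NAME_MAPPING` stub is an assumption that
mirrors vLLM's qkv and gate/up fusions, and `match_fused_layer` is a
local copy of the helper above rather than an import:

```python
from typing import Iterable, Optional

# Stub of vLLM's fused-layer table (assumed entries: qkv and gate/up fusions).
FUSED_LAYER_NAME_MAPPING = {
    "qkv_proj": ["q_proj", "k_proj", "v_proj"],
    "gate_up_proj": ["gate_proj", "up_proj"],
}


def match_fused_layer(layer_name: str,
                      target_layers: Iterable[str]) -> Optional[str]:
    """Local copy of _match_fused_layer from the patch above."""
    # "model.layers.0.self_attn.qkv_proj" -> parent "model.layers.0.self_attn"
    # and layer type "qkv_proj"
    parent_path = ".".join(layer_name.split(".")[:-1])
    layer_type = layer_name.split(".")[-1]

    if layer_type not in FUSED_LAYER_NAME_MAPPING:
        return None

    possible_layer_types = FUSED_LAYER_NAME_MAPPING[layer_type]
    targets = list(target_layers)  # materialize for repeated membership tests

    for target in targets:
        is_same_parent = parent_path in target
        is_matching_type = any(type_suffix in target
                               for type_suffix in possible_layer_types)
        # Accept only if every unfused constituent is itself a target, so the
        # fused module gets one consistent quantization scheme.
        if is_same_parent and is_matching_type and all(
                ".".join([parent_path, type_suffix]) in targets
                for type_suffix in possible_layer_types):
            return target

    return None


targets = [
    "model.layers.0.self_attn.q_proj",
    "model.layers.0.self_attn.k_proj",
    "model.layers.0.self_attn.v_proj",
]
# The fused module resolves to the first unfused target under the same parent.
assert (match_fused_layer("model.layers.0.self_attn.qkv_proj", targets)
        == "model.layers.0.self_attn.q_proj")
# Non-fused layer types are left to the regular name/regex matching.
assert match_fused_layer("model.layers.0.mlp.down_proj", targets) is None
```

The substring checks (`parent_path in target`) are deliberately loose;
the decisive condition is the final `all(...)`, which only accepts a
match when every unfused constituent of the fused module is listed as a
target.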