From b10d47e0e099b29aecc20740daaaa1afe3476c59 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 23 Dec 2025 11:41:49 +0000
Subject: [PATCH] Add util function for checking nesting of rope parameters
 (#31146)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config/model.py              |  6 ++----
 .../models/transformers/utils.py  |  4 ++--
 vllm/transformers_utils/config.py | 21 +++++++++++++++++++--
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/vllm/config/model.py b/vllm/config/model.py
index dd2b7b9d7a786..f2f39ac6af022 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -11,7 +11,6 @@ import torch
 from pydantic import ConfigDict, Field, field_validator, model_validator
 from pydantic.dataclasses import dataclass
 from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE
-from transformers.configuration_utils import ALLOWED_LAYER_TYPES
 
 import vllm.envs as envs
 from vllm.attention.backends.registry import AttentionBackendEnum
@@ -29,6 +28,7 @@ from vllm.transformers_utils.config import (
     get_pooling_config,
     get_sentence_transformer_tokenizer_config,
     is_encoder_decoder,
+    is_rope_parameters_nested,
     try_get_dense_modules,
     try_get_generation_config,
     try_get_safetensors_metadata,
@@ -2125,9 +2125,7 @@ def _get_and_verify_max_len(
     # In Transformers v5 rope_parameters could be TypedDict or dict[str, TypedDict].
     # To simplify the verification, we convert it to dict[str, TypedDict].
     rope_parameters = getattr(hf_config, "rope_parameters", None)
-    if rope_parameters and not set(rope_parameters.keys()).issubset(
-        ALLOWED_LAYER_TYPES
-    ):
+    if rope_parameters and not is_rope_parameters_nested(rope_parameters):
         rope_parameters = {"": rope_parameters}
 
     # NOTE(woosuk): Gemma3's max_model_len (128K) is already scaled by RoPE
diff --git a/vllm/model_executor/models/transformers/utils.py b/vllm/model_executor/models/transformers/utils.py
index b807f45b5d52b..c7844381eb633 100644
--- a/vllm/model_executor/models/transformers/utils.py
+++ b/vllm/model_executor/models/transformers/utils.py
@@ -22,7 +22,6 @@ from typing import TYPE_CHECKING, Literal
 
 import torch
 from torch import nn
-from transformers.configuration_utils import ALLOWED_LAYER_TYPES
 
 from vllm.config.utils import getattr_iter
 from vllm.logger import init_logger
@@ -32,6 +31,7 @@ from vllm.model_executor.layers.linear import (
     ReplicatedLinear,
     RowParallelLinear,
 )
+from vllm.transformers_utils.config import is_rope_parameters_nested
 
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
@@ -207,7 +207,7 @@ def can_enable_torch_compile(vllm_config: "VllmConfig") -> bool:
     rope_parameters: dict | None = getattr(text_config, "rope_parameters", None) or {}
     if rope_parameters:
         # Nest rope_parameters if not nested already to simplify logic
-        if not set(rope_parameters.keys()).issubset(ALLOWED_LAYER_TYPES):
+        if not is_rope_parameters_nested(rope_parameters):
             rope_parameters = {"": rope_parameters}
         return all(rp["rope_type"] != "dynamic" for rp in rope_parameters.values())
     return True
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index 887f936a2d8ae..4635277c83585 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -15,7 +15,6 @@ from huggingface_hub import (
 )
 from packaging.version import Version
 from transformers import GenerationConfig, PretrainedConfig
-from transformers.configuration_utils import ALLOWED_LAYER_TYPES
 from transformers.models.auto.image_processing_auto import get_image_processor_config
 from transformers.models.auto.modeling_auto import (
     MODEL_FOR_CAUSAL_LM_MAPPING_NAMES,
@@ -44,6 +43,16 @@ from .repo_utils import (
     with_retry,
 )
 
+try:
+    # Transformers v5
+    from transformers.configuration_utils import ALLOWED_ATTENTION_LAYER_TYPES
+except ImportError:
+    # Transformers v4
+    from transformers.configuration_utils import (
+        ALLOWED_LAYER_TYPES as ALLOWED_ATTENTION_LAYER_TYPES,
+    )
+
+
 if envs.VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
 else:
@@ -104,6 +113,14 @@ _AUTO_CONFIG_KWARGS_OVERRIDES: dict[str, dict[str, Any]] = {
 }
 
 
+def is_rope_parameters_nested(rope_parameters: dict[str, Any]) -> bool:
+    """Check if rope_parameters is nested by layer types."""
+    # Cannot be nested if rope_parameters is empty
+    if not rope_parameters:
+        return False
+    return set(rope_parameters.keys()).issubset(ALLOWED_ATTENTION_LAYER_TYPES)
+
+
 class HFConfigParser(ConfigParserBase):
     def parse(
         self,
@@ -346,7 +363,7 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
         config.rope_parameters["original_max_position_embeddings"] = ompe
 
     # Handle nested rope_parameters in interleaved sliding attention models
-    if set(config.rope_parameters.keys()).issubset(ALLOWED_LAYER_TYPES):
+    if is_rope_parameters_nested(config.rope_parameters):
         for rope_parameters_layer_type in config.rope_parameters.values():
             patch_rope_parameters_dict(rope_parameters_layer_type)
     else:
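
For context, a minimal sketch (not part of the patch) of how the new helper is intended to behave. The layer-type names used here are assumptions for illustration; the real check uses the ALLOWED_(ATTENTION_)LAYER_TYPES constant imported from transformers.configuration_utils.

    # Sketch only: stand-in for transformers' allowed attention layer types;
    # the exact members depend on the installed Transformers version.
    ALLOWED_ATTENTION_LAYER_TYPES = ("full_attention", "sliding_attention")

    def is_rope_parameters_nested(rope_parameters: dict) -> bool:
        # An empty dict is treated as not nested
        if not rope_parameters:
            return False
        return set(rope_parameters.keys()).issubset(ALLOWED_ATTENTION_LAYER_TYPES)

    # Flat rope_parameters: keys are RoPE fields, not layer types
    flat = {"rope_type": "linear", "factor": 2.0}
    assert not is_rope_parameters_nested(flat)

    # Nested rope_parameters: one RoPE config per attention layer type
    nested = {"full_attention": {"rope_type": "default"}}
    assert is_rope_parameters_nested(nested)

    # Callers in this patch normalise the flat form into the nested form
    if not is_rope_parameters_nested(flat):
        flat = {"": flat}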