From f5b0846ba0aa6a4b6ab788ff257d0a00eb376e75 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 2 Dec 2025 07:05:27 +0000
Subject: [PATCH] Fix some Transformers nightly tests (#29802)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/model_executor/models/jina_vl.py    |  2 +-
 vllm/model_executor/models/modernbert.py | 51 ++++++++++++------------
 vllm/model_executor/models/qwen2.py      |  2 +-
 3 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/vllm/model_executor/models/jina_vl.py b/vllm/model_executor/models/jina_vl.py
index 05a40837954d8..8bba7b62882f1 100644
--- a/vllm/model_executor/models/jina_vl.py
+++ b/vllm/model_executor/models/jina_vl.py
@@ -29,7 +29,7 @@ logger = init_logger(__name__)
 class JinaVLScorer(nn.Module):
     def __init__(self, model_config: "ModelConfig"):
         super().__init__()
-        config = model_config.hf_config
+        config = model_config.hf_config.get_text_config()
         head_dtype = model_config.head_dtype
         self.dense = ColumnParallelLinear(
             config.hidden_size, config.hidden_size, params_dtype=head_dtype, bias=True
diff --git a/vllm/model_executor/models/modernbert.py b/vllm/model_executor/models/modernbert.py
index 743bc23d9876f..be36f761c63aa 100644
--- a/vllm/model_executor/models/modernbert.py
+++ b/vllm/model_executor/models/modernbert.py
@@ -20,7 +20,7 @@ from vllm.model_executor.layers.pooler import (
     PoolingParamsUpdate,
     PoolingType,
 )
-from vllm.model_executor.layers.rotary_embedding import RotaryEmbedding
+from vllm.model_executor.layers.rotary_embedding import get_rope
 from vllm.model_executor.layers.vocab_parallel_embedding import VocabParallelEmbedding
 from vllm.model_executor.model_loader.weight_utils import default_weight_loader
 from vllm.sequence import IntermediateTensors
@@ -62,19 +62,6 @@ class ModernBertEmbeddings(nn.Module):
         return embeddings
 
 
-class ModernBertRotaryEmbedding(RotaryEmbedding):
-    def __init__(self, config: ModernBertConfig, head_size: int, dim: int, base: float):
-        super().__init__(
-            head_size=head_size,
-            rotary_dim=dim,
-            max_position_embeddings=config.max_position_embeddings,
-            base=base,
-            is_neox_style=True,
-            dtype=torch.float16,
-        )
-        self.config = config
-
-
 class ModernBertAttention(nn.Module):
     def __init__(self, config: ModernBertConfig, layer_id: int | None = None):
         super().__init__()
@@ -95,19 +82,33 @@ class ModernBertAttention(nn.Module):
             bias=config.attention_bias,
         )
 
-        sliding_window = None
-        if layer_id % config.global_attn_every_n_layers != 0:
-            sliding_window = config.local_attention // 2
-            rope_theta = (
-                config.local_rope_theta
-                if config.local_rope_theta is not None
-                else config.global_rope_theta
-            )
+        if layer_types := getattr(config, "layer_types", None):
+            # Transformers v5
+            layer_type = layer_types[layer_id]
+            rope_parameters = config.rope_parameters[layer_type]
+            sliding_window: int | None = None
+            if layer_type == "sliding_attention":
+                sliding_window = config.local_attention // 2
         else:
-            rope_theta = config.global_rope_theta
+            # Transformers v4
+            sliding_window = None
+            if layer_id % config.global_attn_every_n_layers != 0:
+                sliding_window = config.local_attention // 2
+                rope_theta = (
+                    config.local_rope_theta
+                    if config.local_rope_theta is not None
+                    else config.global_rope_theta
+                )
+            else:
+                rope_theta = config.global_rope_theta
+            rope_parameters = {"rope_type": "default", "rope_theta": rope_theta}
 
-        self.rotary_emb = ModernBertRotaryEmbedding(
-            config=config, head_size=self.head_dim, dim=self.head_dim, base=rope_theta
+        self.rotary_emb = get_rope(
+            head_size=self.head_dim,
+            rotary_dim=self.head_dim,
+            max_position=config.max_position_embeddings,
+            rope_parameters=rope_parameters,
+            dtype=torch.float16,
         )
         self.attn = EncoderOnlyAttention(
             self.num_heads,
diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index 34c31d8deee23..f5501bae78418 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -503,7 +503,7 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3):
 
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
-        config = vllm_config.model_config.hf_config
+        config = vllm_config.model_config.hf_config.get_text_config()
         quant_config = vllm_config.quant_config
 
         self.config = config
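
The modernbert.py change branches on whether the config exposes the Transformers v5 style layer_types/rope_parameters attributes or the older v4 attributes. The following is an illustrative sketch only, not part of the patch: it mirrors that selection logic using a hypothetical SimpleNamespace stand-in config with made-up values, so the two code paths can be exercised outside vLLM.

from types import SimpleNamespace

def resolve_rope_and_window(config, layer_id: int):
    """Return (rope_parameters, sliding_window) the way the patched ModernBertAttention does."""
    if layer_types := getattr(config, "layer_types", None):
        # Transformers v5: per-layer types, rope_parameters keyed by layer type
        layer_type = layer_types[layer_id]
        rope_parameters = config.rope_parameters[layer_type]
        sliding_window = (
            config.local_attention // 2 if layer_type == "sliding_attention" else None
        )
    else:
        # Transformers v4: derive rope_theta from global/local settings
        sliding_window = None
        if layer_id % config.global_attn_every_n_layers != 0:
            sliding_window = config.local_attention // 2
            rope_theta = (
                config.local_rope_theta
                if config.local_rope_theta is not None
                else config.global_rope_theta
            )
        else:
            rope_theta = config.global_rope_theta
        rope_parameters = {"rope_type": "default", "rope_theta": rope_theta}
    return rope_parameters, sliding_window

# Example with a v4-style config (attribute names follow the diff; values are made up)
v4_config = SimpleNamespace(
    global_attn_every_n_layers=3,
    local_attention=128,
    local_rope_theta=10000.0,
    global_rope_theta=160000.0,
)
print(resolve_rope_and_window(v4_config, layer_id=1))
# ({'rope_type': 'default', 'rope_theta': 10000.0}, 64)

In the v5 path rope_parameters comes straight from the config keyed by layer type, while in the v4 path a default-rope dict is synthesised from local_rope_theta/global_rope_theta; either way the result is what the patched code passes on to get_rope.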