Fix RoPE failures in Transformers nightly (#29700)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Harry Mellor 2025-11-30 14:29:32 +00:00 committed by GitHub
parent 8c363ed666
commit cd719de5cb
2 changed files with 2 additions and 19 deletions

@@ -300,25 +300,10 @@ def set_default_rope_theta(config: PretrainedConfig, default_theta: float) -> None:
 def patch_rope_parameters(config: PretrainedConfig) -> None:
     """Provide backwards compatibility for RoPE."""
     # Patch rope_parameters differently based on Transformers version
-    if Version(version("transformers")) >= Version("5.0.0.dev0"):
-        from transformers.modeling_rope_utils import (
-            rope_config_validation,
-            standardize_rope_params,
-        )
-
-        # When Transformers v5 is installed, legacy rope_theta may be present
-        # when using custom code models written for Transformers v4
-        if (rope_theta := getattr(config, "rope_theta", None)) is not None:
-            standardize_rope_params(config, rope_theta=rope_theta)
-            rope_config_validation(config)
-            # Delete rope_theta to avoid confusion in downstream code
-            del config.rope_theta
-    else:
-        # When Transformers v4 is installed, legacy rope_scaling may be present
+    if Version(version("transformers")) < Version("5.0.0.dev0"):
+        # Transformers v4 installed, legacy config fields may be present
         if (rope_scaling := getattr(config, "rope_scaling", None)) is not None:
             config.rope_parameters = rope_scaling
         # When Transformers v4 is installed, legacy rope_theta may be present
         if (rope_theta := getattr(config, "rope_theta", None)) is not None:
            if not hasattr(config, "rope_parameters"):
                config.rope_parameters = {"rope_type": "default"}
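
For illustration only (not part of the diff): a minimal sketch of what the retained v4-only branch does to a legacy config. LegacyConfig is a hypothetical stand-in for PretrainedConfig, the example rope_scaling values are made up, and only the behavior visible in the hunk above is mirrored here.

# Hypothetical stand-in for a Transformers v4-style config (assumption, not from the commit).
class LegacyConfig:
    rope_theta = 10000.0
    rope_scaling = {"rope_type": "linear", "factor": 2.0}

config = LegacyConfig()

# Mirror of the retained branch: a legacy rope_scaling dict becomes rope_parameters.
if (rope_scaling := getattr(config, "rope_scaling", None)) is not None:
    config.rope_parameters = rope_scaling

# A bare rope_theta still guarantees a rope_parameters dict with a default rope_type.
if (rope_theta := getattr(config, "rope_theta", None)) is not None:
    if not hasattr(config, "rope_parameters"):
        config.rope_parameters = {"rope_type": "default"}

print(config.rope_parameters)  # -> {'rope_type': 'linear', 'factor': 2.0}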

@@ -17,7 +17,6 @@
 """Qwen3-Next model configuration"""
 
 from transformers.configuration_utils import PretrainedConfig, layer_type_validation
-from transformers.modeling_rope_utils import rope_config_validation
 from transformers.utils import logging
 
 logger = logging.get_logger(__name__)
@@ -245,7 +244,6 @@ class Qwen3NextConfig(PretrainedConfig):
         self.attention_bias = attention_bias
         self.attention_dropout = attention_dropout
         self.head_dim = head_dim
-        rope_config_validation(self)
         self.layer_types = layer_types
 
         if self.layer_types is None: