From 2f1cc8cef17de3b7a48d9351c2b71f1ba7c75d82 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Tue, 4 Nov 2025 10:01:56 -0800
Subject: [PATCH] Remove deprecated `--rope-scaling` and `--rope-theta` (#28006)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config/model.py     | 27 ---------------------------
 vllm/engine/arg_utils.py |  6 ------
 2 files changed, 33 deletions(-)

diff --git a/vllm/config/model.py b/vllm/config/model.py
index 17d3162695b54..44c044c76168d 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -168,12 +168,6 @@ class ModelConfig:
     """The specific revision to use for the model code on the Hugging Face Hub.
     It can be a branch name, a tag name, or a commit id. If unspecified, will
     use the default version."""
-    rope_scaling: dict[str, Any] = field(default_factory=dict)
-    """RoPE scaling configuration. For example,
-    `{"rope_type":"dynamic","factor":2.0}`."""
-    rope_theta: float | None = None
-    """RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE
-    theta improves the performance of the scaled model."""
     tokenizer_revision: str | None = None
     """The specific revision to use for the tokenizer on the Hugging Face Hub.
     It can be a branch name, a tag name, or a commit id. If unspecified, will
@@ -338,8 +332,6 @@ class ModelConfig:
         factors.append(self.generation_config)
         factors.append(self.model_impl)
         factors.append(self.override_generation_config)
-        factors.append(self.rope_scaling)
-        factors.append(self.rope_theta)
         factors.append(self.video_pruning_rate)
         factors.append(self.enable_prompt_embeds)
 
@@ -481,25 +473,6 @@ class ModelConfig:
                     hf_overrides_kw[key] = value
             hf_overrides_fn = None
 
-        if self.rope_scaling:
-            hf_override: dict[str, Any] = {"rope_scaling": self.rope_scaling}
-            hf_overrides_kw.update(hf_override)
-            hf_overrides_str = json.dumps(hf_overrides_kw)
-            msg = (
-                "`--rope-scaling` will be removed in a future release. "
-                f"'Please instead use `--hf-overrides '{hf_overrides_str}'`"
-            )
-            warnings.warn(DeprecationWarning(msg), stacklevel=2)
-        if self.rope_theta is not None:
-            hf_override = {"rope_theta": self.rope_theta}
-            hf_overrides_kw.update(hf_override)
-            hf_overrides_str = json.dumps(hf_overrides_kw)
-            msg = (
-                "`--rope-theta` will be removed in a future release. "
-                f"'Please instead use `--hf-overrides '{hf_overrides_str}'`"
-            )
-            warnings.warn(DeprecationWarning(msg), stacklevel=2)
-
         self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer)
 
         if (
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 14fd4e70ad6c0..0b2b8bbe678a0 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -438,8 +438,6 @@ class EngineArgs:
     aggregate_engine_logging: bool = False
     revision: str | None = ModelConfig.revision
     code_revision: str | None = ModelConfig.code_revision
-    rope_scaling: dict[str, Any] = get_field(ModelConfig, "rope_scaling")
-    rope_theta: float | None = ModelConfig.rope_theta
     hf_token: bool | str | None = ModelConfig.hf_token
     hf_overrides: HfOverrides = get_field(ModelConfig, "hf_overrides")
     tokenizer_revision: str | None = ModelConfig.tokenizer_revision
@@ -617,8 +615,6 @@ class EngineArgs:
         )
         model_group.add_argument("--revision", **model_kwargs["revision"])
         model_group.add_argument("--code-revision", **model_kwargs["code_revision"])
-        model_group.add_argument("--rope-scaling", **model_kwargs["rope_scaling"])
-        model_group.add_argument("--rope-theta", **model_kwargs["rope_theta"])
         model_group.add_argument(
             "--tokenizer-revision", **model_kwargs["tokenizer_revision"]
         )
@@ -1184,8 +1180,6 @@ class EngineArgs:
             seed=self.seed,
             revision=self.revision,
             code_revision=self.code_revision,
-            rope_scaling=self.rope_scaling,
-            rope_theta=self.rope_theta,
             hf_token=self.hf_token,
             hf_overrides=self.hf_overrides,
             tokenizer_revision=self.tokenizer_revision,