Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Frontend] Customizable RoPE theta (#5197)
commit dcbf4286af
parent 00e6a2dc53
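This change threads a user-supplied rope_theta override through the frontend: a new field on ModelConfig and EngineArgs, a --rope-theta CLI flag, and an override applied to the Hugging Face config inside get_config.

For context, rope_theta is the base of the rotary position embedding (RoPE) frequency schedule: channel pair i of a head is rotated at frequency theta**(-2i/d), so a larger base slows the low-frequency rotations and is a common lever, alongside rope_scaling, for stretching a model's usable context. A minimal standalone sketch (not vLLM code) of how the base enters the computation:

def rope_inv_freqs(head_dim: int, theta: float) -> list[float]:
    # RoPE rotates channel pair i at angular frequency theta ** (-2 * i / head_dim).
    # Raising theta lowers these frequencies, which is the usual motivation
    # for overriding it when extending the context window.
    return [theta ** (-2.0 * i / head_dim) for i in range(head_dim // 2)]

# The test below asserts that the stock Llama checkpoint carries
# rope_theta == 500_000, then overrides it to 16_000_000.
print(rope_inv_freqs(128, 500_000.0)[:3])
print(rope_inv_freqs(128, 16_000_000.0)[:3])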
tests/test_config.py
@@ -63,8 +63,9 @@ def test_get_sliding_window():
     assert mistral_model_config.get_sliding_window() == TEST_SLIDING_WINDOW


-def test_rope_scaling():
+def test_rope_customization():
     TEST_ROPE_SCALING = {"type": "dynamic", "factor": 2.0}
+    TEST_ROPE_THETA = 16_000_000.0
     LONGCHAT_ROPE_SCALING = {"type": "linear", "factor": 8.0}

     llama_model_config = ModelConfig(
@@ -76,6 +77,7 @@ def test_rope_scaling():
         seed=0,
     )
     assert getattr(llama_model_config.hf_config, "rope_scaling", None) is None
+    assert getattr(llama_model_config.hf_config, "rope_theta", None) == 500_000
     assert llama_model_config.max_model_len == 8192

     llama_model_config = ModelConfig(
@@ -86,9 +88,12 @@ def test_rope_scaling():
         dtype="float16",
         seed=0,
         rope_scaling=TEST_ROPE_SCALING,
+        rope_theta=TEST_ROPE_THETA,
     )
     assert getattr(llama_model_config.hf_config, "rope_scaling",
                    None) == TEST_ROPE_SCALING
+    assert getattr(llama_model_config.hf_config, "rope_theta",
+                   None) == TEST_ROPE_THETA
     assert llama_model_config.max_model_len == 16384

     longchat_model_config = ModelConfig(
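The test drives ModelConfig directly; at the user level the same knobs flow through EngineArgs, and vLLM's offline LLM class forwards extra keyword arguments to EngineArgs. So usage along these lines should work (a hedged sketch: the model name and values are illustrative, not from the commit):

from vllm import LLM

# Sketch only: LLM forwards unrecognized kwargs to EngineArgs, which this
# commit extends with a rope_theta field. Model and values are illustrative.
llm = LLM(
    model="meta-llama/Meta-Llama-3-8B",
    rope_scaling={"type": "dynamic", "factor": 2.0},
    rope_theta=16_000_000.0,
)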
vllm/config.py
@@ -93,6 +93,7 @@ class ModelConfig:
         revision: Optional[str] = None,
         code_revision: Optional[str] = None,
         rope_scaling: Optional[dict] = None,
+        rope_theta: Optional[float] = None,
         tokenizer_revision: Optional[str] = None,
         max_model_len: Optional[int] = None,
         quantization: Optional[str] = None,
@@ -113,6 +114,7 @@ class ModelConfig:
         self.revision = revision
         self.code_revision = code_revision
         self.rope_scaling = rope_scaling
+        self.rope_theta = rope_theta
         # The tokenizer version is consistent with the model version by default.
         if tokenizer_revision is None:
             self.tokenizer_revision = revision
@@ -132,7 +134,7 @@ class ModelConfig:
         self.skip_tokenizer_init = skip_tokenizer_init

         self.hf_config = get_config(self.model, trust_remote_code, revision,
-                                     code_revision, rope_scaling)
+                                     code_revision, rope_scaling, rope_theta)
         self.hf_text_config = get_hf_text_config(self.hf_config)
         self.dtype = _get_and_verify_dtype(self.hf_text_config, dtype)
         self.max_model_len = _get_and_verify_max_len(
vllm/engine/arg_utils.py
@@ -53,6 +53,7 @@ class EngineArgs:
     revision: Optional[str] = None
     code_revision: Optional[str] = None
     rope_scaling: Optional[dict] = None
+    rope_theta: Optional[float] = None
     tokenizer_revision: Optional[str] = None
     quantization: Optional[str] = None
     enforce_eager: bool = False
@@ -400,6 +401,12 @@ class EngineArgs:
                             type=json.loads,
                             help='RoPE scaling configuration in JSON format. '
                             'For example, {"type":"dynamic","factor":2.0}')
+        parser.add_argument('--rope-theta',
+                            default=None,
+                            type=float,
+                            help='RoPE theta. Use with `rope_scaling`. In '
+                            'some cases, changing the RoPE theta improves the '
+                            'performance of the scaled model.')
         parser.add_argument('--enforce-eager',
                             action='store_true',
                             help='Always use eager-mode PyTorch. If False, '
@@ -630,6 +637,7 @@ class EngineArgs:
             revision=self.revision,
             code_revision=self.code_revision,
             rope_scaling=self.rope_scaling,
+            rope_theta=self.rope_theta,
             tokenizer_revision=self.tokenizer_revision,
             max_model_len=self.max_model_len,
             quantization=self.quantization,
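On the command line, the new flag pairs with the existing --rope-scaling option added just above it. A sketch of a server launch (the OpenAI-compatible entrypoint and model name are assumptions; only the two flags come from this diff):

python -m vllm.entrypoints.openai.api_server \
    --model meta-llama/Meta-Llama-3-8B \
    --rope-scaling '{"type":"dynamic","factor":2.0}' \
    --rope-theta 16000000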
vllm/engine/llm_engine.py
@@ -162,7 +162,7 @@ class LLMEngine:
             "Initializing an LLM engine (v%s) with config: "
             "model=%r, speculative_config=%r, tokenizer=%r, "
             "skip_tokenizer_init=%s, tokenizer_mode=%s, revision=%s, "
-            "rope_scaling=%r, tokenizer_revision=%s, "
+            "rope_scaling=%r, rope_theta=%r, tokenizer_revision=%s, "
             "trust_remote_code=%s, dtype=%s, max_seq_len=%d, "
             "download_dir=%r, load_format=%s, tensor_parallel_size=%d, "
             "disable_custom_all_reduce=%s, quantization=%s, "
@@ -177,6 +177,7 @@ class LLMEngine:
             model_config.tokenizer_mode,
             model_config.revision,
             model_config.rope_scaling,
+            model_config.rope_theta,
             model_config.tokenizer_revision,
             model_config.trust_remote_code,
             model_config.dtype,
vllm/transformers_utils/config.py
@@ -23,7 +23,8 @@ def get_config(model: str,
                trust_remote_code: bool,
                revision: Optional[str] = None,
                code_revision: Optional[str] = None,
-               rope_scaling: Optional[dict] = None) -> PretrainedConfig:
+               rope_scaling: Optional[dict] = None,
+               rope_theta: Optional[float] = None) -> PretrainedConfig:
     try:
         if VLLM_USE_MODELSCOPE:
             from modelscope import AutoConfig
@@ -50,10 +51,12 @@ def get_config(model: str,
         config = config_class.from_pretrained(model,
                                               revision=revision,
                                               code_revision=code_revision)
-    if rope_scaling is not None:
-        logger.info("Updating rope_scaling from %r to %r",
-                    getattr(config, "rope_scaling", None), rope_scaling)
-        config.update({"rope_scaling": rope_scaling})
+    for key, value in [("rope_scaling", rope_scaling),
+                       ("rope_theta", rope_theta)]:
+        if value is not None:
+            logger.info("Updating %s from %r to %r", key,
+                        getattr(config, key, None), value)
+            config.update({key: value})
     return config

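The refactor replaces the one-off rope_scaling branch with a key/value loop, so further overrides cost one line each. PretrainedConfig.update sets attributes from a dict, making the override visible to everything that later reads config.rope_theta. A minimal standalone demonstration of the same pattern (dummy config class and illustrative values, not vLLM code):

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("rope_override_demo")

class DummyConfig:
    """Stand-in for transformers.PretrainedConfig: update() sets attributes."""
    rope_scaling = None
    rope_theta = 500_000.0

    def update(self, d: dict) -> None:
        for k, v in d.items():
            setattr(self, k, v)

config = DummyConfig()
rope_scaling = {"type": "dynamic", "factor": 2.0}  # illustrative overrides
rope_theta = 16_000_000.0

# Same loop shape as the diff above: keys the user left as None are skipped,
# so the checkpoint's own values survive by default.
for key, value in [("rope_scaling", rope_scaling), ("rope_theta", rope_theta)]:
    if value is not None:
        logger.info("Updating %s from %r to %r", key,
                    getattr(config, key, None), value)
        config.update({key: value})

assert config.rope_theta == 16_000_000.0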