Mark invariant normalizer in Gemma as non-persistent (#19788)
Signed-off-by: Yu-Hang Tang <Tang.Maxin@gmail.com>
parent e2148dc5ea
commit 83ca9ae47b
tests/models/language/generation/test_gemma.py (new file, 20 lines)
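For readers unfamiliar with non-persistent buffers, a minimal standalone sketch (not part of this commit; the Toy module and hidden_size=16 are illustrative) of what persistent=False changes in torch.nn.Module.register_buffer: the buffer still lives on the module and follows .to()/.cuda(), but it is excluded from state_dict(), so checkpoints neither save it nor need to supply it when loading. The diff below registers Gemma's normalizer this way.

# Standalone sketch of register_buffer(..., persistent=False).
# "Toy" and hidden_size=16 are illustrative, not taken from this commit.
import torch
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self, hidden_size: int = 16):
        super().__init__()
        # Invariant derived from the config, analogous to Gemma's normalizer.
        self.register_buffer("normalizer",
                             torch.tensor(hidden_size**0.5),
                             persistent=False)

m = Toy()
assert "normalizer" not in m.state_dict()  # never written to checkpoints
m.load_state_dict({}, strict=True)         # loading without the key succeeds
assert torch.isclose(m.normalizer, torch.tensor(16**0.5))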
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import numpy as np
+import pytest
+
+MODELS = ["google/gemma-2b", "google/gemma-2-2b", "google/gemma-3-4b-it"]
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_dummy_loader(vllm_runner, model: str) -> None:
+    with vllm_runner(
+            model,
+            load_format="dummy",
+    ) as llm:
+        normalizers = llm.collective_rpc(lambda self: self.worker.model_runner.
+                                         model.model.normalizer.cpu().item())
+        assert np.allclose(
+            normalizers,
+            llm.llm_engine.model_config.hf_config.hidden_size**0.5,
+            rtol=1e-3)
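The added test loads each Gemma variant with load_format="dummy" (randomly initialized weights, no checkpoint read) and uses collective_rpc to read the normalizer buffer from every worker, asserting it still equals hidden_size**0.5. Because no checkpoint supplies the value, this confirms the buffer is computed at construction time rather than loaded. A typical invocation, assuming a standard vLLM dev checkout with test dependencies installed, would be:

pytest tests/models/language/generation/test_gemma.py -k dummy_loader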
@@ -281,7 +281,9 @@ class GemmaModel(nn.Module):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer("normalizer",
+                             torch.tensor(normalizer),
+                             persistent=False)
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))
@@ -267,7 +267,9 @@ class Gemma2Model(nn.Module):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer("normalizer",
+                             torch.tensor(normalizer),
+                             persistent=False)
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))
@@ -371,7 +371,9 @@ class Gemma3Model(nn.Module):
         # data type such as bfloat16, not float32.
         # See https://github.com/huggingface/transformers/pull/29402
         normalizer = self.config.hidden_size**0.5
-        self.register_buffer("normalizer", torch.tensor(normalizer))
+        self.register_buffer("normalizer",
+                             torch.tensor(normalizer),
+                             persistent=False)
         self.make_empty_intermediate_tensors = (
             make_empty_intermediate_tensors_factory(
                 ["hidden_states", "residual"], config.hidden_size))
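All three hunks make the same two-line change to GemmaModel, Gemma2Model, and Gemma3Model. The normalizer is an invariant recomputed from config.hidden_size in __init__, so nothing is lost by keeping it out of the module's persistent state; the buffer keeps its computed value instead of being treated as a loadable weight, and forward-pass behavior is unchanged.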