diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py
index 6fc8f1301fdb..062258930fe1 100644
--- a/tests/models/language/generation/test_common.py
+++ b/tests/models/language/generation/test_common.py
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import os
 from typing import Optional
 
 import pytest
@@ -99,9 +98,10 @@ AITER_MODEL_LIST = [
 @pytest.mark.parametrize("num_logprobs", [5])
 @pytest.mark.parametrize(
     "use_rocm_aiter", [True, False] if current_platform.is_rocm() else [False])
+@pytest.mark.parametrize("use_prompt_embeds", [True, False])
 def test_models(hf_runner, vllm_runner, example_prompts, model: str,
                 max_tokens: int, num_logprobs: int, use_rocm_aiter: bool,
-                monkeypatch) -> None:
+                use_prompt_embeds: bool, monkeypatch) -> None:
     model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
     model_info.check_available_online(on_fail="skip")
 
@@ -119,8 +119,6 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str,
         #   in parts of the operators
         pytest.skip(f"Skipping '{model}' model test with AITER kernel.")
 
-    use_prompt_embeds = os.getenv("VLLM_USE_V1") == "0"
-
     with hf_runner(model) as hf_model:
         hf_outputs = hf_model.generate_greedy_logprobs_limit(
             example_prompts, max_tokens, num_logprobs)
diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py
index 13ecda0122be..f8ed92314c3d 100644
--- a/vllm/model_executor/models/bloom.py
+++ b/vllm/model_executor/models/bloom.py
@@ -257,7 +257,7 @@ class BloomModel(nn.Module):
                                                     config.hidden_size))
 
     def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
-        return self.word_embeddings_layernorm(self.word_embeddings(input_ids))
+        return self.word_embeddings(input_ids)
 
     def forward(
         self,
@@ -271,6 +271,7 @@ class BloomModel(nn.Module):
                 hidden_states = inputs_embeds
             else:
                 hidden_states = self.get_input_embeddings(input_ids)
+            hidden_states = self.word_embeddings_layernorm(hidden_states)
         else:
             assert intermediate_tensors is not None
             hidden_states = intermediate_tensors["hidden_states"]