mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 17:15:42 +08:00
[Bugfix][FailingTest] Fix test_model_load_with_params.py (#18758)
Signed-off-by: rabi <ramishra@redhat.com>
This commit is contained in:
parent
5e13c07d00
commit
b78f844a67
@@ -274,17 +274,6 @@ steps:
|
|||||||
- pytest -v -s samplers
|
- pytest -v -s samplers
|
||||||
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
|
- VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
|
||||||
|
|
||||||
- label: LogitsProcessor Test # 5min
|
|
||||||
mirror_hardwares: [amdexperimental, amdproduction]
|
|
||||||
source_file_dependencies:
|
|
||||||
- vllm/model_executor/layers
|
|
||||||
- vllm/model_executor/guided_decoding
|
|
||||||
- tests/test_logits_processor
|
|
||||||
- tests/model_executor/test_guided_processors
|
|
||||||
commands:
|
|
||||||
- pytest -v -s test_logits_processor.py
|
|
||||||
- pytest -v -s model_executor/test_guided_processors.py
|
|
||||||
|
|
||||||
- label: Speculative decoding tests # 40min
|
- label: Speculative decoding tests # 40min
|
||||||
mirror_hardwares: [amdexperimental]
|
mirror_hardwares: [amdexperimental]
|
||||||
source_file_dependencies:
|
source_file_dependencies:
|
||||||
@@ -397,6 +386,17 @@ steps:
|
|||||||
- pytest -v -s tensorizer_loader
|
- pytest -v -s tensorizer_loader
|
||||||
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
||||||
|
|
||||||
|
- label: Model Executor Test
|
||||||
|
mirror_hardwares: [amdexperimental, amdproduction]
|
||||||
|
soft_fail: true
|
||||||
|
source_file_dependencies:
|
||||||
|
- vllm/model_executor
|
||||||
|
- tests/model_executor
|
||||||
|
commands:
|
||||||
|
- apt-get update && apt-get install -y curl libsodium23
|
||||||
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
|
- pytest -v -s model_executor
|
||||||
|
|
||||||
- label: Benchmarks # 9min
|
- label: Benchmarks # 9min
|
||||||
mirror_hardwares: [amdexperimental, amdproduction]
|
mirror_hardwares: [amdexperimental, amdproduction]
|
||||||
working_dir: "/vllm-workspace/.buildkite"
|
working_dir: "/vllm-workspace/.buildkite"
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import os
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vllm.model_executor.layers.pooler import CLSPool, PoolingType
|
from vllm.model_executor.layers.pooler import CLSPool, MeanPool, PoolingType
|
||||||
from vllm.model_executor.models.bert import BertEmbeddingModel
|
from vllm.model_executor.models.bert import BertEmbeddingModel
|
||||||
from vllm.model_executor.models.roberta import RobertaEmbeddingModel
|
from vllm.model_executor.models.roberta import RobertaEmbeddingModel
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
@@ -14,7 +14,7 @@ MODEL_NAME = os.environ.get("MODEL_NAME", "BAAI/bge-base-en-v1.5")
|
|||||||
REVISION = os.environ.get("REVISION", "main")
|
REVISION = os.environ.get("REVISION", "main")
|
||||||
|
|
||||||
MODEL_NAME_ROBERTA = os.environ.get("MODEL_NAME",
|
MODEL_NAME_ROBERTA = os.environ.get("MODEL_NAME",
|
||||||
"intfloat/multilingual-e5-small")
|
"intfloat/multilingual-e5-base")
|
||||||
REVISION_ROBERTA = os.environ.get("REVISION", "main")
|
REVISION_ROBERTA = os.environ.get("REVISION", "main")
|
||||||
|
|
||||||
|
|
||||||
@@ -40,17 +40,15 @@ def test_model_loading_with_params(vllm_runner):
|
|||||||
|
|
||||||
# asserts on the pooling config files
|
# asserts on the pooling config files
|
||||||
assert model_config.pooler_config.pooling_type == PoolingType.CLS.name
|
assert model_config.pooler_config.pooling_type == PoolingType.CLS.name
|
||||||
assert model_config.pooler_config.pooling_norm
|
assert model_config.pooler_config.normalize
|
||||||
|
|
||||||
# asserts on the tokenizer loaded
|
# asserts on the tokenizer loaded
|
||||||
assert model_tokenizer.tokenizer_id == "BAAI/bge-base-en-v1.5"
|
assert model_tokenizer.tokenizer_id == "BAAI/bge-base-en-v1.5"
|
||||||
assert model_tokenizer.tokenizer_config["do_lower_case"]
|
|
||||||
assert model_tokenizer.tokenizer.model_max_length == 512
|
assert model_tokenizer.tokenizer.model_max_length == 512
|
||||||
|
|
||||||
def check_model(model):
|
def check_model(model):
|
||||||
assert isinstance(model, BertEmbeddingModel)
|
assert isinstance(model, BertEmbeddingModel)
|
||||||
assert model._pooler.pooling_type == PoolingType.CLS
|
assert isinstance(model._pooler, CLSPool)
|
||||||
assert model._pooler.normalize
|
|
||||||
|
|
||||||
vllm_model.apply_model(check_model)
|
vllm_model.apply_model(check_model)
|
||||||
|
|
||||||
@@ -80,16 +78,15 @@ def test_roberta_model_loading_with_params(vllm_runner):
|
|||||||
|
|
||||||
# asserts on the pooling config files
|
# asserts on the pooling config files
|
||||||
assert model_config.pooler_config.pooling_type == PoolingType.MEAN.name
|
assert model_config.pooler_config.pooling_type == PoolingType.MEAN.name
|
||||||
assert model_config.pooler_config.pooling_norm
|
assert model_config.pooler_config.normalize
|
||||||
|
|
||||||
# asserts on the tokenizer loaded
|
# asserts on the tokenizer loaded
|
||||||
assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-small"
|
assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-base"
|
||||||
assert not model_tokenizer.tokenizer_config["do_lower_case"]
|
assert model_tokenizer.tokenizer.model_max_length == 512
|
||||||
|
|
||||||
def check_model(model):
|
def check_model(model):
|
||||||
assert isinstance(model, RobertaEmbeddingModel)
|
assert isinstance(model, RobertaEmbeddingModel)
|
||||||
assert model._pooler.pooling_type == PoolingType.MEAN
|
assert isinstance(model._pooler, MeanPool)
|
||||||
assert model._pooler.normalize
|
|
||||||
|
|
||||||
vllm_model.apply_model(check_model)
|
vllm_model.apply_model(check_model)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user