diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 80a5a610c8ac9..4e7bea25e1717 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -274,17 +274,6 @@ steps: - pytest -v -s samplers - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers -- label: LogitsProcessor Test # 5min - mirror_hardwares: [amdexperimental, amdproduction] - source_file_dependencies: - - vllm/model_executor/layers - - vllm/model_executor/guided_decoding - - tests/test_logits_processor - - tests/model_executor/test_guided_processors - commands: - - pytest -v -s test_logits_processor.py - - pytest -v -s model_executor/test_guided_processors.py - - label: Speculative decoding tests # 40min mirror_hardwares: [amdexperimental] source_file_dependencies: @@ -397,6 +386,17 @@ steps: - pytest -v -s tensorizer_loader - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py +- label: Model Executor Test + mirror_hardwares: [amdexperimental, amdproduction] + soft_fail: true + source_file_dependencies: + - vllm/model_executor + - tests/model_executor + commands: + - apt-get update && apt-get install -y curl libsodium23 + - export VLLM_WORKER_MULTIPROC_METHOD=spawn + - pytest -v -s model_executor + - label: Benchmarks # 9min mirror_hardwares: [amdexperimental, amdproduction] working_dir: "/vllm-workspace/.buildkite" diff --git a/tests/test_logits_processor.py b/tests/model_executor/test_logits_processor.py similarity index 100% rename from tests/test_logits_processor.py rename to tests/model_executor/test_logits_processor.py diff --git a/tests/model_executor/test_model_load_with_params.py b/tests/model_executor/test_model_load_with_params.py index f8efa2eff857b..7fda1f0e80d07 100644 --- a/tests/model_executor/test_model_load_with_params.py +++ b/tests/model_executor/test_model_load_with_params.py @@ -4,7 +4,7 @@ import os import pytest -from vllm.model_executor.layers.pooler import CLSPool, PoolingType +from vllm.model_executor.layers.pooler import CLSPool, MeanPool, PoolingType from vllm.model_executor.models.bert import BertEmbeddingModel from vllm.model_executor.models.roberta import RobertaEmbeddingModel from vllm.platforms import current_platform @@ -14,7 +14,7 @@ MODEL_NAME = os.environ.get("MODEL_NAME", "BAAI/bge-base-en-v1.5") REVISION = os.environ.get("REVISION", "main") MODEL_NAME_ROBERTA = os.environ.get("MODEL_NAME", - "intfloat/multilingual-e5-small") + "intfloat/multilingual-e5-base") REVISION_ROBERTA = os.environ.get("REVISION", "main") @@ -40,17 +40,15 @@ def test_model_loading_with_params(vllm_runner): # asserts on the pooling config files assert model_config.pooler_config.pooling_type == PoolingType.CLS.name - assert model_config.pooler_config.pooling_norm + assert model_config.pooler_config.normalize # asserts on the tokenizer loaded assert model_tokenizer.tokenizer_id == "BAAI/bge-base-en-v1.5" - assert model_tokenizer.tokenizer_config["do_lower_case"] assert model_tokenizer.tokenizer.model_max_length == 512 def check_model(model): assert isinstance(model, BertEmbeddingModel) - assert model._pooler.pooling_type == PoolingType.CLS - assert model._pooler.normalize + assert isinstance(model._pooler, CLSPool) vllm_model.apply_model(check_model) @@ -80,16 +78,15 @@ def test_roberta_model_loading_with_params(vllm_runner): # asserts on the pooling config files assert model_config.pooler_config.pooling_type == PoolingType.MEAN.name - assert model_config.pooler_config.pooling_norm + assert model_config.pooler_config.normalize # asserts on the tokenizer loaded - assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-small" - assert not model_tokenizer.tokenizer_config["do_lower_case"] + assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-base" + assert model_tokenizer.tokenizer.model_max_length == 512 def check_model(model): assert isinstance(model, RobertaEmbeddingModel) - assert model._pooler.pooling_type == PoolingType.MEAN - assert model._pooler.normalize + assert isinstance(model._pooler, MeanPool) vllm_model.apply_model(check_model) diff --git a/tests/model_executor/weight_utils.py b/tests/model_executor/test_weight_utils.py similarity index 100% rename from tests/model_executor/weight_utils.py rename to tests/model_executor/test_weight_utils.py