From 8a0ffd6285f6a0d8137d9363f448cef78ce97712 Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Sat, 9 Aug 2025 08:05:32 +0200
Subject: [PATCH] Remove mamba_ssm from vLLM requirements; install inside test
 container using `--no-build-isolation` (#22541)

Signed-off-by: Thomas Parnell
---
 .buildkite/test-pipeline.yaml                    |  8 ++++----
 docs/contributing/ci/update_pytorch_version.md   | 13 -------------
 requirements/test.in                             |  5 ++---
 requirements/test.txt                            | 13 +------------
 tests/models/language/generation/test_hybrid.py  | 16 +++++++++-------
 tests/models/registry.py                         | 16 ++++++++++------
 6 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index e139c6b30586..221888edb374 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -535,8 +535,6 @@ steps:
   - vllm/
   - tests/models/language
   commands:
-  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-  - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
   - pip freeze | grep -E 'torch'
   - pytest -v -s models/language -m core_model
 
@@ -547,8 +545,10 @@ steps:
   - vllm/
   - tests/models/language/generation
   commands:
-  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-  - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
+  # Install fast path packages for testing against transformers
+  # Note: also needed to run plamo2 model in vLLM
+  - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
   - pytest -v -s models/language/generation -m hybrid_model
 
 - label: Language Models Test (Extended Generation) # 1hr20min
diff --git a/docs/contributing/ci/update_pytorch_version.md b/docs/contributing/ci/update_pytorch_version.md
index 3a6026d450a6..7ef22d6f8c3f 100644
--- a/docs/contributing/ci/update_pytorch_version.md
+++ b/docs/contributing/ci/update_pytorch_version.md
@@ -131,19 +131,6 @@ MAX_JOBS=16 uv pip install --system \
   --no-build-isolation "git+https://github.com/facebookresearch/xformers@v0.0.30"
 ```
 
-### Mamba
-
-```bash
-uv pip install --system \
-  --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.5"
-```
-
-### causal-conv1d
-
-```bash
-uv pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
-```
-
 ## Update all the different vLLM platforms
 
 Rather than attempting to update all vLLM platforms in a single pull request, it's more manageable
diff --git a/requirements/test.in b/requirements/test.in
index 1e0cab80a24f..ca22fd1551f0 100644
--- a/requirements/test.in
+++ b/requirements/test.in
@@ -10,7 +10,7 @@ pytest-timeout
 # testing utils
 backoff # required for phi4mm test
 blobfile # required for kimi-vl test
-einops # required for MPT, qwen-vl and Mamba
+einops # required for MPT, qwen-vl
 httpx
 librosa # required for audio tests
 vector_quantize_pytorch # required for minicpmo_26 test
@@ -26,7 +26,6 @@ torch==2.7.1
 torchaudio==2.7.1
 torchvision==0.22.1
 transformers_stream_generator # required for qwen-vl test
-mamba_ssm==2.2.5 # required for plamo2 test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.2 # required for voxtral test
 num2words # required for smolvlm test
@@ -53,4 +52,4 @@ runai-model-streamer==0.11.0
 runai-model-streamer-s3==0.11.0
 fastsafetensors>=0.1.10
 pydantic>=2.10 # 2.9 leads to error on python 3.10
-terratorch==1.1rc2 # required for PrithviMAE test
\ No newline at end of file
+terratorch==1.1rc2 # required for PrithviMAE test
diff --git a/requirements/test.txt b/requirements/test.txt
index 324f8153b2ac..377eeb58c482 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -178,7 +178,6 @@ einops==0.8.1
     # via
     #   -r requirements/test.in
     #   encodec
-    #   mamba-ssm
     #   terratorch
     #   torchgeo
     #   vector-quantize-pytorch
@@ -417,8 +416,6 @@ lxml==5.3.0
     #   sacrebleu
 mako==1.3.10
     # via alembic
-mamba-ssm==2.2.5
-    # via -r requirements/test.in
 markdown==3.8.2
     # via mlflow
 markdown-it-py==3.0.0
@@ -475,8 +472,6 @@ networkx==3.2.1
     # via
     #   scikit-image
     #   torch
-ninja==1.11.1.3
-    # via mamba-ssm
 nltk==3.9.1
     # via rouge-score
 num2words==0.5.14
@@ -629,7 +624,6 @@ packaging==24.2
     #   lazy-loader
     #   lightning
     #   lightning-utilities
-    #   mamba-ssm
     #   matplotlib
     #   mlflow-skinny
     #   peft
@@ -973,7 +967,6 @@ sentencepiece==0.2.0
 setuptools==77.0.3
     # via
     #   lightning-utilities
-    #   mamba-ssm
     #   pytablewriter
     #   torch
     #   triton
@@ -1085,7 +1078,6 @@ torch==2.7.1+cu128
     #   lightly
     #   lightning
     #   lm-eval
-    #   mamba-ssm
     #   mteb
     #   open-clip-torch
     #   peft
@@ -1152,16 +1144,13 @@ transformers==4.55.0
     #   -r requirements/test.in
     #   genai-perf
     #   lm-eval
-    #   mamba-ssm
     #   peft
     #   sentence-transformers
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
 triton==3.3.1
-    # via
-    #   mamba-ssm
-    #   torch
+    # via torch
 tritonclient==2.51.0
     # via
     #   -r requirements/test.in
diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py
index 67ba2f25593d..8c3e1f5c2bb2 100644
--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@@ -25,10 +25,8 @@ SSM_MODELS = [
 
 HYBRID_MODELS = [
     "ai21labs/Jamba-tiny-dev",
-    # NOTE: Running Plamo2 in transformers implementation requires to install
-    # causal-conv1d package, which is not listed as a test dependency as it's
-    # not compatible with pip-compile.
-    "pfnet/plamo-2-1b",
+    # skipping until vLLM implementation issues are resolved
+    # "pfnet/plamo-2-1b",
     "Zyphra/Zamba2-1.2B-instruct",
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-ai-platform/Bamba-9B-v1",
@@ -83,12 +81,16 @@ def test_models(
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
         model_info.check_available_online(on_fail="skip")
-        model_info.check_transformers_version(on_fail="skip")
+        hf_version_check = model_info.check_transformers_version(
+            on_fail="return")
     except ValueError:
-        pass
+        hf_version_check = None
+
+    if hf_version_check is not None:
+        print(f"Skipping transformers comparison because: {hf_version_check}")
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
+        if model not in HF_UNSUPPORTED_MODELS and hf_version_check is None:
             hf_outputs = hf_model.generate_greedy_logprobs_limit(
                 example_prompts, max_tokens, num_logprobs)
         else:
diff --git a/tests/models/registry.py b/tests/models/registry.py
index b1952ce9c29d..2bb06b7d190b 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -79,17 +79,17 @@ class _HfExamplesInfo:
     def check_transformers_version(
         self,
         *,
-        on_fail: Literal["error", "skip"],
+        on_fail: Literal["error", "skip", "return"],
         check_min_version: bool = True,
         check_max_version: bool = True,
-    ) -> None:
+    ) -> Optional[str]:
         """
         If the installed transformers version does not meet the requirements,
         perform the given action.
         """
         if (self.min_transformers_version is None
                 and self.max_transformers_version is None):
-            return
+            return None
 
         current_version = TRANSFORMERS_VERSION
         cur_base_version = Version(current_version).base_version
@@ -105,16 +105,18 @@ class _HfExamplesInfo:
                 and Version(cur_base_version) > Version(max_version)):
             msg += f"<={max_version}` is required to run this model."
         else:
-            return
+            return None
 
         if self.transformers_version_reason:
             msg += f" Reason: {self.transformers_version_reason}"
 
         if on_fail == "error":
             raise RuntimeError(msg)
-        else:
+        elif on_fail == "skip":
             pytest.skip(msg)
 
+        return msg
+
     def check_available_online(
         self,
         *,
@@ -148,7 +150,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                            trust_remote_code=True),
     "BailingMoeForCausalLM": _HfExamplesInfo("inclusionAI/Ling-lite-1.5",
                                              trust_remote_code=True),
-    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B",
+    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B-v1",
+                                        min_transformers_version="4.55.1",
                                         extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
                                         {"1b": "bigscience/bloomz-1b1"}),
@@ -223,6 +226,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                            trust_remote_code=True),
     "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
     "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
+                                        min_transformers_version="4.55.1",
                                         extras={
                                             "tiny": "ai21labs/Jamba-tiny-dev",
                                             "random": "ai21labs/Jamba-tiny-random",  # noqa: E501