From 8a0ffd6285f6a0d8137d9363f448cef78ce97712 Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Sat, 9 Aug 2025 08:05:32 +0200
Subject: [PATCH] Remove mamba_ssm from vLLM requirements; install inside test
 container using `--no-build-isolation` (#22541)

Signed-off-by: Thomas Parnell
---
 .buildkite/test-pipeline.yaml                    |  8 ++++----
 docs/contributing/ci/update_pytorch_version.md   | 13 -------------
 requirements/test.in                             |  5 ++---
 requirements/test.txt                            | 13 +------------
 tests/models/language/generation/test_hybrid.py  | 16 +++++++++-------
 tests/models/registry.py                         | 16 ++++++++++------
 6 files changed, 26 insertions(+), 45 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index e139c6b30586..221888edb374 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -535,8 +535,6 @@ steps:
   - vllm/
   - tests/models/language
   commands:
-  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-  - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
   - pip freeze | grep -E 'torch'
   - pytest -v -s models/language -m core_model
 
@@ -547,8 +545,10 @@ steps:
   - vllm/
   - tests/models/language/generation
   commands:
-  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-  - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
+  # Install fast path packages for testing against transformers
+  # Note: also needed to run plamo2 model in vLLM
+  - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
   - pytest -v -s models/language/generation -m hybrid_model
 
 - label: Language Models Test (Extended Generation) # 1hr20min
diff --git a/docs/contributing/ci/update_pytorch_version.md b/docs/contributing/ci/update_pytorch_version.md
index 3a6026d450a6..7ef22d6f8c3f 100644
--- a/docs/contributing/ci/update_pytorch_version.md
+++ b/docs/contributing/ci/update_pytorch_version.md
@@ -131,19 +131,6 @@ MAX_JOBS=16 uv pip install --system \
   --no-build-isolation "git+https://github.com/facebookresearch/xformers@v0.0.30"
 ```
 
-### Mamba
-
-```bash
-uv pip install --system \
-  --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.5"
-```
-
-### causal-conv1d
-
-```bash
-uv pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
-```
-
 ## Update all the different vLLM platforms
 
 Rather than attempting to update all vLLM platforms in a single pull request, it's more manageable
diff --git a/requirements/test.in b/requirements/test.in
index 1e0cab80a24f..ca22fd1551f0 100644
--- a/requirements/test.in
+++ b/requirements/test.in
@@ -10,7 +10,7 @@ pytest-timeout
 # testing utils
 backoff # required for phi4mm test
 blobfile # required for kimi-vl test
-einops # required for MPT, qwen-vl and Mamba
+einops # required for MPT, qwen-vl
 httpx
 librosa # required for audio tests
 vector_quantize_pytorch # required for minicpmo_26 test
@@ -26,7 +26,6 @@ torch==2.7.1
 torchaudio==2.7.1
 torchvision==0.22.1
 transformers_stream_generator # required for qwen-vl test
-mamba_ssm==2.2.5 # required for plamo2 test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.2 # required for voxtral test
 num2words # required for smolvlm test
@@ -53,4 +52,4 @@ runai-model-streamer==0.11.0
 runai-model-streamer-s3==0.11.0
 fastsafetensors>=0.1.10
 pydantic>=2.10 # 2.9 leads to error on python 3.10
-terratorch==1.1rc2 # required for PrithviMAE test
\ No newline at end of file
+terratorch==1.1rc2 # required for PrithviMAE test
diff --git a/requirements/test.txt b/requirements/test.txt
index 324f8153b2ac..377eeb58c482 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -178,7 +178,6 @@ einops==0.8.1
     # via
     #   -r requirements/test.in
     #   encodec
-    #   mamba-ssm
     #   terratorch
     #   torchgeo
     #   vector-quantize-pytorch
@@ -417,8 +416,6 @@ lxml==5.3.0
     #   sacrebleu
 mako==1.3.10
     # via alembic
-mamba-ssm==2.2.5
-    # via -r requirements/test.in
 markdown==3.8.2
     # via mlflow
 markdown-it-py==3.0.0
@@ -475,8 +472,6 @@ networkx==3.2.1
     # via
     #   scikit-image
     #   torch
-ninja==1.11.1.3
-    # via mamba-ssm
 nltk==3.9.1
     # via rouge-score
 num2words==0.5.14
@@ -629,7 +624,6 @@ packaging==24.2
     #   lazy-loader
     #   lightning
     #   lightning-utilities
-    #   mamba-ssm
     #   matplotlib
     #   mlflow-skinny
     #   peft
@@ -973,7 +967,6 @@ sentencepiece==0.2.0
 setuptools==77.0.3
     # via
     #   lightning-utilities
-    #   mamba-ssm
     #   pytablewriter
     #   torch
     #   triton
@@ -1085,7 +1078,6 @@ torch==2.7.1+cu128
     #   lightly
     #   lightning
     #   lm-eval
-    #   mamba-ssm
     #   mteb
     #   open-clip-torch
     #   peft
@@ -1152,16 +1144,13 @@ transformers==4.55.0
     #   -r requirements/test.in
     #   genai-perf
     #   lm-eval
-    #   mamba-ssm
     #   peft
     #   sentence-transformers
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
 triton==3.3.1
-    # via
-    #   mamba-ssm
-    #   torch
+    # via torch
 tritonclient==2.51.0
     # via
     #   -r requirements/test.in
diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py
index 67ba2f25593d..8c3e1f5c2bb2 100644
--- a/tests/models/language/generation/test_hybrid.py
+++ b/tests/models/language/generation/test_hybrid.py
@@ -25,10 +25,8 @@ SSM_MODELS = [
 
 HYBRID_MODELS = [
     "ai21labs/Jamba-tiny-dev",
-    # NOTE: Running Plamo2 in transformers implementation requires to install
-    # causal-conv1d package, which is not listed as a test dependency as it's
-    # not compatible with pip-compile.
-    "pfnet/plamo-2-1b",
+    # skipping until vLLM implementation issues are resolved
+    # "pfnet/plamo-2-1b",
     "Zyphra/Zamba2-1.2B-instruct",
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-ai-platform/Bamba-9B-v1",
@@ -83,12 +81,16 @@ def test_models(
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
         model_info.check_available_online(on_fail="skip")
-        model_info.check_transformers_version(on_fail="skip")
+        hf_version_check = model_info.check_transformers_version(
+            on_fail="return")
     except ValueError:
-        pass
+        hf_version_check = None
+
+    if hf_version_check is not None:
+        print(f"Skipping transformers comparison because: {hf_version_check}")
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
+        if model not in HF_UNSUPPORTED_MODELS and hf_version_check is None:
             hf_outputs = hf_model.generate_greedy_logprobs_limit(
                 example_prompts, max_tokens, num_logprobs)
         else:
diff --git a/tests/models/registry.py b/tests/models/registry.py
index b1952ce9c29d..2bb06b7d190b 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -79,17 +79,17 @@ class _HfExamplesInfo:
     def check_transformers_version(
         self,
         *,
-        on_fail: Literal["error", "skip"],
+        on_fail: Literal["error", "skip", "return"],
         check_min_version: bool = True,
         check_max_version: bool = True,
-    ) -> None:
+    ) -> Optional[str]:
         """
         If the installed transformers version does not meet the requirements,
         perform the given action.
         """
         if (self.min_transformers_version is None
                 and self.max_transformers_version is None):
-            return
+            return None
 
         current_version = TRANSFORMERS_VERSION
         cur_base_version = Version(current_version).base_version
@@ -105,16 +105,18 @@ class _HfExamplesInfo:
                 and Version(cur_base_version) > Version(max_version)):
             msg += f"<={max_version}` is required to run this model."
         else:
-            return
+            return None
 
         if self.transformers_version_reason:
             msg += f" Reason: {self.transformers_version_reason}"
 
         if on_fail == "error":
             raise RuntimeError(msg)
-        else:
+        elif on_fail == "skip":
             pytest.skip(msg)
 
+        return msg
+
     def check_available_online(
         self,
         *,
@@ -148,7 +150,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                            trust_remote_code=True),
     "BailingMoeForCausalLM": _HfExamplesInfo("inclusionAI/Ling-lite-1.5",
                                              trust_remote_code=True),
-    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B",
+    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B-v1",
+                                        min_transformers_version="4.55.1",
                                         extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
                                         {"1b": "bigscience/bloomz-1b1"}),
@@ -223,6 +226,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                            trust_remote_code=True),
     "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
     "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
+                                        min_transformers_version="4.55.1",
                                         extras={
                                             "tiny": "ai21labs/Jamba-tiny-dev",
                                             "random": "ai21labs/Jamba-tiny-random",  # noqa: E501