Remove mamba_ssm from vLLM requirements; install inside test container using --no-build-isolation (#22541)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
Authored by Thomas Parnell on 2025-08-09 08:05:32 +02:00, committed by GitHub
parent 23472ff51c
commit 8a0ffd6285
6 changed files with 26 additions and 45 deletions

.buildkite/test-pipeline.yaml

@@ -535,8 +535,6 @@ steps:
   - vllm/
   - tests/models/language
   commands:
-  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-  - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
   - pip freeze | grep -E 'torch'
   - pytest -v -s models/language -m core_model
@@ -547,8 +545,10 @@ steps:
   - vllm/
   - tests/models/language/generation
   commands:
-  # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-  - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
+  # Install fast path packages for testing against transformers
+  # Note: also needed to run plamo2 model in vLLM
+  - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
+  - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
   - pytest -v -s models/language/generation -m hybrid_model
 - label: Language Models Test (Extended Generation) # 1hr20min
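The two `uv pip install --system --no-build-isolation` commands above build `mamba` and `causal-conv1d` from source inside the test container, so a broken build would otherwise surface only once a test imports them. A minimal sketch of a pre-flight check that could run before pytest (hypothetical, not part of this commit; `mamba_ssm` and `causal_conv1d` are the packages' import names):

```python
# Hypothetical sanity check: confirm the fast-path kernel packages
# installed above are importable before the hybrid-model tests run.
import importlib.util
import sys

REQUIRED = ("mamba_ssm", "causal_conv1d")

missing = [name for name in REQUIRED if importlib.util.find_spec(name) is None]
if missing:
    sys.exit(f"fast-path packages missing: {', '.join(missing)}")
print("fast-path packages available:", ", ".join(REQUIRED))
```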

docs/contributing/ci/update_pytorch_version.md

@@ -131,19 +131,6 @@ MAX_JOBS=16 uv pip install --system \
     --no-build-isolation "git+https://github.com/facebookresearch/xformers@v0.0.30"
 ```
-
-### Mamba
-
-```bash
-uv pip install --system \
-    --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.5"
-```
-
-### causal-conv1d
-
-```bash
-uv pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
-```
 
 ## Update all the different vLLM platforms
 
 Rather than attempting to update all vLLM platforms in a single pull request, it's more manageable

requirements/test.in

@@ -10,7 +10,7 @@ pytest-timeout
 # testing utils
 backoff # required for phi4mm test
 blobfile # required for kimi-vl test
-einops # required for MPT, qwen-vl and Mamba
+einops # required for MPT, qwen-vl
 httpx
 librosa # required for audio tests
 vector_quantize_pytorch # required for minicpmo_26 test
@@ -26,7 +26,6 @@ torch==2.7.1
 torchaudio==2.7.1
 torchvision==0.22.1
 transformers_stream_generator # required for qwen-vl test
-mamba_ssm==2.2.5 # required for plamo2 test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.2 # required for voxtral test
 num2words # required for smolvlm test
@@ -53,4 +52,4 @@ runai-model-streamer==0.11.0
 runai-model-streamer-s3==0.11.0
 fastsafetensors>=0.1.10
 pydantic>=2.10 # 2.9 leads to error on python 3.10
-terratorch==1.1rc2 # required for PrithviMAE test
\ No newline at end of file
+terratorch==1.1rc2 # required for PrithviMAE test

requirements/test.txt

@@ -178,7 +178,6 @@ einops==0.8.1
     # via
     #   -r requirements/test.in
     #   encodec
-    #   mamba-ssm
     #   terratorch
     #   torchgeo
     #   vector-quantize-pytorch
@@ -417,8 +416,6 @@ lxml==5.3.0
     #   sacrebleu
 mako==1.3.10
     # via alembic
-mamba-ssm==2.2.5
-    # via -r requirements/test.in
 markdown==3.8.2
     # via mlflow
 markdown-it-py==3.0.0
@@ -475,8 +472,6 @@ networkx==3.2.1
     # via
     #   scikit-image
     #   torch
-ninja==1.11.1.3
-    # via mamba-ssm
 nltk==3.9.1
     # via rouge-score
 num2words==0.5.14
@@ -629,7 +624,6 @@ packaging==24.2
     #   lazy-loader
     #   lightning
     #   lightning-utilities
-    #   mamba-ssm
     #   matplotlib
     #   mlflow-skinny
     #   peft
@@ -973,7 +967,6 @@ sentencepiece==0.2.0
 setuptools==77.0.3
     # via
     #   lightning-utilities
-    #   mamba-ssm
     #   pytablewriter
     #   torch
     #   triton
@@ -1085,7 +1078,6 @@ torch==2.7.1+cu128
     #   lightly
     #   lightning
     #   lm-eval
-    #   mamba-ssm
     #   mteb
     #   open-clip-torch
     #   peft
@@ -1152,16 +1144,13 @@ transformers==4.55.0
     #   -r requirements/test.in
     #   genai-perf
     #   lm-eval
-    #   mamba-ssm
     #   peft
     #   sentence-transformers
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
 triton==3.3.1
-    # via
-    #   mamba-ssm
-    #   torch
+    # via torch
 tritonclient==2.51.0
     # via
     #   -r requirements/test.in

tests/models/language/generation/test_hybrid.py

@@ -25,10 +25,8 @@ SSM_MODELS = [
 HYBRID_MODELS = [
     "ai21labs/Jamba-tiny-dev",
-    # NOTE: Running Plamo2 in transformers implementation requires to install
-    # causal-conv1d package, which is not listed as a test dependency as it's
-    # not compatible with pip-compile.
-    "pfnet/plamo-2-1b",
+    # skipping until vLLM implementation issues are resolved
+    # "pfnet/plamo-2-1b",
     "Zyphra/Zamba2-1.2B-instruct",
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-ai-platform/Bamba-9B-v1",
@@ -83,12 +81,16 @@ def test_models(
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
         model_info.check_available_online(on_fail="skip")
-        model_info.check_transformers_version(on_fail="skip")
+        hf_version_check = model_info.check_transformers_version(
+            on_fail="return")
     except ValueError:
-        pass
+        hf_version_check = None
+
+    if hf_version_check is not None:
+        print(f"Skipping transformers comparison because: {hf_version_check}")
 
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
+        if model not in HF_UNSUPPORTED_MODELS and hf_version_check is None:
             hf_outputs = hf_model.generate_greedy_logprobs_limit(
                 example_prompts, max_tokens, num_logprobs)
         else:

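In the test_hybrid.py hunk above, a too-old transformers install no longer skips the whole test: the new `on_fail="return"` mode hands back the failure message, the HF reference run is dropped, and the vLLM run still executes. A minimal standalone sketch of that pattern (versions and the `check_min_transformers` helper are hypothetical; `Version` is from the `packaging` library, as in the registry code):

```python
# Sketch of the "degrade instead of skip" pattern from the hunk above.
from typing import Optional

from packaging.version import Version


def check_min_transformers(installed: str, minimum: str) -> Optional[str]:
    """Return None when the requirement is met, else a reason string,
    mirroring check_transformers_version(on_fail="return")."""
    if Version(installed) >= Version(minimum):
        return None
    return f"`transformers>={minimum}` is required (found {installed})"


# Hypothetical versions for illustration:
hf_version_check = check_min_transformers("4.55.0", "4.55.1")
if hf_version_check is not None:
    print(f"Skipping transformers comparison because: {hf_version_check}")

run_hf_reference = hf_version_check is None  # the vLLM side runs either way
```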
tests/models/registry.py

@@ -79,17 +79,17 @@ class _HfExamplesInfo:
     def check_transformers_version(
         self,
         *,
-        on_fail: Literal["error", "skip"],
+        on_fail: Literal["error", "skip", "return"],
         check_min_version: bool = True,
         check_max_version: bool = True,
-    ) -> None:
+    ) -> Optional[str]:
         """
         If the installed transformers version does not meet the requirements,
         perform the given action.
         """
         if (self.min_transformers_version is None
                 and self.max_transformers_version is None):
-            return
+            return None
 
         current_version = TRANSFORMERS_VERSION
         cur_base_version = Version(current_version).base_version
@@ -105,16 +105,18 @@ class _HfExamplesInfo:
                 and Version(cur_base_version) > Version(max_version)):
             msg += f"<={max_version}` is required to run this model."
         else:
-            return
+            return None
 
         if self.transformers_version_reason:
             msg += f" Reason: {self.transformers_version_reason}"
 
         if on_fail == "error":
             raise RuntimeError(msg)
-        else:
+        elif on_fail == "skip":
             pytest.skip(msg)
 
+        return msg
+
     def check_available_online(
         self,
         *,
@@ -148,7 +150,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                              trust_remote_code=True),
     "BailingMoeForCausalLM": _HfExamplesInfo("inclusionAI/Ling-lite-1.5",
                                              trust_remote_code=True),
-    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B",
+    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B-v1",
+                                        min_transformers_version="4.55.1",
                                         extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
                                         {"1b": "bigscience/bloomz-1b1"}),
@@ -223,6 +226,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                            trust_remote_code=True),
     "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
     "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
+                                        min_transformers_version="4.55.1",
                                         extras={
                                             "tiny": "ai21labs/Jamba-tiny-dev",
                                             "random": "ai21labs/Jamba-tiny-random",  # noqa: E501