Remove mamba_ssm from vLLM requirements; install inside test container using --no-build-isolation (#22541)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
Thomas Parnell authored 2025-08-09 08:05:32 +02:00, committed by GitHub
parent 23472ff51c
commit 8a0ffd6285
6 changed files with 26 additions and 45 deletions
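
The change drops the pinned `mamba_ssm==2.2.5` test requirement and instead installs the packages ad hoc inside the test container. As a rough sketch, the manual equivalent of the commands added to the CI pipeline below would be (version pins taken from this diff; the container and its preinstalled torch are assumed to already exist):

```bash
# Install mamba and causal-conv1d against the torch already present in the
# container. --no-build-isolation skips pip/uv's isolated build environment,
# so the CUDA extensions compile against the resident torch instead of a
# freshly resolved one (which is why these packages don't fit pip-compile).
uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
```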

View File

@@ -535,8 +535,6 @@ steps:
   - vllm/
   - tests/models/language
   commands:
-    # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-    - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
     - pip freeze | grep -E 'torch'
     - pytest -v -s models/language -m core_model
@@ -547,8 +545,10 @@ steps:
   - vllm/
   - tests/models/language/generation
   commands:
-    # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-    - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
+    # Install fast path packages for testing against transformers
+    # Note: also needed to run plamo2 model in vLLM
+    - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
+    - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
     - pytest -v -s models/language/generation -m hybrid_model

 - label: Language Models Test (Extended Generation) # 1hr20min

View File

@@ -131,19 +131,6 @@ MAX_JOBS=16 uv pip install --system \
     --no-build-isolation "git+https://github.com/facebookresearch/xformers@v0.0.30"
 ```

-### Mamba
-
-```bash
-uv pip install --system \
-    --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.5"
-```
-
-### causal-conv1d
-
-```bash
-uv pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
-```
-
 ## Update all the different vLLM platforms

 Rather than attempting to update all vLLM platforms in a single pull request, it's more manageable

View File

@@ -10,7 +10,7 @@ pytest-timeout
 # testing utils
 backoff # required for phi4mm test
 blobfile # required for kimi-vl test
-einops # required for MPT, qwen-vl and Mamba
+einops # required for MPT, qwen-vl
 httpx
 librosa # required for audio tests
 vector_quantize_pytorch # required for minicpmo_26 test
@@ -26,7 +26,6 @@ torch==2.7.1
 torchaudio==2.7.1
 torchvision==0.22.1
 transformers_stream_generator # required for qwen-vl test
-mamba_ssm==2.2.5 # required for plamo2 test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.2 # required for voxtral test
 num2words # required for smolvlm test

View File

@@ -178,7 +178,6 @@ einops==0.8.1
     # via
     #   -r requirements/test.in
     #   encodec
-    #   mamba-ssm
     #   terratorch
     #   torchgeo
     #   vector-quantize-pytorch
@@ -417,8 +416,6 @@ lxml==5.3.0
     #   sacrebleu
 mako==1.3.10
     # via alembic
-mamba-ssm==2.2.5
-    # via -r requirements/test.in
 markdown==3.8.2
     # via mlflow
 markdown-it-py==3.0.0
@@ -475,8 +472,6 @@ networkx==3.2.1
     # via
     #   scikit-image
     #   torch
-ninja==1.11.1.3
-    # via mamba-ssm
 nltk==3.9.1
     # via rouge-score
 num2words==0.5.14
@@ -629,7 +624,6 @@ packaging==24.2
     #   lazy-loader
     #   lightning
     #   lightning-utilities
-    #   mamba-ssm
     #   matplotlib
     #   mlflow-skinny
     #   peft
@@ -973,7 +967,6 @@ sentencepiece==0.2.0
 setuptools==77.0.3
     # via
     #   lightning-utilities
-    #   mamba-ssm
     #   pytablewriter
     #   torch
     #   triton
@@ -1085,7 +1078,6 @@ torch==2.7.1+cu128
     #   lightly
     #   lightning
     #   lm-eval
-    #   mamba-ssm
     #   mteb
     #   open-clip-torch
     #   peft
@@ -1152,16 +1144,13 @@ transformers==4.55.0
     #   -r requirements/test.in
     #   genai-perf
     #   lm-eval
-    #   mamba-ssm
     #   peft
     #   sentence-transformers
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
 triton==3.3.1
-    # via
-    #   mamba-ssm
-    #   torch
+    # via torch
 tritonclient==2.51.0
     # via
     #   -r requirements/test.in

View File

@@ -25,10 +25,8 @@ SSM_MODELS = [
 HYBRID_MODELS = [
     "ai21labs/Jamba-tiny-dev",
-    # NOTE: Running Plamo2 in transformers implementation requires to install
-    # causal-conv1d package, which is not listed as a test dependency as it's
-    # not compatible with pip-compile.
-    "pfnet/plamo-2-1b",
+    # skipping until vLLM implementation issues are resolved
+    # "pfnet/plamo-2-1b",
     "Zyphra/Zamba2-1.2B-instruct",
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-ai-platform/Bamba-9B-v1",
@@ -83,12 +81,16 @@ def test_models(
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
         model_info.check_available_online(on_fail="skip")
-        model_info.check_transformers_version(on_fail="skip")
+        hf_version_check = model_info.check_transformers_version(
+            on_fail="return")
     except ValueError:
-        pass
+        hf_version_check = None

+    if hf_version_check is not None:
+        print(f"Skipping transformers comparison because: {hf_version_check}")
+
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
+        if model not in HF_UNSUPPORTED_MODELS and hf_version_check is None:
             hf_outputs = hf_model.generate_greedy_logprobs_limit(
                 example_prompts, max_tokens, num_logprobs)
         else:

View File

@@ -79,17 +79,17 @@ class _HfExamplesInfo:
     def check_transformers_version(
         self,
         *,
-        on_fail: Literal["error", "skip"],
+        on_fail: Literal["error", "skip", "return"],
         check_min_version: bool = True,
         check_max_version: bool = True,
-    ) -> None:
+    ) -> Optional[str]:
         """
         If the installed transformers version does not meet the requirements,
         perform the given action.
         """
         if (self.min_transformers_version is None
                 and self.max_transformers_version is None):
-            return
+            return None

         current_version = TRANSFORMERS_VERSION
         cur_base_version = Version(current_version).base_version
@@ -105,16 +105,18 @@ class _HfExamplesInfo:
                 and Version(cur_base_version) > Version(max_version)):
             msg += f"<={max_version}` is required to run this model."
         else:
-            return
+            return None

         if self.transformers_version_reason:
             msg += f" Reason: {self.transformers_version_reason}"

         if on_fail == "error":
             raise RuntimeError(msg)
-        else:
+        elif on_fail == "skip":
             pytest.skip(msg)

+        return msg
+
     def check_available_online(
         self,
         *,
@@ -148,7 +150,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                              trust_remote_code=True),
     "BailingMoeForCausalLM": _HfExamplesInfo("inclusionAI/Ling-lite-1.5",
                                              trust_remote_code=True),
-    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B",
+    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B-v1",
+                                        min_transformers_version="4.55.1",
                                         extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
                                         {"1b": "bigscience/bloomz-1b1"}),
@@ -223,6 +226,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                                  trust_remote_code=True),
     "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
     "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
+                                        min_transformers_version="4.55.1",
                                         extras={
                                             "tiny": "ai21labs/Jamba-tiny-dev",
                                             "random": "ai21labs/Jamba-tiny-random",  # noqa: E501