Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-10 04:05:01 +08:00)
Remove mamba_ssm from vLLM requirements; install inside test container using --no-build-isolation (#22541)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>

parent 23472ff51c
commit 8a0ffd6285
@@ -535,8 +535,6 @@ steps:
   - vllm/
   - tests/models/language
   commands:
-    # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-    - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
     - pip freeze | grep -E 'torch'
     - pytest -v -s models/language -m core_model

@@ -547,8 +545,10 @@ steps:
   - vllm/
   - tests/models/language/generation
   commands:
-    # Install causal-conv1d for plamo2 models here, as it is not compatible with pip-compile.
-    - pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
+    # Install fast path packages for testing against transformers
+    # Note: also needed to run plamo2 model in vLLM
+    - uv pip install --system --no-build-isolation 'git+https://github.com/state-spaces/mamba@v2.2.5'
+    - uv pip install --system --no-build-isolation 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.2'
     - pytest -v -s models/language/generation -m hybrid_model

 - label: Language Models Test (Extended Generation) # 1hr20min

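The two `uv pip install --system --no-build-isolation` commands above install mamba and causal-conv1d inside the test container as optional fast-path packages; `--no-build-isolation` is typically needed because these packages import torch from the existing environment at build time. As a rough illustration only (not part of this change; the module names are assumptions based on the package names), a container sanity check could look like:

```python
# Illustrative sketch only: confirm the optional fast-path packages installed
# above are importable inside the test container before the hybrid-model tests.
# The module names ("mamba_ssm", "causal_conv1d") are assumptions, not taken
# from this diff.
import importlib

for name in ("mamba_ssm", "causal_conv1d"):
    try:
        mod = importlib.import_module(name)
        print(f"{name} {getattr(mod, '__version__', '<unknown>')} is available")
    except ImportError as exc:
        # Tests fall back to slower reference paths when the package is missing.
        print(f"{name} not available: {exc}")
```
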
@@ -131,19 +131,6 @@ MAX_JOBS=16 uv pip install --system \
     --no-build-isolation "git+https://github.com/facebookresearch/xformers@v0.0.30"
 ```

-### Mamba
-
-```bash
-uv pip install --system \
-    --no-build-isolation "git+https://github.com/state-spaces/mamba@v2.2.5"
-```
-
-### causal-conv1d
-
-```bash
-uv pip install 'git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8'
-```
-
 ## Update all the different vLLM platforms

 Rather than attempting to update all vLLM platforms in a single pull request, it's more manageable

@@ -10,7 +10,7 @@ pytest-timeout
 # testing utils
 backoff # required for phi4mm test
 blobfile # required for kimi-vl test
-einops # required for MPT, qwen-vl and Mamba
+einops # required for MPT, qwen-vl
 httpx
 librosa # required for audio tests
 vector_quantize_pytorch # required for minicpmo_26 test

@@ -26,7 +26,6 @@ torch==2.7.1
 torchaudio==2.7.1
 torchvision==0.22.1
 transformers_stream_generator # required for qwen-vl test
-mamba_ssm==2.2.5 # required for plamo2 test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.2 # required for voxtral test
 num2words # required for smolvlm test

@@ -178,7 +178,6 @@ einops==0.8.1
     # via
     #   -r requirements/test.in
     #   encodec
-    #   mamba-ssm
     #   terratorch
     #   torchgeo
     #   vector-quantize-pytorch

@@ -417,8 +416,6 @@ lxml==5.3.0
     #   sacrebleu
 mako==1.3.10
     # via alembic
-mamba-ssm==2.2.5
-    # via -r requirements/test.in
 markdown==3.8.2
     # via mlflow
 markdown-it-py==3.0.0

@@ -475,8 +472,6 @@ networkx==3.2.1
     # via
     #   scikit-image
     #   torch
-ninja==1.11.1.3
-    # via mamba-ssm
 nltk==3.9.1
     # via rouge-score
 num2words==0.5.14

@@ -629,7 +624,6 @@ packaging==24.2
     #   lazy-loader
     #   lightning
     #   lightning-utilities
-    #   mamba-ssm
     #   matplotlib
     #   mlflow-skinny
     #   peft

@@ -973,7 +967,6 @@ sentencepiece==0.2.0
 setuptools==77.0.3
     # via
     #   lightning-utilities
-    #   mamba-ssm
     #   pytablewriter
     #   torch
     #   triton

@@ -1085,7 +1078,6 @@ torch==2.7.1+cu128
     #   lightly
     #   lightning
     #   lm-eval
-    #   mamba-ssm
     #   mteb
     #   open-clip-torch
     #   peft

@@ -1152,16 +1144,13 @@ transformers==4.55.0
     #   -r requirements/test.in
     #   genai-perf
     #   lm-eval
-    #   mamba-ssm
     #   peft
     #   sentence-transformers
     #   transformers-stream-generator
 transformers-stream-generator==0.0.5
     # via -r requirements/test.in
 triton==3.3.1
-    # via
-    #   mamba-ssm
-    #   torch
+    # via torch
 tritonclient==2.51.0
     # via
     #   -r requirements/test.in

@@ -25,10 +25,8 @@ SSM_MODELS = [

 HYBRID_MODELS = [
     "ai21labs/Jamba-tiny-dev",
-    # NOTE: Running Plamo2 in transformers implementation requires to install
-    # causal-conv1d package, which is not listed as a test dependency as it's
-    # not compatible with pip-compile.
-    "pfnet/plamo-2-1b",
+    # skipping until vLLM implementation issues are resolved
+    # "pfnet/plamo-2-1b",
     "Zyphra/Zamba2-1.2B-instruct",
     "hmellor/tiny-random-BambaForCausalLM",
     "ibm-ai-platform/Bamba-9B-v1",

@@ -83,12 +81,16 @@ def test_models(
     try:
         model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
         model_info.check_available_online(on_fail="skip")
-        model_info.check_transformers_version(on_fail="skip")
+        hf_version_check = model_info.check_transformers_version(
+            on_fail="return")
     except ValueError:
-        pass
+        hf_version_check = None

+    if hf_version_check is not None:
+        print(f"Skipping transformers comparison because: {hf_version_check}")
+
     with hf_runner(model) as hf_model:
-        if model not in HF_UNSUPPORTED_MODELS:
+        if model not in HF_UNSUPPORTED_MODELS and hf_version_check is None:
             hf_outputs = hf_model.generate_greedy_logprobs_limit(
                 example_prompts, max_tokens, num_logprobs)
         else:

@@ -79,17 +79,17 @@ class _HfExamplesInfo:
     def check_transformers_version(
         self,
         *,
-        on_fail: Literal["error", "skip"],
+        on_fail: Literal["error", "skip", "return"],
         check_min_version: bool = True,
         check_max_version: bool = True,
-    ) -> None:
+    ) -> Optional[str]:
         """
         If the installed transformers version does not meet the requirements,
         perform the given action.
         """
         if (self.min_transformers_version is None
                 and self.max_transformers_version is None):
-            return
+            return None

         current_version = TRANSFORMERS_VERSION
         cur_base_version = Version(current_version).base_version

@@ -105,16 +105,18 @@ class _HfExamplesInfo:
                 and Version(cur_base_version) > Version(max_version)):
             msg += f"<={max_version}` is required to run this model."
         else:
-            return
+            return None

         if self.transformers_version_reason:
             msg += f" Reason: {self.transformers_version_reason}"

         if on_fail == "error":
             raise RuntimeError(msg)
-        else:
+        elif on_fail == "skip":
             pytest.skip(msg)

+        return msg
+
     def check_available_online(
         self,
         *,

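With this change, `check_transformers_version` gains a third mode: besides raising (`"error"`) and skipping the test (`"skip"`), `on_fail="return"` hands the failure message back to the caller and returns `None` when the installed transformers version is acceptable. A minimal usage sketch follows (illustrative only; the import path and model string are assumptions, but the calls mirror the updated `test_hybrid.py`):

```python
# Illustrative sketch of the new on_fail="return" mode; not part of this diff.
# HF_EXAMPLE_MODELS, find_hf_info and check_transformers_version come from the
# code above; the import path and model name are assumptions for the example.
from tests.models.registry import HF_EXAMPLE_MODELS

model_info = HF_EXAMPLE_MODELS.find_hf_info("ibm-ai-platform/Bamba-9B-v1")

# on_fail="error":  raise RuntimeError if transformers is too old or too new.
# on_fail="skip":   call pytest.skip(msg) instead of raising.
# on_fail="return": neither raise nor skip; return the message (or None) so the
#                   caller can, e.g., run vLLM but skip the HF comparison.
msg = model_info.check_transformers_version(on_fail="return")
if msg is not None:
    print(f"Skipping transformers comparison because: {msg}")
```
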
@@ -148,7 +150,8 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                              trust_remote_code=True),
     "BailingMoeForCausalLM": _HfExamplesInfo("inclusionAI/Ling-lite-1.5",
                                              trust_remote_code=True),
-    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B",
+    "BambaForCausalLM": _HfExamplesInfo("ibm-ai-platform/Bamba-9B-v1",
+                                        min_transformers_version="4.55.1",
                                         extras={"tiny": "hmellor/tiny-random-BambaForCausalLM"}),  # noqa: E501
     "BloomForCausalLM": _HfExamplesInfo("bigscience/bloom-560m",
                                         {"1b": "bigscience/bloomz-1b1"}),

@@ -223,6 +226,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
                                        trust_remote_code=True),
     "JAISLMHeadModel": _HfExamplesInfo("inceptionai/jais-13b-chat"),
     "JambaForCausalLM": _HfExamplesInfo("ai21labs/AI21-Jamba-1.5-Mini",
+                                        min_transformers_version="4.55.1",
                                         extras={
                                             "tiny": "ai21labs/Jamba-tiny-dev",
                                             "random": "ai21labs/Jamba-tiny-random",  # noqa: E501