From d7fb10c574a3a9cbf596bec086bf02603b71c5c8 Mon Sep 17 00:00:00 2001
From: Chen Zhang
Date: Wed, 29 Oct 2025 19:39:57 -0700
Subject: [PATCH] [Bugfix] mamba-block-size is set for vision language model (#27773)

Signed-off-by: Chen Zhang
---
 vllm/config/cache.py | 10 +---------
 vllm/config/vllm.py  | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/vllm/config/cache.py b/vllm/config/cache.py
index 1734f6b15d4af..d743d5aa9dd29 100644
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -5,7 +5,7 @@ import hashlib
 from dataclasses import field
 from typing import TYPE_CHECKING, Any, Literal
 
-from pydantic import Field, SkipValidation, field_validator, model_validator
+from pydantic import Field, SkipValidation, field_validator
 from pydantic.dataclasses import dataclass
 
 from vllm.config.utils import config
@@ -185,11 +185,3 @@ class CacheConfig:
             raise ValueError("Too large swap space. " + msg)
         elif cpu_memory_usage > 0.4 * total_cpu_memory:
             logger.warning("Possibly too large swap space. %s", msg)
-
-    @model_validator(mode="after")
-    def validate_mamba_block_size(self) -> "CacheConfig":
-        if self.mamba_block_size is not None and not self.enable_prefix_caching:
-            raise ValueError(
-                "--mamba-block-size can only be set with --enable-prefix-caching"
-            )
-        return self
diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py
index a7f7f3b45abea..c46f409edab61 100644
--- a/vllm/config/vllm.py
+++ b/vllm/config/vllm.py
@@ -17,7 +17,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any, TypeVar
 
 import torch
-from pydantic import ConfigDict, Field
+from pydantic import ConfigDict, Field, model_validator
 from pydantic.dataclasses import dataclass
 
 import vllm.envs as envs
@@ -943,6 +943,20 @@ class VllmConfig:
             f"compilation_config={self.compilation_config!r}"
         )
 
+    @model_validator(mode="after")
+    def validate_mamba_block_size(self) -> "VllmConfig":
+        if self.model_config is None:
+            return self
+        mamba_block_size_is_set = (
+            self.cache_config.mamba_block_size is not None
+            and self.cache_config.mamba_block_size != self.model_config.max_model_len
+        )
+        if mamba_block_size_is_set and not self.cache_config.enable_prefix_caching:
+            raise ValueError(
+                "--mamba-block-size can only be set with --enable-prefix-caching"
+            )
+        return self
+
 
 _current_vllm_config: VllmConfig | None = None
 _current_prefix: str | None = None
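
---
Note (not part of the applied patch): the change moves the check from
CacheConfig, which cannot see the model config, into a VllmConfig-level
model_validator that can compare mamba_block_size against max_model_len.
Judging from the validator body, a block size equal to max_model_len is
treated as the internally derived fallback rather than an explicit user
setting, so it no longer trips the prefix-caching requirement. Below is a
minimal standalone sketch of that cross-config validation pattern. The
ModelConfig/CacheConfig/VllmConfig stand-ins and the 8192 default are
simplified assumptions with only the fields the validator touches, not the
real vLLM classes; only the validator body mirrors the patch.

from dataclasses import field

from pydantic import model_validator
from pydantic.dataclasses import dataclass


@dataclass
class ModelConfig:
    # Simplified stand-in: only the field the validator needs.
    max_model_len: int = 8192


@dataclass
class CacheConfig:
    # Simplified stand-in for the real vLLM CacheConfig.
    mamba_block_size: int | None = None
    enable_prefix_caching: bool = False


@dataclass
class VllmConfig:
    model_config: ModelConfig | None = None
    cache_config: CacheConfig = field(default_factory=CacheConfig)

    @model_validator(mode="after")
    def validate_mamba_block_size(self) -> "VllmConfig":
        # Without a model config there is nothing to compare against.
        if self.model_config is None:
            return self
        # A block size equal to max_model_len is treated as the internally
        # derived fallback, not a user-provided --mamba-block-size.
        mamba_block_size_is_set = (
            self.cache_config.mamba_block_size is not None
            and self.cache_config.mamba_block_size != self.model_config.max_model_len
        )
        if mamba_block_size_is_set and not self.cache_config.enable_prefix_caching:
            raise ValueError(
                "--mamba-block-size can only be set with --enable-prefix-caching"
            )
        return self


if __name__ == "__main__":
    # Accepted: block size equals max_model_len, so it does not count as set.
    VllmConfig(
        model_config=ModelConfig(),
        cache_config=CacheConfig(mamba_block_size=8192),
    )
    # Rejected: an explicit block size without prefix caching enabled.
    try:
        VllmConfig(
            model_config=ModelConfig(),
            cache_config=CacheConfig(mamba_block_size=64),
        )
    except ValueError as exc:  # pydantic.ValidationError subclasses ValueError
        print(exc)

Running a validator after construction (mode="after") rather than per-field
is what makes the cross-object comparison possible: by then both
cache_config and model_config have been populated.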