[Model] Add ModelConfig class for GraniteMoeHybrid to override default max_seq_len_to_capture (#20923)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
2026-07-22 01:57:20 +08:00 · 2025-07-16 04:19:10 +02:00 · 2025-07-16 04:19:10 +02:00 · 6cbc4d4bea
commit 6cbc4d4bea
parent 153c6f1e61
1 changed files with 14 additions and 0 deletions
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@ -205,6 +205,19 @@ class SnowflakeGteNewModelConfig(VerifyAndUpdateConfig):
        }
 class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):
    """Overrides max_seq_len_to_capture for GraniteMoeHybrid models."""
    @staticmethod
    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
        """Set ``max_seq_len_to_capture`` equal to ``max_model_len``.

        The model config reachable through ``vllm_config.model_config`` is
        mutated in place and the new value is reported at INFO level.

        Args:
            vllm_config: Configuration object whose ``model_config`` is
                updated.
        """
        model_config = vllm_config.model_config
        capture_len = model_config.max_model_len
        model_config.max_seq_len_to_capture = capture_len
        # Keep %-style lazy formatting; the logger interpolates the value.
        message = ("Setting max_seq_len_to_capture to %d to ensure that "
                   "CUDA graph capture covers sequences of length up to "
                   "max_model_len.")
        logger.info(message, capture_len)
 class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
    @classmethod
@ -297,4 +310,5 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
    "Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
    "XLMRobertaModel": JinaRobertaModelConfig,
    "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
 }