From 6cbc4d4bea2f0169846accbecead8f0fccebd761 Mon Sep 17 00:00:00 2001
From: Thomas Parnell
Date: Wed, 16 Jul 2025 04:19:10 +0200
Subject: [PATCH] [Model] Add ModelConfig class for GraniteMoeHybrid to override default max_seq_len_to_capture (#20923)

Signed-off-by: Thomas Parnell
---
 vllm/model_executor/models/config.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py
index 6c6f8e7268b6e..cb07fe7d9e1dc 100644
--- a/vllm/model_executor/models/config.py
+++ b/vllm/model_executor/models/config.py
@@ -205,6 +205,19 @@ class SnowflakeGteNewModelConfig(VerifyAndUpdateConfig):
         }
 
 
+class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig):
+
+    @staticmethod
+    def verify_and_update_config(vllm_config: "VllmConfig") -> None:
+        config = vllm_config.model_config
+        config.max_seq_len_to_capture = config.max_model_len
+        logger.info(
+            "Setting max_seq_len_to_capture to %d "
+            "to ensure that CUDA graph capture "
+            "covers sequences of length up to max_model_len.",
+            config.max_model_len)
+
+
 class HybridAttentionMambaModelConfig(VerifyAndUpdateConfig):
 
     @classmethod
@@ -297,4 +310,5 @@ MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = {
     "Qwen3ForSequenceClassification": Qwen3ForSequenceClassificationConfig,
     "XLMRobertaModel": JinaRobertaModelConfig,
     "JinaVLForRanking": JinaVLForSequenceClassificationConfig,
+    "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig,
 }