From ed94e4f427bce8611e198d051dbd3b0097b448e8 Mon Sep 17 00:00:00 2001
From: tomeras91 <57313761+tomeras91@users.noreply.github.com>
Date: Sat, 27 Jul 2024 06:45:31 +0300
Subject: [PATCH] [Bugfix][Model] Jamba assertions and no chunked prefill by
 default for Jamba (#6784)

---
 vllm/engine/arg_utils.py            | 6 +++++-
 vllm/model_executor/models/jamba.py | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index cd64d3345b830..bad5be4917216 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -754,10 +754,14 @@ class EngineArgs:
         use_sliding_window = (model_config.get_sliding_window()
                               is not None)
         use_spec_decode = self.speculative_model is not None
+        has_seqlen_agnostic_layers = (
+            model_config.contains_seqlen_agnostic_layers(
+                parallel_config))
         if (is_gpu and not use_sliding_window and not use_spec_decode
                 and not self.enable_lora
                 and not self.enable_prompt_adapter
-                and not self.enable_prefix_caching):
+                and not self.enable_prefix_caching
+                and not has_seqlen_agnostic_layers):
             self.enable_chunked_prefill = True
             logger.warning(
                 "Chunked prefill is enabled by default for models with "
diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index d4e4f0055aa2b..3444578227259 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -644,6 +644,11 @@ class JambaForCausalLM(nn.Module, HasInnerState):
         lora_config: Optional[LoRAConfig] = None,
         scheduler_config: Optional[SchedulerConfig] = None,
     ) -> None:
+        assert not scheduler_config.chunked_prefill_enabled, \
+            "Jamba currently does not support chunked prefill"
+        assert not cache_config.enable_prefix_caching, \
+            "Jamba currently does not support prefix caching"
+
         super().__init__()
         self.config = config
         self.scheduler_config = scheduler_config