From ed94e4f427bce8611e198d051dbd3b0097b448e8 Mon Sep 17 00:00:00 2001
From: tomeras91 <57313761+tomeras91@users.noreply.github.com>
Date: Sat, 27 Jul 2024 06:45:31 +0300
Subject: [PATCH] [Bugfix][Model] Jamba assertions and no chunked prefill by
 default for Jamba (#6784)

---
 vllm/engine/arg_utils.py            | 6 +++++-
 vllm/model_executor/models/jamba.py | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index cd64d3345b830..bad5be4917216 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -754,10 +754,14 @@ class EngineArgs:
         use_sliding_window = (model_config.get_sliding_window()
                               is not None)
         use_spec_decode = self.speculative_model is not None
+        has_seqlen_agnostic_layers = (
+            model_config.contains_seqlen_agnostic_layers(
+                parallel_config))
         if (is_gpu and not use_sliding_window and not use_spec_decode
                 and not self.enable_lora
                 and not self.enable_prompt_adapter
-                and not self.enable_prefix_caching):
+                and not self.enable_prefix_caching
+                and not has_seqlen_agnostic_layers):
             self.enable_chunked_prefill = True
             logger.warning(
                 "Chunked prefill is enabled by default for models with "
diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py
index d4e4f0055aa2b..3444578227259 100644
--- a/vllm/model_executor/models/jamba.py
+++ b/vllm/model_executor/models/jamba.py
@@ -644,6 +644,11 @@ class JambaForCausalLM(nn.Module, HasInnerState):
         lora_config: Optional[LoRAConfig] = None,
         scheduler_config: Optional[SchedulerConfig] = None,
     ) -> None:
+        assert not scheduler_config.chunked_prefill_enabled, \
+            "Jamba currently does not support chunked prefill"
+        assert not cache_config.enable_prefix_caching, \
+            "Jamba currently does not support prefix caching"
+
         super().__init__()
         self.config = config
         self.scheduler_config = scheduler_config