more fixes: silence skip-compilation log, drop stub FlashAttention build(), disable MLA request-count assert

Signed-off-by: Sage Moore <sage@neuralmagic.com>
2026-06-14 08:07:15 +08:00 · 2025-06-18 13:58:40 +00:00 · 2025-06-18 13:58:40 +00:00 · ff2dd13145
commit ff2dd13145
parent 0889f66297
3 changed files with 2 additions and 10 deletions
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -170,7 +170,7 @@ def _support_torch_compile(
        # e.g. TPU has the compilation logic in model runner, so we don't
        # need to compile the model inside.
        if self.do_not_compile or torch.compiler.is_compiling():
-            logger.info("SKIPPING COMPILATION")
+            # logger.info("SKIPPING COMPILATION")
            return self.forward(*args, **kwargs)
        # the first compilation needs to have dynamic shapes marked
--- a/vllm/v1/attention/backends/flash_attn.py
+++ b/vllm/v1/attention/backends/flash_attn.py
@@ -337,14 +337,6 @@ class FlashAttentionMetadataBuilder(
        # populated on first build() call.
        self.aot_sliding_window: Optional[tuple[int, int]] = None
-    def build(
-        self, common_prefix_len: int,
-        common_attn_metadata: CommonAttentionMetadata
-    ) -> FlashAttentionMetadata:
-        num_reqs = common_attn_metadata.num_reqs
-        num_actual_tokens = common_attn_metadata.num_actual_tokens
-        max_query_len = common_attn_metadata.max_query_len
    def build_slice(
        self,
        req_slice: slice,
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -610,7 +610,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
        num_actual_tokens = common_attn_metadata.num_actual_tokens
        max_query_len = common_attn_metadata.max_query_len
-        assert self._num_decodes + self._num_prefills == num_reqs
+        # assert self._num_decodes + self._num_prefills == num_reqs
        return self.build_slice(
            req_slice=slice(0, num_reqs),
            token_slice=slice(0, num_actual_tokens),