misc fixes: derive skip_cuda_graphs from enable_microbatching instead of hard-coding True

Signed-off-by: Sage Moore <sage@neuralmagic.com>
2026-07-08 13:27:14 +08:00 · 2025-08-05 19:23:23 +00:00 · 2025-08-05 19:23:23 +00:00 · 6b0c303ab4
commit 6b0c303ab4
parent 4819bb8715
2 changed files with 2 additions and 2 deletions
--- a/vllm/compilation/decorators.py
+++ b/vllm/compilation/decorators.py
@@ -189,7 +189,6 @@ def _support_torch_compile(
            CompilationLevel.NO_COMPILATION, CompilationLevel.DYNAMO_AS_IS
        ] or not supports_dynamo() or _should_ignore_torch_compile(
            self.__class__)
        self.do_not_compile = True
        if self.do_not_compile:
            return
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1918,11 +1918,12 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                                       scheduler_output, is_dummy_run)
            # if is_global_first_rank():
            #     logger.info(f"RUNNING FULL BATCH {num_scheduled_tokens}")
            skip_cuda_graphs = self.parallel_config.enable_microbatching
            with set_forward_context(attn_metadata,
                                     vllm_config=self.vllm_config,
                                     num_tokens=num_scheduled_tokens or 1,
                                     num_tokens_across_dp=num_tokens_across_dp,
-                                     skip_cuda_graphs=True):
+                                     skip_cuda_graphs=skip_cuda_graphs):
                return self.model(
                    input_ids=input_ids,
                    positions=positions,