From 3224ea9915750cdd714d85c843264923ef4018cc Mon Sep 17 00:00:00 2001
From: Ilya Markov
Date: Sun, 14 Dec 2025 11:15:11 +0100
Subject: [PATCH] [torch.compile] Add encoder tag for compilation (#30489)

Signed-off-by: ilmarkov
---
 vllm/compilation/backends.py             | 11 ++++++++++-
 vllm/compilation/piecewise_backend.py    |  7 +------
 vllm/model_executor/models/qwen2_5_vl.py |  6 +++---
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py
index 8fcd2b42e13bb..a1eec7d74483f 100644
--- a/vllm/compilation/backends.py
+++ b/vllm/compilation/backends.py
@@ -463,21 +463,27 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter):
 # the tag for the part of model being compiled,
 # e.g. backbone/eagle_head
 model_tag: str = "backbone"
+model_is_encoder: bool = False
 
 
 @contextmanager
-def set_model_tag(tag: str):
+def set_model_tag(tag: str, is_encoder: bool = False):
     """Context manager to set the model tag."""
     global model_tag
+    global model_is_encoder
     assert tag != model_tag, (
         f"Model tag {tag} is the same as the current tag {model_tag}."
     )
     old_tag = model_tag
+    old_is_encoder = model_is_encoder
+
     model_tag = tag
+    model_is_encoder = is_encoder
     try:
         yield
     finally:
         model_tag = old_tag
+        model_is_encoder = old_is_encoder
 
 
 class VllmBackend:
@@ -523,6 +529,9 @@ class VllmBackend:
         # them, e.g. backbone (default), eagle_head, etc.
         self.prefix = prefix or model_tag
 
+        # Mark compilation for encoder.
+        self.is_encoder = model_is_encoder
+
         # Passes to run on the graph post-grad.
         self.pass_manager = resolve_obj_by_qualname(
             current_platform.get_pass_manager_cls()
diff --git a/vllm/compilation/piecewise_backend.py b/vllm/compilation/piecewise_backend.py
index a15c693767a51..58d3e2a14b22a 100644
--- a/vllm/compilation/piecewise_backend.py
+++ b/vllm/compilation/piecewise_backend.py
@@ -53,12 +53,7 @@ class PiecewiseBackend:
         self.is_last_graph = piecewise_compile_index == total_piecewise_compiles - 1
         self.is_full_graph = total_piecewise_compiles == 1
 
-        # TODO: we need to generalize encoder compilation to other models
-        self.is_encoder_compilation = vllm_backend.prefix in [
-            "Qwen2_5_VisionPatchEmbed",
-            "Qwen2_5_VisionPatchMerger",
-            "Qwen2_5_VisionBlock",
-        ]
+        self.is_encoder_compilation = vllm_backend.is_encoder
 
         self.compile_ranges = self.compilation_config.get_compile_ranges()
         if self.is_encoder_compilation:
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index fba06e34f6227..4320e8644f751 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -612,7 +612,7 @@ class Qwen2_5_VisionTransformer(nn.Module):
         # DO NOT MOVE THIS IMPORT
         from vllm.compilation.backends import set_model_tag
 
-        with set_model_tag("Qwen2_5_VisionPatchEmbed"):
+        with set_model_tag("Qwen2_5_VisionPatchEmbed", is_encoder=True):
             self.patch_embed = Qwen2_5_VisionPatchEmbed(
                 patch_size=patch_size,
                 temporal_patch_size=temporal_patch_size,
@@ -651,7 +651,7 @@ class Qwen2_5_VisionTransformer(nn.Module):
                 f"Qwen2.5-VL does not support {self.attn_backend} backend now."
             )
 
-        with set_model_tag("Qwen2_5_VisionBlock"):
+        with set_model_tag("Qwen2_5_VisionBlock", is_encoder=True):
             self.blocks = nn.ModuleList(
                 [
                     Qwen2_5_VisionBlock(
@@ -670,7 +670,7 @@ class Qwen2_5_VisionTransformer(nn.Module):
                 ]
             )
 
-        with set_model_tag("Qwen2_5_VisionPatchMerger"):
+        with set_model_tag("Qwen2_5_VisionPatchMerger", is_encoder=True):
             self.merger = Qwen2_5_VisionPatchMerger(
                 d_model=vision_config.out_hidden_size,
                 context_dim=self.hidden_size,
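
For context, a minimal self-contained sketch of the mechanism this patch introduces: set_model_tag() now also records an is_encoder flag in module-level state, VllmBackend snapshots it at construction, and PiecewiseBackend reads vllm_backend.is_encoder instead of matching the prefix against a hard-coded list of Qwen2.5-VL class names. FakeVllmBackend below is a hypothetical stand-in for vllm.compilation.backends.VllmBackend, not the real class.

    # Sketch only; mirrors the patched globals, with FakeVllmBackend as a
    # hypothetical stand-in for vllm.compilation.backends.VllmBackend.
    from contextlib import contextmanager

    model_tag: str = "backbone"
    model_is_encoder: bool = False


    @contextmanager
    def set_model_tag(tag: str, is_encoder: bool = False):
        """Temporarily tag the submodule being compiled; restore state on exit."""
        global model_tag, model_is_encoder
        assert tag != model_tag, (
            f"Model tag {tag} is the same as the current tag {model_tag}."
        )
        old_tag, old_is_encoder = model_tag, model_is_encoder
        model_tag, model_is_encoder = tag, is_encoder
        try:
            yield
        finally:
            model_tag, model_is_encoder = old_tag, old_is_encoder


    class FakeVllmBackend:
        def __init__(self, prefix: str = ""):
            # Snapshot the globals at construction, as VllmBackend.__init__ does.
            self.prefix = prefix or model_tag
            self.is_encoder = model_is_encoder


    # An encoder submodule is tagged explicitly at construction time.
    with set_model_tag("Qwen2_5_VisionBlock", is_encoder=True):
        backend = FakeVllmBackend()
    assert backend.prefix == "Qwen2_5_VisionBlock" and backend.is_encoder

    # Outside the context manager, the defaults are restored.
    assert model_tag == "backbone" and model_is_encoder is False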