From ca683a2a729d894286e7fef6afcb4d34b75e37ca Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Tue, 14 Oct 2025 06:40:59 -0700 Subject: [PATCH] use combo kernel to fuse qk-norm and qk-rope (#26682) Signed-off-by: Boyuan Feng --- vllm/config/compilation.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 5313112a19a6..60aef2f6f7e1 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -513,6 +513,16 @@ class CompilationConfig: if isinstance(self.pass_config, dict): self.pass_config = PassConfig(**self.pass_config) + if ( + is_torch_equal_or_newer("2.9.0.dev") + and "combo_kernels" not in self.inductor_compile_config + and "benchmark_combo_kernel" not in self.inductor_compile_config + ): + # use horizontal fusion, which is useful for fusing qk-norm and + # qk-rope when query and key have different shapes. + self.inductor_compile_config["combo_kernels"] = True + self.inductor_compile_config["benchmark_combo_kernel"] = True + # migrate the deprecated flags if not self.use_cudagraph: logger.warning(