diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 1c3ef502f0f45..abdae49106120 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -664,6 +664,8 @@ class CompilationConfig: is_torch_equal_or_newer("2.9.0.dev") and "combo_kernels" not in self.inductor_compile_config and "benchmark_combo_kernel" not in self.inductor_compile_config + # (fixme @boyuan) combo kernel does not support cpu yet. + and not current_platform.is_cpu() ): # use horizontal fusion, which is useful for fusing qk-norm and # qk-rope when query and key have different shapes.