diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index 7940b359a150..0f69a18a1f3f 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -1145,7 +1145,7 @@ class IpexMxfp4MoEMethod(Mxfp4MoEMethod):
     ) -> torch.Tensor:
         assert activation == "swigluoai", (
             "Only swiglu_oai activation is supported for IPEX MXFP4 MoE"
-        )  # noqa:
+        )
         hidden_size_pad = round_up(self.original_hidden_size, 128)
         x_pad = torch.nn.functional.pad(x, (0, hidden_size_pad - x.size(-1)))
         hidden_states = layer.ipex_fusion(
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
index 3a8e174f2b74..0309ae0fe962 100644
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -64,8 +64,6 @@ class XPUPlatform(Platform):
 
         if use_sparse:
             raise NotImplementedError("Sparse Attention is not supported on XPU.")
-        if not use_v1:
-            raise ValueError("XPU backend only supports V1.")
         if selected_backend == AttentionBackendEnum.TRITON_ATTN:
             logger.info_once("Using Triton backend.")
             return AttentionBackendEnum.TRITON_ATTN.get_path()
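
For context on the first hunk: the unchanged lines around the removed `# noqa:` pad the activation's hidden dimension up to a multiple of 128 before the fused IPEX MoE kernel runs. The sketch below is a minimal standalone illustration of that padding idiom, not vLLM's code; the `round_up` helper is reimplemented here for self-containment, and the hidden size is an assumed example value.

```python
# Standalone sketch of the padding pattern in the mxfp4.py hunk above.
# `round_up` and the hidden size are illustrative assumptions, not vLLM's own.
import torch


def round_up(x: int, multiple: int) -> int:
    # Smallest multiple of `multiple` that is >= x.
    return ((x + multiple - 1) // multiple) * multiple


original_hidden_size = 2880  # hypothetical model hidden size
hidden_size_pad = round_up(original_hidden_size, 128)  # -> 2944

x = torch.randn(4, original_hidden_size)
# Zero-pad only the last dimension on the right, matching the diff:
x_pad = torch.nn.functional.pad(x, (0, hidden_size_pad - x.size(-1)))

assert x_pad.shape == (4, hidden_size_pad)
print(x_pad.shape)  # torch.Size([4, 2944])
```

Rounding the last dimension up to a multiple of 128 is a typical alignment requirement for fused kernels, and zero-padding the tail satisfies it without touching the real data. The second hunk only deletes a dead `use_v1` guard, so no example is needed there.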