diff --git a/docs/design/moe_kernel_features.md b/docs/design/moe_kernel_features.md
index 6c02dcb76bec2..11c6e488f958f 100644
--- a/docs/design/moe_kernel_features.md
+++ b/docs/design/moe_kernel_features.md
@@ -92,7 +92,6 @@ To be used with a particular `FusedMoEPrepareAndFinalize` subclass, MoE kernels
| gpt oss triton | standard | N/A | N/A | 5 | Y | Y | [`triton_kernel_fused_experts`][vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe.triton_kernel_fused_experts],[`OAITritonExperts`][vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe.OAITritonExperts] |
| marlin | standard,batched | 3 / N/A | 3 / N/A | silu,swigluoai | Y | Y | [`fused_marlin_moe`][vllm.model_executor.layers.fused_moe.fused_marlin_moe.fused_marlin_moe],[`MarlinExperts`][vllm.model_executor.layers.fused_moe.fused_marlin_moe.MarlinExperts],[`BatchedMarlinExperts`][vllm.model_executor.layers.fused_moe.fused_marlin_moe.BatchedMarlinExperts] |
| trtllm | standard | mxfp4,nvfp4 | G(16),G(32) | 5 | N | Y | [`TrtLlmGenExperts`][vllm.model_executor.layers.fused_moe.trtllm_moe.TrtLlmGenExperts] |
-| pallas | standard | N/A | N/A | silu | N | N | [`fused_moe`][vllm.model_executor.layers.fused_moe.moe_pallas.fused_moe] |
| iterative | standard | N/A | N/A | silu | N | N | [`fused_moe`][vllm.model_executor.layers.fused_moe.moe_torch_iterative.fused_moe] |
| rocm aiter moe | standard | fp8 | G(128),A,T | silu, gelu | Y | N | [`rocm_aiter_fused_experts`][vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe.rocm_aiter_fused_experts] |
| cpu_fused_moe | standard | N/A | N/A | silu | N | N | [`CPUFusedMOE`][vllm.model_executor.layers.fused_moe.cpu_fused_moe.CPUFusedMOE] |
diff --git a/vllm/attention/layers/mm_encoder_attention.py b/vllm/attention/layers/mm_encoder_attention.py
index 1c1623b13f55a..138fc99114127 100644
--- a/vllm/attention/layers/mm_encoder_attention.py
+++ b/vllm/attention/layers/mm_encoder_attention.py
@@ -227,28 +227,3 @@ class MMEncoderAttention(CustomOp):
"XPU only supports FLASH_ATTN for vision attention."
)
return self._forward_fa(query, key, value, cu_seqlens, max_seqlen)
-
- def forward_tpu(
- self,
- query: torch.Tensor,
- key: torch.Tensor,
- value: torch.Tensor,
- cu_seqlens: torch.Tensor | None = None,
- max_seqlen: torch.Tensor | None = None, # Only used for Flash Attention
- ) -> torch.Tensor:
- assert self.attn_backend == AttentionBackendEnum.PALLAS, (
- f"MMEncoderAttention on TPU only supports PALLAS backend, "
- f"but got {self.attn_backend}."
- )
- if cu_seqlens is None:
- query, key, value = (x.transpose(1, 2) for x in (query, key, value))
- from torch_xla.experimental.custom_kernel import flash_attention
-
- out = flash_attention(query, key, value, sm_scale=self.scale)
- out = out.transpose(1, 2)
- return out
- logger.warning_once(
- "PALLAS backend with cu_seqlens is not supported for ViT yet. ",
- "Falling back to SDPA implementation.",
- )
- return self._forward_sdpa(query, key, value, cu_seqlens)
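
Note (illustrative, not part of the patch): the deleted `forward_tpu` path wrapped torch_xla's Pallas flash attention, transposing query/key/value into the (batch, num_heads, seq_len, head_dim) layout that kernel expects and transposing the result back. A minimal sketch of that pattern follows; the helper name and the assumption that inputs arrive as (batch, seq_len, num_heads, head_dim) are illustrative, not taken from vLLM:

    import torch
    from torch_xla.experimental.custom_kernel import flash_attention

    def pallas_vit_attention(
        q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, scale: float
    ) -> torch.Tensor:
        # Hypothetical helper: q/k/v assumed to be (batch, seq_len, num_heads, head_dim).
        # torch_xla's Pallas flash_attention expects (batch, num_heads, seq_len, head_dim).
        q, k, v = (x.transpose(1, 2) for x in (q, k, v))
        out = flash_attention(q, k, v, sm_scale=scale)
        # Restore the (batch, seq_len, num_heads, head_dim) layout for the caller.
        return out.transpose(1, 2)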