diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py index 43a40bce6847d..950cde82fb9d9 100644 --- a/vllm/v1/sample/ops/topk_topp_sampler.py +++ b/vllm/v1/sample/ops/topk_topp_sampler.py @@ -65,7 +65,11 @@ class TopKTopPSampler(nn.Module): ) self.forward = self.forward_native elif current_platform.is_cpu(): - if current_platform.get_cpu_architecture() == CpuArchEnum.RISCV: + arch = current_platform.get_cpu_architecture() + # Fall back to native implementation for POWERPC and RISCV. + # On PowerPC argmax produces incorrect output with torch.compile. + # PR: https://github.com/vllm-project/vllm/pull/26987 + if arch in (CpuArchEnum.RISCV, CpuArchEnum.POWERPC): self.forward = self.forward_native else: self.forward = self.forward_cpu