[Bugfix][Hardware][RISC-V] Limit supported dtypes to float32 to avoid scheduler segfault (#26228)

Signed-off-by: lyd1992 <liuyudong@iscas.ac.cn> Signed-off-by: ihb2032 <1355790728@qq.com>
2025-12-10 05:25:00 +08:00 · 2025-10-05 18:36:54 +08:00 · 2025-10-05 18:36:54 +08:00 · 3303cfb4ac
commit 3303cfb4ac
parent b7e8e4e6be
1 changed files with 25 additions and 0 deletions
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@ -84,6 +84,31 @@ class CpuPlatform(Platform):
                    shell=True).strip() == b"1"):
                return [torch.bfloat16, torch.float16, torch.float32]
            return [torch.float16, torch.float32]
        elif self.get_cpu_architecture() == CpuArchEnum.RISCV:
            # Workaround for Issue #25655: RISC-V scheduler bug with float16
            #
            # Background:
            # - RISC-V currently uses scalar code path
            # - There is a latent bug in the vLLM scheduler that provides
            # invalid
            #   physical_block_idx values under certain conditions
            # - This bug causes segmentation faults when using float16
            # dtype on RISC-V
            # - Testing shows that forcing float32 successfully bypasses
            # this issue
            #
            # Technical details:
            # - The bug manifests as out-of-bounds physical_block_idx in
            # block_tables
            # - Only occurs on RISC-V hardware
            # tested on Sophgo SG2044
            # - Does not reproduce on x86 or other architectures
            # - Root cause is in Python-level scheduling logic,
            # not C++ kernels
            #
            # This is a temporary workaround until the scheduler bug is fixed.
            # See: https://github.com/vllm-project/vllm/issues/25655
            return [torch.float32]
        # x86/aarch64 CPU has supported both bf16 and fp16 natively.
        return [torch.bfloat16, torch.float16, torch.float32]