[Bugfix][Hardware][RISC-V] Limit supported dtypes to float32 to avoid scheduler segfault (#26228)

Signed-off-by: lyd1992 <liuyudong@iscas.ac.cn>
Signed-off-by: ihb2032 <1355790728@qq.com>
2025-12-10 00:25:01 +08:00 · 2025-10-05 18:36:54 +08:00 · 2025-10-05 18:36:54 +08:00 · 3303cfb4ac
commit 3303cfb4ac
parent b7e8e4e6be
1 changed files with 25 additions and 0 deletions
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -84,6 +84,31 @@ class CpuPlatform(Platform):
                    shell=True).strip() == b"1"):
                return [torch.bfloat16, torch.float16, torch.float32]
            return [torch.float16, torch.float32]
+        elif self.get_cpu_architecture() == CpuArchEnum.RISCV:
+            # Workaround for Issue #25655: RISC-V scheduler bug with float16
+            #
+            # Background:
+            # - RISC-V currently uses scalar code path
+            # - There is a latent bug in the vLLM scheduler that, under
+            #   certain conditions, provides invalid physical_block_idx
+            #   values
+            # - This bug causes segmentation faults when using the
+            #   float16 dtype on RISC-V
+            # - Testing shows that forcing float32 successfully bypasses
+            #   this issue
+            #
+            # Technical details:
+            # - The bug manifests as out-of-bounds physical_block_idx in
+            #   block_tables
+            # - Only occurs on RISC-V hardware
+            #   (tested on Sophgo SG2044)
+            # - Does not reproduce on x86 or other architectures
+            # - Root cause is in Python-level scheduling logic,
+            #   not C++ kernels
+            #
+            # This is a temporary workaround until the scheduler bug is fixed.
+            # See: https://github.com/vllm-project/vllm/issues/25655
+            return [torch.float32]
        # x86/aarch64 CPU has supported both bf16 and fp16 natively.
        return [torch.bfloat16, torch.float16, torch.float32]