From 31f5dc5b2a5da18bc17240c7a67e8770d00901d8 Mon Sep 17 00:00:00 2001
From: Yongye Zhu
Date: Wed, 6 Aug 2025 11:41:42 -0700
Subject: [PATCH] [gpt-oss] Enhance error msg on attention sink init (#22335)

Signed-off-by: simon-mo
Signed-off-by: Yongye Zhu
Co-authored-by: simon-mo
---
 vllm/v1/attention/backends/flashinfer.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py
index caf9ecc91108d..061bd5f1d277a 100755
--- a/vllm/v1/attention/backends/flashinfer.py
+++ b/vllm/v1/attention/backends/flashinfer.py
@@ -638,11 +638,15 @@ class FlashInferImpl(AttentionImpl):
 
         self.sinks: Optional[torch.Tensor] = None
         if sinks is not None:
-            assert sinks.shape[0] == num_heads, (
-                "Sinks must have the same number of heads "
-                "as the number of heads in the layer"
-            )
-            assert sinks.dtype == torch.float32, "Sinks must be of type float32"
+            if sinks.shape[0] != num_heads:
+                raise ValueError(
+                    "Sinks must have the same number of heads as the number of "
+                    f"heads in the layer. Expected {num_heads}, but got "
+                    f"{sinks.shape[0]}."
+                )
+            if sinks.dtype != torch.float32:
+                raise ValueError("Sinks must be of type float32, but got "
+                                 f"{sinks.dtype}.")
             self.sinks = sinks
 
     def forward(
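
For reference, a minimal standalone sketch (not part of the patch) of the
validation behavior this change introduces: shape and dtype mismatches on the
attention sink tensor now raise a descriptive ValueError instead of tripping a
bare assert. The helper name _check_sinks is hypothetical; the FlashInferImpl
constructor is reduced here to just the sink checks.

    import torch

    def _check_sinks(sinks: torch.Tensor, num_heads: int) -> torch.Tensor:
        # Mirrors the validation added in FlashInferImpl.__init__: the error
        # message now reports the expected and actual values.
        if sinks.shape[0] != num_heads:
            raise ValueError(
                "Sinks must have the same number of heads as the number of "
                f"heads in the layer. Expected {num_heads}, but got "
                f"{sinks.shape[0]}.")
        if sinks.dtype != torch.float32:
            raise ValueError("Sinks must be of type float32, but got "
                             f"{sinks.dtype}.")
        return sinks

    # Example: a bf16 sink tensor now produces a descriptive error.
    try:
        _check_sinks(torch.zeros(8, dtype=torch.bfloat16), num_heads=8)
    except ValueError as e:
        print(e)  # Sinks must be of type float32, but got torch.bfloat16.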