[main][BugFix] Fixed an accuracy bug of Qwen3-next-MTP when batched inferring (#30632)

Signed-off-by: drslark <slarksblood@qq.com>
2026-06-21 21:37:17 +08:00 · 2025-12-14 17:32:16 +08:00 · 2025-12-14 17:32:16 +08:00 · add1b9d3de
commit add1b9d3de
parent dcb31196da
1 changed files with 1 additions and 1 deletions
--- a/vllm/v1/attention/backends/gdn_attn.py
+++ b/vllm/v1/attention/backends/gdn_attn.py
@ -211,7 +211,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
                spec_token_masks = torch.repeat_interleave(
                    spec_sequence_masks, query_lens
                )
-                index = torch.argsort(spec_token_masks)
+                index = torch.argsort(spec_token_masks, stable=True)
                num_non_spec_tokens = num_prefill_tokens + num_decode_tokens
                non_spec_token_indx = index[:num_non_spec_tokens]
                spec_token_indx = index[num_non_spec_tokens:]