[ux] Switch a warning to debug about a pytorch fallback (#23750)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
2026-01-07 23:00:54 +08:00 · 2025-09-25 10:38:16 -04:00 · 2025-09-25 10:38:16 -04:00 · 054c8b526f
commit 054c8b526f
parent 2469b8291b
1 changed file with 3 additions and 3 deletions
--- a/vllm/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm/v1/sample/ops/topk_topp_sampler.py
@@ -109,9 +109,9 @@ class TopKTopPSampler(nn.Module):
        # CPU-GPU synchronization while `flashinfer_sample` does.
        if (k is None and p is None) or generators:
            if generators:
-                logger.warning_once("FlashInfer 0.2.3+ does not support "
-                                    "per-request generators. Falling back to "
-                                    "PyTorch-native implementation.")
+                logger.debug_once("FlashInfer 0.2.3+ does not support "
+                                  "per-request generators. Falling back to "
+                                  "PyTorch-native implementation.")
            return self.forward_native(logits, generators, k, p)
        assert self.logprobs_mode not in (
            "processed_logits", "processed_logprobs"