From 532a6cfccbaa1bc943512cca06b48d5d3500669d Mon Sep 17 00:00:00 2001
From: Russell Bryant
Date: Thu, 25 Sep 2025 10:38:16 -0400
Subject: [PATCH] [ux] Switch a warning to debug about a pytorch fallback (#23750)

Signed-off-by: Russell Bryant
---
 vllm/v1/sample/ops/topk_topp_sampler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py
index d3c5019f1228..5bcf1b585441 100644
--- a/vllm/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm/v1/sample/ops/topk_topp_sampler.py
@@ -109,9 +109,9 @@ class TopKTopPSampler(nn.Module):
         # CPU-GPU synchronization while `flashinfer_sample` does.
         if (k is None and p is None) or generators:
             if generators:
-                logger.warning_once("FlashInfer 0.2.3+ does not support "
-                                    "per-request generators. Falling back to "
-                                    "PyTorch-native implementation.")
+                logger.debug_once("FlashInfer 0.2.3+ does not support "
+                                  "per-request generators. Falling back to "
+                                  "PyTorch-native implementation.")
             return self.forward_native(logits, generators, k, p)
         assert self.logprobs_mode not in (
             "processed_logits", "processed_logprobs"