From 532a6cfccbaa1bc943512cca06b48d5d3500669d Mon Sep 17 00:00:00 2001
From: Russell Bryant <rbryant@redhat.com>
Date: Thu, 25 Sep 2025 10:38:16 -0400
Subject: [PATCH] [ux] Switch a PyTorch fallback warning to debug level
 (#23750)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
---
 vllm/v1/sample/ops/topk_topp_sampler.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py
index d3c5019f1228..5bcf1b585441 100644
--- a/vllm/v1/sample/ops/topk_topp_sampler.py
+++ b/vllm/v1/sample/ops/topk_topp_sampler.py
@@ -109,9 +109,9 @@ class TopKTopPSampler(nn.Module):
         # CPU-GPU synchronization while `flashinfer_sample` does.
         if (k is None and p is None) or generators:
             if generators:
-                logger.warning_once("FlashInfer 0.2.3+ does not support "
-                                    "per-request generators. Falling back to "
-                                    "PyTorch-native implementation.")
+                logger.debug_once("FlashInfer 0.2.3+ does not support "
+                                  "per-request generators. Falling back to "
+                                  "PyTorch-native implementation.")
             return self.forward_native(logits, generators, k, p)
         assert self.logprobs_mode not in (
             "processed_logits", "processed_logprobs"