[Logs] Change flashinfer sampler logs to once (#21759)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin 2025-07-28 09:59:51 -04:00 committed by GitHub
parent 31084b3b1f
commit 34a20c49b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -33,7 +33,7 @@ class TopKTopPSampler(nn.Module):
if is_flashinfer_available:
flashinfer_version = flashinfer.__version__
if flashinfer_version < "0.2.3":
logger.warning(
logger.warning_once(
"FlashInfer version >= 0.2.3 required. "
"Falling back to default sampling implementation.")
self.forward = self.forward_native
@@ -46,17 +46,18 @@ class TopKTopPSampler(nn.Module):
# None means False, while in V1, None means True. This is
# why we use the condition
# `envs.VLLM_USE_FLASHINFER_SAMPLER is not False` here.
logger.info("Using FlashInfer for top-p & top-k sampling.")
logger.info_once(
"Using FlashInfer for top-p & top-k sampling.")
self.forward = self.forward_cuda
else:
logger.warning(
logger.warning_once(
"FlashInfer is available, but it is not enabled. "
"Falling back to the PyTorch-native implementation of "
"top-p & top-k sampling. For the best performance, "
"please set VLLM_USE_FLASHINFER_SAMPLER=1.")
self.forward = self.forward_native
else:
logger.warning(
logger.warning_once(
"FlashInfer is not available. Falling back to the PyTorch-"
"native implementation of top-p & top-k sampling. For the "
"best performance, please install FlashInfer.")
@@ -97,9 +98,9 @@ class TopKTopPSampler(nn.Module):
probs = logits.softmax(dim=-1, dtype=torch.float32)
return random_sample(probs, generators)
if generators:
logger.warning("FlashInfer 0.2.3+ does not support "
"per-request generators. Falling back to "
"PyTorch-native implementation.")
logger.warning_once("FlashInfer 0.2.3+ does not support "
"per-request generators. Falling back to "
"PyTorch-native implementation.")
return self.forward_native(logits, generators, k, p)
# flashinfer sampling functions expect contiguous logits.
# In flex_attn/triton_attn fp32 inference, logits can be non-contiguous