From bc8ed3c4bad05f5b1d37f34d5a8d324c23e84785 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 23 Mar 2025 10:52:30 -0700 Subject: [PATCH] [V1][Spec Decode] Use better defaults for N-gram (#15358) Signed-off-by: Woosuk Kwon --- vllm/config.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 8b3c66cca50ea..1552fb280a268 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -2012,18 +2012,30 @@ class SpeculativeConfig: if self.method in ("ngram", "[ngram]"): # Unified to "ngram" internally self.method = "ngram" - if self.prompt_lookup_min is None: - self.prompt_lookup_min = 1 - if self.prompt_lookup_max is None or self.prompt_lookup_max < 1: - raise ValueError("prompt_lookup_max=" - f"{self.prompt_lookup_max} must be > 0") + # Set default values if not provided + if (self.prompt_lookup_min is None + and self.prompt_lookup_max is None): + # TODO(woosuk): Tune these values. They are arbitrarily chosen. + self.prompt_lookup_min = 5 + self.prompt_lookup_max = 5 + elif self.prompt_lookup_min is None: + assert self.prompt_lookup_max is not None + self.prompt_lookup_min = self.prompt_lookup_max + elif self.prompt_lookup_max is None: + assert self.prompt_lookup_min is not None + self.prompt_lookup_max = self.prompt_lookup_min + + # Validate values if self.prompt_lookup_min < 1: - raise ValueError("prompt_lookup_min=" - f"{self.prompt_lookup_min} must be > 0") + raise ValueError( + f"prompt_lookup_min={self.prompt_lookup_min} must be > 0") + if self.prompt_lookup_max < 1: + raise ValueError( + f"prompt_lookup_max={self.prompt_lookup_max} must be > 0") if self.prompt_lookup_min > self.prompt_lookup_max: - raise ValueError(f"prompt_lookup_min={self.prompt_lookup_min} " - "cannot be larger than prompt_lookup_max=" - f"{self.prompt_lookup_max}") + raise ValueError( + f"prompt_lookup_min={self.prompt_lookup_min} must " + f"be <= prompt_lookup_max={self.prompt_lookup_max}") # TODO: current we still need extract vocab_size from target model # config, in future, we may try refactor it out, and set