From fabe89bbc41b34558558c0d5401e23776460eecb Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Fri, 16 May 2025 16:10:27 -0700 Subject: [PATCH] [Spec Decode] Don't fall back to V0 when spec decoding is enabled (#18265) --- vllm/engine/arg_utils.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index dc2bb3a52cac1..f0c6b15b79da3 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1325,7 +1325,7 @@ class EngineArgs: recommend_to_remove=False) return False - # Only Ngram speculative decoding so far. + # V1 supports N-gram, Medusa, and Eagle speculative decoding. is_ngram_enabled = False is_eagle_enabled = False is_medusa_enabled = False @@ -1390,14 +1390,6 @@ class EngineArgs: _raise_or_fallback(feature_name=name, recommend_to_remove=False) return False - # ngram is supported on V1, but off by default for now. - if is_ngram_enabled and _warn_or_fallback("ngram"): - return False - - # Eagle is under development, so we don't support it yet. - if is_eagle_enabled and _warn_or_fallback("Eagle"): - return False - # Non-[CUDA, TPU] may be supported on V1, but off by default for now. v0_hardware = not any( (current_platform.is_cuda(), current_platform.is_tpu()))