[Spec Decode] Don't fall back to V0 when spec decoding is enabled (#18265)

2025-12-25 19:21:53 +08:00 · 2025-05-16 16:10:27 -07:00 · 2025-05-16 16:10:27 -07:00 · fabe89bbc4
commit fabe89bbc4
parent e73b7dfd69
1 changed file with 1 addition and 9 deletions
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1325,7 +1325,7 @@ class EngineArgs:
                               recommend_to_remove=False)
            return False

-        # Only Ngram speculative decoding so far.
+        # V1 supports N-gram, Medusa, and Eagle speculative decoding.
        is_ngram_enabled = False
        is_eagle_enabled = False
        is_medusa_enabled = False
@@ -1390,14 +1390,6 @@ class EngineArgs:
            _raise_or_fallback(feature_name=name, recommend_to_remove=False)
            return False

-        # ngram is supported on V1, but off by default for now.
-        if is_ngram_enabled and _warn_or_fallback("ngram"):
-            return False
-
-        # Eagle is under development, so we don't support it yet.
-        if is_eagle_enabled and _warn_or_fallback("Eagle"):
-            return False
-
        # Non-[CUDA, TPU] may be supported on V1, but off by default for now.
        v0_hardware = not any(
            (current_platform.is_cuda(), current_platform.is_tpu()))