[Spec Decode] Don't fall back to V0 when spec decoding is enabled (#18265)

This commit is contained in:
Woosuk Kwon 2025-05-16 16:10:27 -07:00 committed by GitHub
parent e73b7dfd69
commit fabe89bbc4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1325,7 +1325,7 @@ class EngineArgs:
recommend_to_remove=False)
return False
# Only Ngram speculative decoding so far.
# V1 supports N-gram, Medusa, and Eagle speculative decoding.
is_ngram_enabled = False
is_eagle_enabled = False
is_medusa_enabled = False
@@ -1390,14 +1390,6 @@ class EngineArgs:
_raise_or_fallback(feature_name=name, recommend_to_remove=False)
return False
# ngram is supported on V1, but off by default for now.
if is_ngram_enabled and _warn_or_fallback("ngram"):
return False
# Eagle is under development, so we don't support it yet.
if is_eagle_enabled and _warn_or_fallback("Eagle"):
return False
# Non-[CUDA, TPU] may be supported on V1, but off by default for now.
v0_hardware = not any(
(current_platform.is_cuda(), current_platform.is_tpu()))