mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-25 19:21:53 +08:00
[Spec Decode] Don't fall back to V0 when spec decoding is enabled (#18265)
This commit is contained in:
parent
e73b7dfd69
commit
fabe89bbc4
@@ -1325,7 +1325,7 @@ class EngineArgs:
|
||||
recommend_to_remove=False)
|
||||
return False
|
||||
|
||||
# Only Ngram speculative decoding so far.
|
||||
# V1 supports N-gram, Medusa, and Eagle speculative decoding.
|
||||
is_ngram_enabled = False
|
||||
is_eagle_enabled = False
|
||||
is_medusa_enabled = False
|
||||
@@ -1390,14 +1390,6 @@ class EngineArgs:
|
||||
_raise_or_fallback(feature_name=name, recommend_to_remove=False)
|
||||
return False
|
||||
|
||||
# ngram is supported on V1, but off by default for now.
|
||||
if is_ngram_enabled and _warn_or_fallback("ngram"):
|
||||
return False
|
||||
|
||||
# Eagle is under development, so we don't support it yet.
|
||||
if is_eagle_enabled and _warn_or_fallback("Eagle"):
|
||||
return False
|
||||
|
||||
# Non-[CUDA, TPU] may be supported on V1, but off by default for now.
|
||||
v0_hardware = not any(
|
||||
(current_platform.is_cuda(), current_platform.is_tpu()))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user