mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-07 08:59:09 +08:00
[Spec Decode] Don't fall back to V0 when spec decoding is enabled (#18265)
This commit is contained in:
parent
e73b7dfd69
commit
fabe89bbc4
@ -1325,7 +1325,7 @@ class EngineArgs:
|
|||||||
recommend_to_remove=False)
|
recommend_to_remove=False)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Only Ngram speculative decoding so far.
|
# V1 supports N-gram, Medusa, and Eagle speculative decoding.
|
||||||
is_ngram_enabled = False
|
is_ngram_enabled = False
|
||||||
is_eagle_enabled = False
|
is_eagle_enabled = False
|
||||||
is_medusa_enabled = False
|
is_medusa_enabled = False
|
||||||
@ -1390,14 +1390,6 @@ class EngineArgs:
|
|||||||
_raise_or_fallback(feature_name=name, recommend_to_remove=False)
|
_raise_or_fallback(feature_name=name, recommend_to_remove=False)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# ngram is supported on V1, but off by default for now.
|
|
||||||
if is_ngram_enabled and _warn_or_fallback("ngram"):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Eagle is under development, so we don't support it yet.
|
|
||||||
if is_eagle_enabled and _warn_or_fallback("Eagle"):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Non-[CUDA, TPU] may be supported on V1, but off by default for now.
|
# Non-[CUDA, TPU] may be supported on V1, but off by default for now.
|
||||||
v0_hardware = not any(
|
v0_hardware = not any(
|
||||||
(current_platform.is_cuda(), current_platform.is_tpu()))
|
(current_platform.is_cuda(), current_platform.is_tpu()))
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user