mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-24 03:37:54 +08:00
[BugFix] Fix false assertion with spec-decode=[2,4,..] and TP>2 (#29036)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
22e44ad589
commit
8f4f77a727
@ -921,7 +921,7 @@ class CompilationConfig:
|
|||||||
self, uniform_decode_query_len: int, tensor_parallel_size: int
|
self, uniform_decode_query_len: int, tensor_parallel_size: int
|
||||||
):
|
):
|
||||||
multiple_of = uniform_decode_query_len
|
multiple_of = uniform_decode_query_len
|
||||||
if tensor_parallel_size > 1:
|
if tensor_parallel_size > 1 and self.pass_config.enable_sequence_parallelism:
|
||||||
multiple_of = max(uniform_decode_query_len, tensor_parallel_size)
|
multiple_of = max(uniform_decode_query_len, tensor_parallel_size)
|
||||||
if (
|
if (
|
||||||
multiple_of % uniform_decode_query_len != 0
|
multiple_of % uniform_decode_query_len != 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user