mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-23 18:44:30 +08:00
[BugFix] Fix false assertion with spec-decode=[2,4,..] and TP>2 (#29036)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
22e44ad589
commit
8f4f77a727
@@ -921,7 +921,7 @@ class CompilationConfig:
|
||||
self, uniform_decode_query_len: int, tensor_parallel_size: int
|
||||
):
|
||||
multiple_of = uniform_decode_query_len
|
||||
- if tensor_parallel_size > 1:
|
||||
+ if tensor_parallel_size > 1 and self.pass_config.enable_sequence_parallelism:
|
||||
multiple_of = max(uniform_decode_query_len, tensor_parallel_size)
|
||||
if (
|
||||
multiple_of % uniform_decode_query_len != 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user