diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index ca01cb3fb55d5..1c3ef502f0f45 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -921,7 +921,7 @@ class CompilationConfig: self, uniform_decode_query_len: int, tensor_parallel_size: int ): multiple_of = uniform_decode_query_len - if tensor_parallel_size > 1: + if tensor_parallel_size > 1 and self.pass_config.enable_sequence_parallelism: multiple_of = max(uniform_decode_query_len, tensor_parallel_size) if ( multiple_of % uniform_decode_query_len != 0