[Ascend]: Fixed the issue where OOT Platform vllm-ascend could not enable SP in Eager mode (#28935)

Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
Nengjun Ma 2025-12-02 04:02:18 +08:00 committed by GitHub
parent 38caf7fa1a
commit eaf81485ed
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 15 additions and 2 deletions

View File

@ -855,6 +855,13 @@ class CompilationConfig:
self.compute_bs_to_padded_graph_size()
def set_splitting_ops_for_v1(self):
# To be compatible with OOT hardware plugin platforms (for example vllm-ascend),
# which currently only support sequence parallelism in eager mode.
if self.mode != CompilationMode.VLLM_COMPILE:
if self.splitting_ops is None:
self.splitting_ops = []
return
# NOTE: this function needs to be called only when mode is
# CompilationMode.VLLM_COMPILE
assert self.mode == CompilationMode.VLLM_COMPILE, (

View File

@ -797,8 +797,7 @@ class VllmConfig:
), "MTP with cp_kv_cache_interleave_size > 1 is not supported now."
# Do this after all the updates to compilation_config.mode
if self.compilation_config.mode == CompilationMode.VLLM_COMPILE:
self.compilation_config.set_splitting_ops_for_v1()
self.compilation_config.set_splitting_ops_for_v1()
if self.compilation_config.pass_config.enable_sequence_parallelism:
# With pipeline parallelism or dynamo partitioning,
@ -806,6 +805,13 @@ class VllmConfig:
# Use custom rms norm to unblock. In the future,
# the pass will operate on higher-level IR to avoid the issue.
# TODO: https://github.com/vllm-project/vllm/issues/27894
if self.compilation_config.mode != CompilationMode.VLLM_COMPILE:
logger.warning(
"Sequence parallelism is enabled, but running in wrong "
"vllm compile mode: %s.",
self.compilation_config.mode,
)
is_fullgraph = (
self.compilation_config.use_inductor_graph_partition
or len(self.compilation_config.splitting_ops) == 0