mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-24 00:14:34 +08:00
[Ascend]: Fixed the issue where OOT Platform vllm-ascend could not enable SP in Eager mode (#28935)
Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
parent
38caf7fa1a
commit
eaf81485ed
@ -855,6 +855,13 @@ class CompilationConfig:
|
||||
self.compute_bs_to_padded_graph_size()
|
||||
|
||||
def set_splitting_ops_for_v1(self):
|
||||
# To be compatible with an OOT hardware plugin platform (e.g. vllm-ascend)
|
||||
# which currently only supports sequence parallelism in eager mode.
|
||||
if self.mode != CompilationMode.VLLM_COMPILE:
|
||||
if self.splitting_ops is None:
|
||||
self.splitting_ops = []
|
||||
return
|
||||
|
||||
# NOTE: this function needs to be called only when mode is
|
||||
# CompilationMode.VLLM_COMPILE
|
||||
assert self.mode == CompilationMode.VLLM_COMPILE, (
|
||||
|
||||
@ -797,8 +797,7 @@ class VllmConfig:
|
||||
), "MTP with cp_kv_cache_interleave_size > 1 is not supported now."
|
||||
|
||||
# Do this after all the updates to compilation_config.mode
|
||||
if self.compilation_config.mode == CompilationMode.VLLM_COMPILE:
|
||||
self.compilation_config.set_splitting_ops_for_v1()
|
||||
self.compilation_config.set_splitting_ops_for_v1()
|
||||
|
||||
if self.compilation_config.pass_config.enable_sequence_parallelism:
|
||||
# With pipeline parallelism or dynamo partitioning,
|
||||
@ -806,6 +805,13 @@ class VllmConfig:
|
||||
# Use custom rms norm to unblock. In the future,
|
||||
# the pass will operate on higher-level IR to avoid the issue.
|
||||
# TODO: https://github.com/vllm-project/vllm/issues/27894
|
||||
if self.compilation_config.mode != CompilationMode.VLLM_COMPILE:
|
||||
logger.warning(
|
||||
"Sequence parallelism is enabled, but running in wrong "
|
||||
"vllm compile mode: %s.",
|
||||
self.compilation_config.mode,
|
||||
)
|
||||
|
||||
is_fullgraph = (
|
||||
self.compilation_config.use_inductor_graph_partition
|
||||
or len(self.compilation_config.splitting_ops) == 0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user