mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-20 01:37:33 +08:00
[Ascend]: Fixed the issue where OOT Platform vllm-ascend could not enable SP in Eager mode (#28935)
Signed-off-by: leo-pony <nengjunma@outlook.com>
This commit is contained in:
parent
38caf7fa1a
commit
eaf81485ed
@ -855,6 +855,13 @@ class CompilationConfig:
|
|||||||
self.compute_bs_to_padded_graph_size()
|
self.compute_bs_to_padded_graph_size()
|
||||||
|
|
||||||
def set_splitting_ops_for_v1(self):
|
def set_splitting_ops_for_v1(self):
|
||||||
|
# To be compatible with the OOT hardware plugin platform (for example vllm-ascend)
|
||||||
|
# which currently only supports sequence parallelism in eager mode.
|
||||||
|
if self.mode != CompilationMode.VLLM_COMPILE:
|
||||||
|
if self.splitting_ops is None:
|
||||||
|
self.splitting_ops = []
|
||||||
|
return
|
||||||
|
|
||||||
# NOTE: this function needs to be called only when mode is
|
# NOTE: this function needs to be called only when mode is
|
||||||
# CompilationMode.VLLM_COMPILE
|
# CompilationMode.VLLM_COMPILE
|
||||||
assert self.mode == CompilationMode.VLLM_COMPILE, (
|
assert self.mode == CompilationMode.VLLM_COMPILE, (
|
||||||
|
|||||||
@ -797,8 +797,7 @@ class VllmConfig:
|
|||||||
), "MTP with cp_kv_cache_interleave_size > 1 is not supported now."
|
), "MTP with cp_kv_cache_interleave_size > 1 is not supported now."
|
||||||
|
|
||||||
# Do this after all the updates to compilation_config.mode
|
# Do this after all the updates to compilation_config.mode
|
||||||
if self.compilation_config.mode == CompilationMode.VLLM_COMPILE:
|
self.compilation_config.set_splitting_ops_for_v1()
|
||||||
self.compilation_config.set_splitting_ops_for_v1()
|
|
||||||
|
|
||||||
if self.compilation_config.pass_config.enable_sequence_parallelism:
|
if self.compilation_config.pass_config.enable_sequence_parallelism:
|
||||||
# With pipeline parallelism or dynamo partitioning,
|
# With pipeline parallelism or dynamo partitioning,
|
||||||
@ -806,6 +805,13 @@ class VllmConfig:
|
|||||||
# Use custom rms norm to unblock. In the future,
|
# Use custom rms norm to unblock. In the future,
|
||||||
# the pass will operate on higher-level IR to avoid the issue.
|
# the pass will operate on higher-level IR to avoid the issue.
|
||||||
# TODO: https://github.com/vllm-project/vllm/issues/27894
|
# TODO: https://github.com/vllm-project/vllm/issues/27894
|
||||||
|
if self.compilation_config.mode != CompilationMode.VLLM_COMPILE:
|
||||||
|
logger.warning(
|
||||||
|
"Sequence parallelism is enabled, but running in wrong "
|
||||||
|
"vllm compile mode: %s.",
|
||||||
|
self.compilation_config.mode,
|
||||||
|
)
|
||||||
|
|
||||||
is_fullgraph = (
|
is_fullgraph = (
|
||||||
self.compilation_config.use_inductor_graph_partition
|
self.compilation_config.use_inductor_graph_partition
|
||||||
or len(self.compilation_config.splitting_ops) == 0
|
or len(self.compilation_config.splitting_ops) == 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user