mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-17 06:35:02 +08:00
[DCP] check return_lse for all layers in dcp (#27929)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
parent
0606bea2b6
commit
5d16d0fa62
@@ -4680,10 +4680,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
kv_transfer_group.set_host_xfer_buffer_ops(copy_kv_blocks)
|
kv_transfer_group.set_host_xfer_buffer_ops(copy_kv_blocks)
|
||||||
|
|
||||||
if self.dcp_world_size > 1:
|
if self.dcp_world_size > 1:
|
||||||
layer_names = self.attn_groups[0][0].layer_names
|
layers = get_layers_from_vllm_config(self.vllm_config, AttentionLayerBase)
|
||||||
layers = get_layers_from_vllm_config(
|
|
||||||
self.vllm_config, AttentionLayerBase, layer_names
|
|
||||||
)
|
|
||||||
for layer in layers.values():
|
for layer in layers.values():
|
||||||
assert layer.impl.need_to_return_lse_for_decode, (
|
assert layer.impl.need_to_return_lse_for_decode, (
|
||||||
"DCP requires attention impls to return"
|
"DCP requires attention impls to return"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user