mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-08 07:37:02 +08:00
[Hardware] [HPU]add mark_step for hpu (#10239)
Signed-off-by: Kunshang Ji <kunshang.ji@intel.com>
This commit is contained in:
parent
8d74b5aee9
commit
76aab90ab6
@ -272,6 +272,19 @@ def precompute_indices_and_offsets(block_size, slot_mapping, is_prompt):
|
||||
return indices, offsets
|
||||
|
||||
|
||||
def modify_decoder_layer(module: torch.nn.Module,
                         suffix="DecoderLayer") -> None:
    """Attach a ``mark_step`` forward hook to matching layers of a module tree.

    Visits ``module`` and every descendant module. Each module whose class
    name ends with ``suffix`` gets a forward hook that calls
    ``htorch.core.mark_step()`` once its forward pass has produced an output,
    then hands that output back unchanged.

    Args:
        module: Root of the module tree to instrument; hooks are registered
            on the existing module objects (nothing is copied).
        suffix: Class-name suffix that selects which modules get the hook.
            The same suffix is applied at every depth of the tree.
    """
    if module.__class__.__name__.endswith(suffix):

        def forward_hook(_module, _args, output):
            # NOTE(review): presumably splits the HPU lazy-mode graph at the
            # layer boundary -- confirm against the Gaudi documentation.
            htorch.core.mark_step()
            return output

        module.register_forward_hook(forward_hook)

    for child_module in module.children():
        # Forward the caller's suffix; otherwise nested modules would be
        # matched against the default instead of the requested suffix.
        modify_decoder_layer(child_module, suffix)
|
||||
|
||||
|
||||
class HpuModelAdapter:
|
||||
|
||||
def __init__(self, model, block_size, dtype, enforce_eager):
|
||||
@ -636,6 +649,7 @@ class HPUModelRunnerBase(ModelRunnerBase[TModelInputForHPU]):
|
||||
else:
|
||||
self.model = self.model.to("hpu")
|
||||
htcore.mark_step()
|
||||
modify_decoder_layer(self.model)
|
||||
torch.hpu.synchronize()
|
||||
|
||||
with HabanaMemoryProfiler() as m_wrap:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user