mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-22 19:11:18 +08:00
better debug utils
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
This commit is contained in:
parent
952f3c5c1e
commit
e4419df256
@ -3,7 +3,7 @@ from abc import ABC, abstractmethod
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from vllm.v1.worker.ubatching import get_current_ubatch_context
|
from vllm.v1.worker.ubatching import get_current_ubatch_context, dump_ubatching_state
|
||||||
|
|
||||||
#
|
#
|
||||||
# This file defines a set of base classes used to make MoE kernels more modular.
|
# This file defines a set of base classes used to make MoE kernels more modular.
|
||||||
@ -347,6 +347,7 @@ class FusedMoEModularKernel(torch.nn.Module):
|
|||||||
# print("in modular moe2, ubatch:", ubatch_ctx.id, self.fused_experts)
|
# print("in modular moe2, ubatch:", ubatch_ctx.id, self.fused_experts)
|
||||||
|
|
||||||
print("pre synchronize")
|
print("pre synchronize")
|
||||||
|
dump_ubatching_state()
|
||||||
torch.cuda.synchronize(a1.device)
|
torch.cuda.synchronize(a1.device)
|
||||||
print("post synchronize")
|
print("post synchronize")
|
||||||
|
|
||||||
|
|||||||
@ -40,9 +40,9 @@ class UBatchContext:
|
|||||||
global _CURRENT_CONTEXT
|
global _CURRENT_CONTEXT
|
||||||
_CURRENT_CONTEXT[threading.get_ident()] = self
|
_CURRENT_CONTEXT[threading.get_ident()] = self
|
||||||
self._cpu_wait()
|
self._cpu_wait()
|
||||||
start_event = torch.cuda.Event()
|
# start_event = torch.cuda.Event()
|
||||||
self.original_stream.record_event(start_event)
|
# self.original_stream.record_event(start_event)
|
||||||
self.stream.wait_event(start_event)
|
# self.stream.wait_event(start_event)
|
||||||
print("Starting ubatch %d" % self.id)
|
print("Starting ubatch %d" % self.id)
|
||||||
# if self.gpu_wait_on_launch:
|
# if self.gpu_wait_on_launch:
|
||||||
# self.gpu_stream_wait()
|
# self.gpu_stream_wait()
|
||||||
@ -131,6 +131,21 @@ def yield_(x: torch.Tensor, schedule: str="default") -> None:
|
|||||||
def yield_(x: torch.Tensor, schedule: str="default") -> None:
|
def yield_(x: torch.Tensor, schedule: str="default") -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def dump_ubatching_state():
|
||||||
|
"""
|
||||||
|
Dump the current UBatchContext state for debugging.
|
||||||
|
"""
|
||||||
|
for ctx in _CURRENT_CONTEXT.values():
|
||||||
|
print(f"UBatchContext: {ctx.id}\n"
|
||||||
|
f" Stream: {ctx.stream}, ({ctx.stream.query()})\n"
|
||||||
|
f" Original Stream: {ctx.original_stream}, ({ctx.original_stream.query()})\n"
|
||||||
|
f" CPU Wait Event: {ctx.cpu_wait_event}\n"
|
||||||
|
f" GPU Wait Event: {ctx.gpu_wait_event} ({ctx.gpu_wait_event.query()})\n"
|
||||||
|
f" CPU Signal Event: {ctx.cpu_signal_event}\n"
|
||||||
|
f" GPU Signal Event: {ctx.gpu_signal_event} ({ctx.gpu_signal_event.query()})\n")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user