mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 00:45:36 +08:00
debug
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
This commit is contained in:
parent
d76541a6c5
commit
52a7d91980
@ -2400,6 +2400,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
|
|
||||||
# Run the model.
|
# Run the model.
|
||||||
# Use persistent buffers for CUDA graphs.
|
# Use persistent buffers for CUDA graphs.
|
||||||
|
logger.info(f"====== EXECUTE {ubatch_slices=}, {num_input_tokens=}, {num_tokens_across_dp=}")
|
||||||
with (set_forward_context(
|
with (set_forward_context(
|
||||||
attn_metadata,
|
attn_metadata,
|
||||||
self.vllm_config,
|
self.vllm_config,
|
||||||
@ -3046,6 +3047,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
(1 token) and prefill (multiple tokens) requests.
|
(1 token) and prefill (multiple tokens) requests.
|
||||||
remove_lora: If False, dummy LoRAs are not destroyed after the run
|
remove_lora: If False, dummy LoRAs are not destroyed after the run
|
||||||
"""
|
"""
|
||||||
|
logger.info("====== DUMMY RUN")
|
||||||
assert cudagraph_runtime_mode is None or \
|
assert cudagraph_runtime_mode is None or \
|
||||||
cudagraph_runtime_mode.valid_runtime_modes()
|
cudagraph_runtime_mode.valid_runtime_modes()
|
||||||
|
|
||||||
|
|||||||
@ -167,6 +167,7 @@ def ubatch_split(
|
|||||||
num_tokens_unpadded,
|
num_tokens_unpadded,
|
||||||
uniform_decode=uniform_decode,
|
uniform_decode=uniform_decode,
|
||||||
)
|
)
|
||||||
|
logger.info(f"==== {should_attempt_ubatching=}, {num_tokens_unpadded=}")
|
||||||
|
|
||||||
# Don't microbatch unless every other DP worker is also microbatching
|
# Don't microbatch unless every other DP worker is also microbatching
|
||||||
should_ubatch, num_tokens_after_padding = get_dp_padding_ubatch(
|
should_ubatch, num_tokens_after_padding = get_dp_padding_ubatch(
|
||||||
@ -175,6 +176,7 @@ def ubatch_split(
|
|||||||
should_attempt_ubatching,
|
should_attempt_ubatching,
|
||||||
vllm_config,
|
vllm_config,
|
||||||
)
|
)
|
||||||
|
logger.info(f"==== {should_ubatch=}, {num_tokens_after_padding=}")
|
||||||
|
|
||||||
if not should_ubatch:
|
if not should_ubatch:
|
||||||
return (None, None)
|
return (None, None)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user