mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 05:47:04 +08:00
remove debug logging
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
5bbfd95bdb
commit
2cf200c5b8
@@ -1903,27 +1903,27 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
is_dummy_run=is_dummy_run)
|
is_dummy_run=is_dummy_run)
|
||||||
if num_scheduled_tokens not in self.cudagraphs \
|
if num_scheduled_tokens not in self.cudagraphs \
|
||||||
and not skip_cuda_graphs and build_cuda_graph:
|
and not skip_cuda_graphs and build_cuda_graph:
|
||||||
if is_global_first_rank():
|
# if is_global_first_rank():
|
||||||
logger.info(f"CAPTURING {num_scheduled_tokens}")
|
# logger.info(f"CAPTURING {num_scheduled_tokens}")
|
||||||
return self._capture_ubatches(ubatch_metadata, self.model)
|
return self._capture_ubatches(ubatch_metadata, self.model)
|
||||||
elif num_scheduled_tokens in self.cudagraphs and not skip_cuda_graphs:
|
elif num_scheduled_tokens in self.cudagraphs and not skip_cuda_graphs:
|
||||||
# assert False
|
# assert False
|
||||||
cudagraph_metadata = self.cudagraphs[num_scheduled_tokens]
|
cudagraph_metadata = self.cudagraphs[num_scheduled_tokens]
|
||||||
if is_global_first_rank():
|
# if is_global_first_rank():
|
||||||
logger.info(f"UBATCH REPLAY {num_scheduled_tokens}")
|
# logger.info(f"UBATCH REPLAY {num_scheduled_tokens}")
|
||||||
cudagraph_metadata.cudagraph.replay()
|
cudagraph_metadata.cudagraph.replay()
|
||||||
return cudagraph_metadata.outputs
|
return cudagraph_metadata.outputs
|
||||||
else:
|
else:
|
||||||
if is_global_first_rank():
|
# if is_global_first_rank():
|
||||||
logger.info(f"RUNNING NORMALLY {num_scheduled_tokens}")
|
# logger.info(f"RUNNING NORMALLY {num_scheduled_tokens}")
|
||||||
return self._run_ubatches(ubatch_metadata, self.model)
|
return self._run_ubatches(ubatch_metadata, self.model)
|
||||||
# run normal batch
|
# run normal batch
|
||||||
else:
|
else:
|
||||||
input_ids, positions, inputs_embeds, intermediate_tensors = \
|
input_ids, positions, inputs_embeds, intermediate_tensors = \
|
||||||
self.model_inputs(slice(0, num_scheduled_tokens),
|
self.model_inputs(slice(0, num_scheduled_tokens),
|
||||||
scheduler_output, is_dummy_run)
|
scheduler_output, is_dummy_run)
|
||||||
if is_global_first_rank():
|
# if is_global_first_rank():
|
||||||
logger.info(f"RUNNING FULL BATCH {num_scheduled_tokens}")
|
# logger.info(f"RUNNING FULL BATCH {num_scheduled_tokens}")
|
||||||
skip_cuda_graphs = self.parallel_config.enable_microbatching
|
skip_cuda_graphs = self.parallel_config.enable_microbatching
|
||||||
with set_forward_context(attn_metadata,
|
with set_forward_context(attn_metadata,
|
||||||
vllm_config=self.vllm_config,
|
vllm_config=self.vllm_config,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user