mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-08 18:27:05 +08:00
misc fixes
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
4819bb8715
commit
6b0c303ab4
@@ -189,7 +189,6 @@ def _support_torch_compile(
|
||||
CompilationLevel.NO_COMPILATION, CompilationLevel.DYNAMO_AS_IS
|
||||
] or not supports_dynamo() or _should_ignore_torch_compile(
|
||||
self.__class__)
|
||||
self.do_not_compile = True
|
||||
if self.do_not_compile:
|
||||
return
|
||||
|
||||
|
||||
@@ -1918,11 +1918,12 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
scheduler_output, is_dummy_run)
|
||||
# if is_global_first_rank():
|
||||
# logger.info(f"RUNNING FULL BATCH {num_scheduled_tokens}")
|
||||
+ skip_cuda_graphs = self.parallel_config.enable_microbatching
|
||||
with set_forward_context(attn_metadata,
|
||||
vllm_config=self.vllm_config,
|
||||
num_tokens=num_scheduled_tokens or 1,
|
||||
num_tokens_across_dp=num_tokens_across_dp,
|
||||
- skip_cuda_graphs=True):
|
||||
+ skip_cuda_graphs=skip_cuda_graphs):
|
||||
return self.model(
|
||||
input_ids=input_ids,
|
||||
positions=positions,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user