mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-19 20:37:03 +08:00
misc fixes
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
4819bb8715
commit
6b0c303ab4
@ -189,7 +189,6 @@ def _support_torch_compile(
|
|||||||
CompilationLevel.NO_COMPILATION, CompilationLevel.DYNAMO_AS_IS
|
CompilationLevel.NO_COMPILATION, CompilationLevel.DYNAMO_AS_IS
|
||||||
] or not supports_dynamo() or _should_ignore_torch_compile(
|
] or not supports_dynamo() or _should_ignore_torch_compile(
|
||||||
self.__class__)
|
self.__class__)
|
||||||
self.do_not_compile = True
|
|
||||||
if self.do_not_compile:
|
if self.do_not_compile:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|||||||
@ -1918,11 +1918,12 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
scheduler_output, is_dummy_run)
|
scheduler_output, is_dummy_run)
|
||||||
# if is_global_first_rank():
|
# if is_global_first_rank():
|
||||||
# logger.info(f"RUNNING FULL BATCH {num_scheduled_tokens}")
|
# logger.info(f"RUNNING FULL BATCH {num_scheduled_tokens}")
|
||||||
|
skip_cuda_graphs = self.parallel_config.enable_microbatching
|
||||||
with set_forward_context(attn_metadata,
|
with set_forward_context(attn_metadata,
|
||||||
vllm_config=self.vllm_config,
|
vllm_config=self.vllm_config,
|
||||||
num_tokens=num_scheduled_tokens or 1,
|
num_tokens=num_scheduled_tokens or 1,
|
||||||
num_tokens_across_dp=num_tokens_across_dp,
|
num_tokens_across_dp=num_tokens_across_dp,
|
||||||
skip_cuda_graphs=True):
|
skip_cuda_graphs=skip_cuda_graphs):
|
||||||
return self.model(
|
return self.model(
|
||||||
input_ids=input_ids,
|
input_ids=input_ids,
|
||||||
positions=positions,
|
positions=positions,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user