mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:44:57 +08:00
[Misc] Minor patch for draft model runner (#6523)
This commit is contained in:
parent
61e592747c
commit
8a74c68bd1
@ -15,8 +15,12 @@ from vllm.worker.model_runner import (ModelInputForGPUWithSamplingMetadata,
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# A flag to enable debug prints for the updated input tensors
|
||||
# before each step.
|
||||
debug_advance_input = False
|
||||
enable_gpu_advance_step = True
|
||||
# A flag to allow GPU advance step for draft model runner.
|
||||
# Set to False for debugging.
|
||||
allow_gpu_advance_step = True
|
||||
|
||||
|
||||
class TP1DraftModelRunner(ModelRunner):
|
||||
@ -196,7 +200,7 @@ class TP1DraftModelRunner(ModelRunner):
|
||||
3. No LORA
|
||||
4. No prompt_adapter_config
|
||||
"""
|
||||
if not enable_gpu_advance_step:
|
||||
if not allow_gpu_advance_step:
|
||||
return False
|
||||
|
||||
# We allow multi-step GPU only in decode mode
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user