mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-30 13:07:08 +08:00
cleanup
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
06cc133a63
commit
908e9f8f54
@ -4578,7 +4578,7 @@ class VllmConfig:
|
|||||||
# More specifically piecewise cuda-graphs
|
# More specifically piecewise cuda-graphs
|
||||||
logger.warning_once(
|
logger.warning_once(
|
||||||
"Piecewise compilation is not supported with "
|
"Piecewise compilation is not supported with "
|
||||||
"microbatching. Disabling piecewiseching compilation.")
|
"microbatching. Disabling piecewise compilation.")
|
||||||
self.compilation_config.level = CompilationLevel.NO_COMPILATION
|
self.compilation_config.level = CompilationLevel.NO_COMPILATION
|
||||||
disable_chunked_prefill_reasons: list[str] = []
|
disable_chunked_prefill_reasons: list[str] = []
|
||||||
|
|
||||||
|
|||||||
@ -44,9 +44,7 @@ class DPMetadata:
|
|||||||
device="cpu",
|
device="cpu",
|
||||||
dtype=torch.int32)
|
dtype=torch.int32)
|
||||||
from vllm.distributed.parallel_state import get_dp_group
|
from vllm.distributed.parallel_state import get_dp_group
|
||||||
# logger.info("STARTING AR num_tokens_across_dp")
|
|
||||||
dist.all_reduce(num_tokens_tensor, group=get_dp_group().cpu_group)
|
dist.all_reduce(num_tokens_tensor, group=get_dp_group().cpu_group)
|
||||||
# logger.info("finishing num_tokens_across_dp")
|
|
||||||
return num_tokens_tensor
|
return num_tokens_tensor
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -57,16 +55,13 @@ class DPMetadata:
|
|||||||
device="cpu",
|
device="cpu",
|
||||||
dtype=torch.int32)
|
dtype=torch.int32)
|
||||||
from vllm.distributed.parallel_state import get_dp_group
|
from vllm.distributed.parallel_state import get_dp_group
|
||||||
# logger.info(f"should_ubatch_tensor before ar {should_ubatch_tensor}")
|
|
||||||
dist.all_reduce(should_ubatch_tensor, group=get_dp_group().cpu_group)
|
dist.all_reduce(should_ubatch_tensor, group=get_dp_group().cpu_group)
|
||||||
# logger.info(f"should_ubatch_tensor after ar {should_ubatch_tensor}")
|
|
||||||
|
|
||||||
# If there's an incorrect ordering of ARs across DP ranks, this tensor
|
# If there's an incorrect ordering of ARs across DP ranks, this tensor
|
||||||
# can end up containing the number of padded tokens for a DP rank
|
# can end up containing the number of padded tokens for a DP rank
|
||||||
assert torch.all(should_ubatch_tensor <= 1)
|
assert torch.all(should_ubatch_tensor <= 1)
|
||||||
|
|
||||||
result: bool = bool(torch.all(should_ubatch_tensor == 1).item())
|
result: bool = bool(torch.all(should_ubatch_tensor == 1).item())
|
||||||
# print(f"FINISHING AR should_ubatch_across_dp {result} {should_ubatch_tensor}")
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|||||||
@ -823,8 +823,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
))
|
))
|
||||||
for layer_name in kv_cache_group_spec.layer_names:
|
for layer_name in kv_cache_group_spec.layer_names:
|
||||||
assert type(attn_metadata) is list
|
assert type(attn_metadata) is list
|
||||||
# assert attn_metadata_i is not None
|
|
||||||
# What if it's None? Do we still add it to the list?
|
|
||||||
attn_metadata[ubid][layer_name] = attn_metadata_i
|
attn_metadata[ubid][layer_name] = attn_metadata_i
|
||||||
else:
|
else:
|
||||||
attn_metadata_i = (
|
attn_metadata_i = (
|
||||||
@ -1581,7 +1579,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
|
|
||||||
@torch.inference_mode()
|
@torch.inference_mode()
|
||||||
def _ubatch_thread(results, model, ubatch_metadata):
|
def _ubatch_thread(results, model, ubatch_metadata):
|
||||||
# print(f"Starting Request on ubatch: {ubatch_ctx.id}", flush=True)
|
|
||||||
with ubatch_metadata.context:
|
with ubatch_metadata.context:
|
||||||
model_output = model(
|
model_output = model(
|
||||||
input_ids=ubatch_metadata.input_ids,
|
input_ids=ubatch_metadata.input_ids,
|
||||||
@ -1590,7 +1587,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
inputs_embeds=ubatch_metadata.inputs_embeds,
|
inputs_embeds=ubatch_metadata.inputs_embeds,
|
||||||
)
|
)
|
||||||
results.append((ubatch_metadata.context.id, model_output))
|
results.append((ubatch_metadata.context.id, model_output))
|
||||||
# print(f"Finishing Request on ubatch: {ubatch_ctx.id}", flush=True)
|
|
||||||
|
|
||||||
def _run_ubatches(ubatch_metadata, model) -> torch.Tensor:
|
def _run_ubatches(ubatch_metadata, model) -> torch.Tensor:
|
||||||
results: list[tuple[int, torch.Tensor]] = []
|
results: list[tuple[int, torch.Tensor]] = []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user