mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-28 02:45:52 +08:00
add log
Signed-off-by: jiangkuaixue123 <jiangxiaozhou111@163.com>
This commit is contained in:
parent
00570c9fac
commit
36f9c3d6b5
@ -557,6 +557,7 @@ class ParallelConfig:
|
||||
if self.distributed_executor_backend is None and self.world_size > 1:
|
||||
# We use multiprocessing by default if world_size fits on the
|
||||
# current node and we aren't in a ray placement group.
|
||||
|
||||
from vllm.v1.executor import ray_utils
|
||||
|
||||
backend: DistributedExecutorBackend = "mp"
|
||||
|
||||
@ -317,4 +317,5 @@ class P2PAFDConnector(AFDConnectorBase):
|
||||
)
|
||||
self._current_afd_connector_metadata.recv_handle_list = work_list
|
||||
self._current_afd_connector_metadata.layer_idx = layer_idx
|
||||
self._current_afd_connector_metadata.stage_idx = stage_idx
|
||||
return hidden_states, self._current_afd_connector_metadata
|
||||
|
||||
@ -563,7 +563,6 @@ class ColumnParallelLinear(LinearBase):
|
||||
# Matrix multiply.
|
||||
assert self.quant_method is not None
|
||||
output_parallel = self.quant_method.apply(self, input_, bias)
|
||||
|
||||
if self.gather_output and self.tp_size > 1:
|
||||
# All-gather across the partitions.
|
||||
output = tensor_model_parallel_all_gather(output_parallel)
|
||||
|
||||
@ -387,7 +387,6 @@ class DeepseekV2MoE(nn.Module):
|
||||
final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
|
||||
final_hidden_states
|
||||
)
|
||||
|
||||
return final_hidden_states.view(num_tokens, hidden_dim)
|
||||
|
||||
|
||||
@ -1415,9 +1414,6 @@ class DeepseekV2Model(nn.Module):
|
||||
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
for stage_i in range(forward_conext.afd_metadata.num_of_stages):
|
||||
logger.info(
|
||||
f"jcz deepseekv2 forward_with_afd_v2 layer_idx: {layer.layer_idx}, stage_i: {stage_i}"
|
||||
)
|
||||
afd_connector = afd_metadata.afd_connector
|
||||
forward_conext.attn_metadata = afd_metadata.attn_metadata_list[stage_i]
|
||||
forward_conext.dp_metadata = afd_metadata.dp_metadata_list[stage_i]
|
||||
@ -1436,10 +1432,6 @@ class DeepseekV2Model(nn.Module):
|
||||
work.wait()
|
||||
|
||||
current_positions = afd_metadata.positions_list[stage_i]
|
||||
logger.info(
|
||||
f"jcz deepseekv2 forward_with_afd_v2 hidden_states: {hidden_states.shape}"
|
||||
f" positions:{positions.shape}"
|
||||
)
|
||||
hidden_states, residual = layer(
|
||||
current_positions, hidden_states, residual, llama_4_scaling
|
||||
)
|
||||
@ -1458,15 +1450,11 @@ class DeepseekV2Model(nn.Module):
|
||||
)
|
||||
afd_connector.send_attn_output(hidden_states, metadata)
|
||||
|
||||
# Recv last layer and last stage FFN output.
|
||||
ubatch_hidden_states[afd_metadata.num_of_stages - 1], recv_metadata = (
|
||||
afd_connector.recv_ffn_output()
|
||||
)
|
||||
if recv_metadata.recv_handle_list is not None:
|
||||
recv_handle = recv_metadata.recv_handle_list
|
||||
if recv_handle is not None:
|
||||
for work in recv_handle:
|
||||
work.wait()
|
||||
# Recv last layer FFN output.
|
||||
for stage_i in range(afd_metadata.num_of_stages):
|
||||
ubatch_hidden_states[stage_i], recv_metadata = (
|
||||
afd_connector.recv_ffn_output()
|
||||
)
|
||||
|
||||
# Re-assemble the batch
|
||||
hidden_states = torch.cat(ubatch_hidden_states, dim=0)
|
||||
|
||||
@ -105,7 +105,6 @@ class EngineCore:
|
||||
|
||||
self.afd_config = vllm_config.afd_config
|
||||
if self.afd_config and self.afd_config.afd_role == "ffn":
|
||||
logger.info("jcz EngineCore ffn role")
|
||||
return
|
||||
|
||||
self.available_gpu_memory_for_kv_cache = -1
|
||||
|
||||
@ -137,6 +137,7 @@ class GPUFFNModelRunner(LoRAModelRunnerMixin):
|
||||
current_layer_idx = recv_metadata.layer_idx
|
||||
logger.info(
|
||||
f"layer {current_layer_idx} moe recv hidden states type:{type(hidden_states)}, shape:{hidden_states.shape}"
|
||||
f" dp_metadata: {dp_metadata}"
|
||||
)
|
||||
num_tokens = hidden_states.shape[0]
|
||||
if recv_metadata is not None and recv_metadata.recv_handle_list is not None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user