add log

Signed-off-by: jiangkuaixue123 <jiangxiaozhou111@163.com>
2026-06-29 04:27:11 +08:00 · 2025-12-15 16:25:45 +08:00 · 2025-12-15 16:25:45 +08:00 · 36f9c3d6b5
commit 36f9c3d6b5
parent 00570c9fac
6 changed files with 8 additions and 19 deletions
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@ -557,6 +557,7 @@ class ParallelConfig:
        if self.distributed_executor_backend is None and self.world_size > 1:
            # We use multiprocessing by default if world_size fits on the
            # current node and we aren't in a ray placement group.
+            
            from vllm.v1.executor import ray_utils

            backend: DistributedExecutorBackend = "mp"
--- a/vllm/distributed/afd_transfer/afd_connector/p2p_connector.py
+++ b/vllm/distributed/afd_transfer/afd_connector/p2p_connector.py
@ -317,4 +317,5 @@ class P2PAFDConnector(AFDConnectorBase):
        )
        self._current_afd_connector_metadata.recv_handle_list = work_list
        self._current_afd_connector_metadata.layer_idx = layer_idx
+        self._current_afd_connector_metadata.stage_idx = stage_idx
        return hidden_states, self._current_afd_connector_metadata
--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@ -563,7 +563,6 @@ class ColumnParallelLinear(LinearBase):
        # Matrix multiply.
        assert self.quant_method is not None
        output_parallel = self.quant_method.apply(self, input_, bias)
-
        if self.gather_output and self.tp_size > 1:
            # All-gather across the partitions.
            output = tensor_model_parallel_all_gather(output_parallel)
--- a/vllm/model_executor/models/deepseek_v2.py
+++ b/vllm/model_executor/models/deepseek_v2.py
@ -387,7 +387,6 @@ class DeepseekV2MoE(nn.Module):
            final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel(
                final_hidden_states
            )
-
        return final_hidden_states.view(num_tokens, hidden_dim)


@ -1415,9 +1414,6 @@ class DeepseekV2Model(nn.Module):

        for layer in islice(self.layers, self.start_layer, self.end_layer):
            for stage_i in range(forward_conext.afd_metadata.num_of_stages):
-                logger.info(
-                    f"jcz deepseekv2 forward_with_afd_v2 layer_idx: {layer.layer_idx}, stage_i: {stage_i}"
-                )
                afd_connector = afd_metadata.afd_connector
                forward_conext.attn_metadata = afd_metadata.attn_metadata_list[stage_i]
                forward_conext.dp_metadata = afd_metadata.dp_metadata_list[stage_i]
@ -1436,10 +1432,6 @@ class DeepseekV2Model(nn.Module):
                        work.wait()

                current_positions = afd_metadata.positions_list[stage_i]
-                logger.info(
-                    f"jcz deepseekv2 forward_with_afd_v2 hidden_states: {hidden_states.shape}"
-                    f" positions:{positions.shape}"
-                )
                hidden_states, residual = layer(
                    current_positions, hidden_states, residual, llama_4_scaling
                )
@ -1458,15 +1450,11 @@ class DeepseekV2Model(nn.Module):
                )
                afd_connector.send_attn_output(hidden_states, metadata)

-        # Recv last layer and last stage FFN output.
-        ubatch_hidden_states[afd_metadata.num_of_stages - 1], recv_metadata = (
-            afd_connector.recv_ffn_output()
-        )
-        if recv_metadata.recv_handle_list is not None:
-            recv_handle = recv_metadata.recv_handle_list
-        if recv_handle is not None:
-            for work in recv_handle:
-                work.wait()
+        # Recv last layer FFN output.
+        for stage_i in range(afd_metadata.num_of_stages):
+            ubatch_hidden_states[stage_i], recv_metadata = (
+                afd_connector.recv_ffn_output()
+            )

        # Re-assemble the batch
        hidden_states = torch.cat(ubatch_hidden_states, dim=0)
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@ -105,7 +105,6 @@ class EngineCore:

        self.afd_config = vllm_config.afd_config
        if self.afd_config and self.afd_config.afd_role == "ffn":
-            logger.info("jcz EngineCore ffn role")
            return

        self.available_gpu_memory_for_kv_cache = -1
--- a/vllm/v1/worker/gpu_ffn_model_runner.py
+++ b/vllm/v1/worker/gpu_ffn_model_runner.py
@ -137,6 +137,7 @@ class GPUFFNModelRunner(LoRAModelRunnerMixin):
            current_layer_idx = recv_metadata.layer_idx
            logger.info(
                f"layer {current_layer_idx} moe recv hidden states type:{type(hidden_states)}, shape:{hidden_states.shape}"
+                f" dp_metadata: {dp_metadata}"
            )
            num_tokens = hidden_states.shape[0]
            if recv_metadata is not None and recv_metadata.recv_handle_list is not None: