mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-20 04:05:01 +08:00
[Refactor] Remove VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK (#27750)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
4917002523
commit
a8141fa649
@ -363,7 +363,7 @@ class DeepEPLLAll2AllManager(DeepEPAll2AllManagerBase):
|
|||||||
num_rdma_bytes=num_rdma_bytes,
|
num_rdma_bytes=num_rdma_bytes,
|
||||||
low_latency_mode=True,
|
low_latency_mode=True,
|
||||||
num_qps_per_rank=num_qps_per_rank,
|
num_qps_per_rank=num_qps_per_rank,
|
||||||
allow_nvlink_for_low_latency_mode=envs.VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK,
|
allow_nvlink_for_low_latency_mode=True,
|
||||||
allow_mnnvl=envs.VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL,
|
allow_mnnvl=envs.VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -207,7 +207,6 @@ if TYPE_CHECKING:
|
|||||||
VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER"
|
VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER"
|
||||||
VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024
|
VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024
|
||||||
VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE: bool = False
|
VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE: bool = False
|
||||||
VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK: bool = True
|
|
||||||
VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL: bool = False
|
VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL: bool = False
|
||||||
VLLM_DBO_COMM_SMS: int = 20
|
VLLM_DBO_COMM_SMS: int = 20
|
||||||
VLLM_PATTERN_MATCH_DEBUG: str | None = None
|
VLLM_PATTERN_MATCH_DEBUG: str | None = None
|
||||||
@ -1400,11 +1399,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
"VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE": lambda: bool(
|
"VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE": lambda: bool(
|
||||||
int(os.getenv("VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE", "0"))
|
int(os.getenv("VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE", "0"))
|
||||||
),
|
),
|
||||||
# Allow DeepEP to use nvlink for internode_ll kernel, turn this on for
|
|
||||||
# better latency on GB200 like system
|
|
||||||
"VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK": lambda: bool(
|
|
||||||
int(os.getenv("VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK", "1"))
|
|
||||||
),
|
|
||||||
# Allow DeepEP to use MNNVL (multi-node nvlink) for internode_ll kernel,
|
# Allow DeepEP to use MNNVL (multi-node nvlink) for internode_ll kernel,
|
||||||
# turn this on for better latency on GB200-like systems
|
# turn this on for better latency on GB200-like systems
|
||||||
"VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL": lambda: bool(
|
"VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL": lambda: bool(
|
||||||
@ -1566,7 +1560,6 @@ def compute_hash() -> str:
|
|||||||
"VLLM_NVFP4_GEMM_BACKEND",
|
"VLLM_NVFP4_GEMM_BACKEND",
|
||||||
"VLLM_USE_FBGEMM",
|
"VLLM_USE_FBGEMM",
|
||||||
"VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE",
|
"VLLM_DEEPEP_HIGH_THROUGHPUT_FORCE_INTRA_NODE",
|
||||||
"VLLM_DEEPEP_LOW_LATENCY_ALLOW_NVLINK",
|
|
||||||
"VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL",
|
"VLLM_DEEPEP_LOW_LATENCY_USE_MNNVL",
|
||||||
]
|
]
|
||||||
for key in environment_variables_to_hash:
|
for key in environment_variables_to_hash:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user