mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-03 15:50:05 +08:00
[Refactor] Remove Unused Env VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON (#20334)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
7058d7dd5d
commit
9dae7d46bf
@ -104,7 +104,6 @@ if TYPE_CHECKING:
|
|||||||
VLLM_SERVER_DEV_MODE: bool = False
|
VLLM_SERVER_DEV_MODE: bool = False
|
||||||
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
|
VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128
|
||||||
VLLM_MLA_DISABLE: bool = False
|
VLLM_MLA_DISABLE: bool = False
|
||||||
VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON: bool = False
|
|
||||||
VLLM_RAY_PER_WORKER_GPUS: float = 1.0
|
VLLM_RAY_PER_WORKER_GPUS: float = 1.0
|
||||||
VLLM_RAY_BUNDLE_INDICES: str = ""
|
VLLM_RAY_BUNDLE_INDICES: str = ""
|
||||||
VLLM_CUDART_SO_PATH: Optional[str] = None
|
VLLM_CUDART_SO_PATH: Optional[str] = None
|
||||||
@ -769,12 +768,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
"VLLM_MLA_DISABLE":
|
"VLLM_MLA_DISABLE":
|
||||||
lambda: bool(int(os.getenv("VLLM_MLA_DISABLE", "0"))),
|
lambda: bool(int(os.getenv("VLLM_MLA_DISABLE", "0"))),
|
||||||
|
|
||||||
# If set, vLLM will use the Triton implementation of moe_align_block_size,
|
|
||||||
# i.e. moe_align_block_size_triton in fused_moe.py.
|
|
||||||
"VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON":
|
|
||||||
lambda: bool(int(os.getenv("VLLM_ENABLE_MOE_ALIGN_BLOCK_SIZE_TRITON", "0"))
|
|
||||||
),
|
|
||||||
|
|
||||||
# Number of GPUs per worker in Ray, if it is set to be a fraction,
|
# Number of GPUs per worker in Ray, if it is set to be a fraction,
|
||||||
# it allows ray to schedule multiple actors on a single GPU,
|
# it allows ray to schedule multiple actors on a single GPU,
|
||||||
# so that users can colocate other actors on the same GPUs as vLLM.
|
# so that users can colocate other actors on the same GPUs as vLLM.
|
||||||
|
|||||||
@ -94,7 +94,6 @@ def moe_align_block_size_stage4(
|
|||||||
|
|
||||||
# Triton implementation based on:
|
# Triton implementation based on:
|
||||||
# https://github.com/sgl-project/sglang/commit/ba5112ff691d791a9e38c6c71f59324a5fcb49d0
|
# https://github.com/sgl-project/sglang/commit/ba5112ff691d791a9e38c6c71f59324a5fcb49d0
|
||||||
# TODO(wentao): Deprecate this function in the future.
|
|
||||||
def moe_align_block_size_triton(
|
def moe_align_block_size_triton(
|
||||||
topk_ids: torch.Tensor,
|
topk_ids: torch.Tensor,
|
||||||
num_experts: int,
|
num_experts: int,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user