From 8f592524cb44b55b7b7b759fdc56418d41f07442 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Mon, 2 Jun 2025 14:15:52 +0000 Subject: [PATCH] misc cleanups to prepare for rebase Signed-off-by: Sage Moore --- vllm/envs.py | 4 ---- vllm/model_executor/layers/fused_moe/layer.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index cd545f32c4301..dc23c8ea5314d 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -811,10 +811,6 @@ environment_variables: dict[str, Callable[[], Any]] = { # all2all backend for vllm's expert parallel communication "VLLM_ALL2ALL_BACKEND": lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"), - - # check that the cudagraphs input addresses are correct before replaying - "VLLM_CUDAGRAPH_SANITIZER": - lambda: os.getenv("VLLM_CUDAGRAPH_SANITIZER", "0") == "1", } # end-env-vars-definition diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 822bde906dc97..f1cb77f64eae7 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -29,7 +29,6 @@ from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.platforms.interface import CpuArchEnum from vllm.utils import direct_register_custom_op -from vllm.v1.worker.ubatching import get_current_ubatch_context has_pplx = importlib.util.find_spec("pplx_kernels") is not None @@ -306,6 +305,7 @@ class AllToAllCache: self._cache[key] = instance return instance + # Global singleton _all_to_all_cache = AllToAllCache()