misc cleanups to prepare for rebase

Signed-off-by: Sage Moore <sage@neuralmagic.com>
2026-07-07 12:57:24 +08:00 · 2025-06-02 14:15:52 +00:00 · 2025-06-02 14:15:52 +00:00 · 8f592524cb
commit 8f592524cb
parent 0323e29153
2 changed files with 1 addition and 5 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -811,10 +811,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # all2all backend for vllm's expert parallel communication
    "VLLM_ALL2ALL_BACKEND":
    lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"),
-
-    # check that the cudagraphs input addresses are correct before replaying
-    "VLLM_CUDAGRAPH_SANITIZER":
-    lambda: os.getenv("VLLM_CUDAGRAPH_SANITIZER", "0") == "1",
 }

 # end-env-vars-definition
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -29,7 +29,6 @@ from vllm.model_executor.utils import set_weight_attrs
 from vllm.platforms import current_platform
 from vllm.platforms.interface import CpuArchEnum
 from vllm.utils import direct_register_custom_op
-from vllm.v1.worker.ubatching import get_current_ubatch_context

 has_pplx = importlib.util.find_spec("pplx_kernels") is not None

@@ -306,6 +305,7 @@ class AllToAllCache:
                self._cache[key] = instance
            return instance

+
 # Global singleton
 _all_to_all_cache = AllToAllCache()