mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-07 19:27:06 +08:00
misc cleanups to prepare for rebase
Signed-off-by: Sage Moore <sage@neuralmagic.com>
This commit is contained in:
parent
0323e29153
commit
8f592524cb
@ -811,10 +811,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
||||
# all2all backend for vllm's expert parallel communication
|
||||
"VLLM_ALL2ALL_BACKEND":
|
||||
lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"),
|
||||
|
||||
# check that the cudagraphs input addresses are correct before replaying
|
||||
"VLLM_CUDAGRAPH_SANITIZER":
|
||||
lambda: os.getenv("VLLM_CUDAGRAPH_SANITIZER", "0") == "1",
|
||||
}
|
||||
|
||||
# end-env-vars-definition
|
||||
|
||||
@ -29,7 +29,6 @@ from vllm.model_executor.utils import set_weight_attrs
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.platforms.interface import CpuArchEnum
|
||||
from vllm.utils import direct_register_custom_op
|
||||
from vllm.v1.worker.ubatching import get_current_ubatch_context
|
||||
|
||||
has_pplx = importlib.util.find_spec("pplx_kernels") is not None
|
||||
|
||||
@ -306,6 +305,7 @@ class AllToAllCache:
|
||||
self._cache[key] = instance
|
||||
return instance
|
||||
|
||||
|
||||
# Global singleton
|
||||
_all_to_all_cache = AllToAllCache()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user