mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 06:55:01 +08:00
[misc] instruct pytorch to use nvml-based cuda check (#15951)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
parent
8bd651b318
commit
1cab43c2d2
@ -4,9 +4,10 @@
|
|||||||
# version library first. Such assumption is critical for some customization.
|
# version library first. Such assumption is critical for some customization.
|
||||||
from .version import __version__, __version_tuple__ # isort:skip
|
from .version import __version__, __version_tuple__ # isort:skip
|
||||||
|
|
||||||
import os
|
# The environment variables override should be imported before any other
|
||||||
|
# modules to ensure that the environment variables are set before any
|
||||||
import torch
|
# other modules are imported.
|
||||||
|
import vllm.env_override # isort:skip # noqa: F401
|
||||||
|
|
||||||
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
|
||||||
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
from vllm.engine.async_llm_engine import AsyncLLMEngine
|
||||||
@ -23,19 +24,6 @@ from vllm.outputs import (ClassificationOutput, ClassificationRequestOutput,
|
|||||||
from vllm.pooling_params import PoolingParams
|
from vllm.pooling_params import PoolingParams
|
||||||
from vllm.sampling_params import SamplingParams
|
from vllm.sampling_params import SamplingParams
|
||||||
|
|
||||||
# set some common config/environment variables that should be set
|
|
||||||
# for all processes created by vllm and all processes
|
|
||||||
# that interact with vllm workers.
|
|
||||||
# they are executed whenever `import vllm` is called.
|
|
||||||
|
|
||||||
# see https://github.com/NVIDIA/nccl/issues/1234
|
|
||||||
os.environ['NCCL_CUMEM_ENABLE'] = '0'
|
|
||||||
|
|
||||||
# see https://github.com/vllm-project/vllm/issues/10480
|
|
||||||
os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
|
|
||||||
# see https://github.com/vllm-project/vllm/issues/10619
|
|
||||||
torch._inductor.config.compile_threads = 1
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"__version__",
|
"__version__",
|
||||||
"__version_tuple__",
|
"__version_tuple__",
|
||||||
|
|||||||
21
vllm/env_override.py
Normal file
21
vllm/env_override.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
import os
|
||||||
|
|
||||||
|
import torch
|
||||||
|
|
||||||
|
# set some common config/environment variables that should be set
|
||||||
|
# for all processes created by vllm and all processes
|
||||||
|
# that interact with vllm workers.
|
||||||
|
# These settings take effect whenever `import vllm` is executed.
|
||||||
|
|
||||||
|
# see https://github.com/NVIDIA/nccl/issues/1234
|
||||||
|
os.environ['NCCL_CUMEM_ENABLE'] = '0'
|
||||||
|
|
||||||
|
# see https://github.com/vllm-project/vllm/pull/15951
|
||||||
|
# Makes torch.cuda.is_available() use an NVML-based check, avoiding unintentional CUDA initialization.
|
||||||
|
os.environ['PYTORCH_NVML_BASED_CUDA_CHECK'] = '1'
|
||||||
|
|
||||||
|
# see https://github.com/vllm-project/vllm/issues/10480
|
||||||
|
os.environ['TORCHINDUCTOR_COMPILE_THREADS'] = '1'
|
||||||
|
# see https://github.com/vllm-project/vllm/issues/10619
|
||||||
|
torch._inductor.config.compile_threads = 1
|
||||||
Loading…
x
Reference in New Issue
Block a user