[ROCm] Effort to reduce the number of environment variables in command line (#17229)

Signed-off-by: Hongxia Yang <hongxia.yang@amd.com>
This commit is contained in:
Hongxia Yang 2025-05-01 02:27:06 -04:00 committed by GitHub
parent 7a0a146c54
commit 90d0a54c4d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -114,6 +114,15 @@ COPY --from=export_vllm /examples ${COMMON_WORKDIR}/vllm/examples
ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
    TOKENIZERS_PARALLELISM=false
# Speeds up safetensors loading and shortens end-to-end time.
ENV SAFETENSORS_FAST_GPU=1
# Select the 'spawn' multiprocessing start method by default so users do not hit:
#   RuntimeError: Cannot re-initialize CUDA in forked subprocess. To use CUDA with
#   multiprocessing, you must use the 'spawn' start method
# Reference: https://pytorch.org/docs/stable/notes/multiprocessing.html#cuda-in-multiprocessing
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
# Performance-related setting.
ENV HIP_FORCE_DEV_KERNARG=1