From 02d709a6f1e442ab8f1129170eac338a6b3a4b1f Mon Sep 17 00:00:00 2001 From: Kay Yan Date: Thu, 16 Oct 2025 22:31:02 +0800 Subject: [PATCH] [docs] standardize Hugging Face env var to `HF_TOKEN` (deprecates `HUGGING_FACE_HUB_TOKEN`) (#27020) Signed-off-by: Kay Yan --- docs/deployment/docker.md | 6 +++--- docs/deployment/frameworks/lws.md | 4 ++-- docs/deployment/k8s.md | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md index 1f19f2fecfab1..074909dd0676a 100644 --- a/docs/deployment/docker.md +++ b/docs/deployment/docker.md @@ -10,7 +10,7 @@ The image can be used to run OpenAI compatible server and is available on Docker ```bash docker run --runtime nvidia --gpus all \ -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env "HF_TOKEN=$HF_TOKEN" \ -p 8000:8000 \ --ipc=host \ vllm/vllm-openai:latest \ @@ -22,7 +22,7 @@ This image can also be used with other container engines such as [Podman](https: ```bash podman run --device nvidia.com/gpu=all \ -v ~/.cache/huggingface:/root/.cache/huggingface \ - --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \ + --env "HF_TOKEN=$HF_TOKEN" \ -p 8000:8000 \ --ipc=host \ docker.io/vllm/vllm-openai:latest \ @@ -128,7 +128,7 @@ To run vLLM with the custom-built Docker image: docker run --runtime nvidia --gpus all \ -v ~/.cache/huggingface:/root/.cache/huggingface \ -p 8000:8000 \ - --env "HUGGING_FACE_HUB_TOKEN=" \ + --env "HF_TOKEN=" \ vllm/vllm-openai ``` diff --git a/docs/deployment/frameworks/lws.md b/docs/deployment/frameworks/lws.md index 3b9fa3ea43d64..14710a8dc3334 100644 --- a/docs/deployment/frameworks/lws.md +++ b/docs/deployment/frameworks/lws.md @@ -35,7 +35,7 @@ Deploy the following yaml file `lws.yaml` - name: vllm-leader image: docker.io/vllm/vllm-openai:latest env: - - name: HUGGING_FACE_HUB_TOKEN + - name: HF_TOKEN value: command: - sh @@ -83,7 +83,7 @@ Deploy the following yaml file `lws.yaml` ephemeral-storage: 800Gi cpu: 125 env: - - name: HUGGING_FACE_HUB_TOKEN + - name: HF_TOKEN value: volumeMounts: - mountPath: /dev/shm diff --git a/docs/deployment/k8s.md b/docs/deployment/k8s.md index d3fda7eb6fb6e..54031ec368b5c 100644 --- a/docs/deployment/k8s.md +++ b/docs/deployment/k8s.md @@ -82,7 +82,7 @@ Next, start the vLLM server as a Kubernetes Deployment and Service: "vllm serve meta-llama/Llama-3.2-1B-Instruct" ] env: - - name: HUGGING_FACE_HUB_TOKEN + - name: HF_TOKEN valueFrom: secretKeyRef: name: hf-token-secret @@ -209,7 +209,7 @@ INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) "vllm serve mistralai/Mistral-7B-Instruct-v0.3 --trust-remote-code --enable-chunked-prefill --max_num_batched_tokens 1024" ] env: - - name: HUGGING_FACE_HUB_TOKEN + - name: HF_TOKEN valueFrom: secretKeyRef: name: hf-token-secret @@ -298,7 +298,7 @@ INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) "vllm serve mistralai/Mistral-7B-v0.3 --port 8000 --trust-remote-code --enable-chunked-prefill --max_num_batched_tokens 1024" ] env: - - name: HUGGING_FACE_HUB_TOKEN + - name: HF_TOKEN valueFrom: secretKeyRef: name: hf-token-secret