diff --git a/vllm/envs.py b/vllm/envs.py
index 2fda2903179b5..c161fa0dff6ba 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -213,7 +213,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     # Target device of vLLM, supporting [cuda (by default),
     # rocm, neuron, cpu]
     "VLLM_TARGET_DEVICE":
-    lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda"),
+    lambda: os.getenv("VLLM_TARGET_DEVICE", "cuda").lower(),
 
     # Maximum number of compilation jobs to run in parallel.
     # By default this is the number of CPUs