diff --git a/tests/compile/test_basic_correctness.py b/tests/compile/test_basic_correctness.py
index fd2b1866e62e..a1e5127ebeeb 100644
--- a/tests/compile/test_basic_correctness.py
+++ b/tests/compile/test_basic_correctness.py
@@ -20,7 +20,6 @@ class TestSetting:
     tp_size: int
     attn_backend: str
     method: str
-    fullgraph: bool
 
 
 # we cannot afford testing the full Cartesian product
@@ -36,7 +35,6 @@ class TestSetting:
             tp_size=2,
             attn_backend="FLASH_ATTN",
             method="generate",
-            fullgraph=True,
         ),
         # llama model with quantization
         TestSetting(
@@ -46,7 +44,6 @@ class TestSetting:
             tp_size=1,
             attn_backend="FLASH_ATTN",
             method="generate",
-            fullgraph=True,
         ),
         # MoE model
         TestSetting(
@@ -56,7 +53,6 @@ class TestSetting:
            tp_size=2,
             attn_backend="FLASH_ATTN",
             method="generate",
-            fullgraph=True,
         ),
         # embedding model
         TestSetting(
@@ -73,7 +69,6 @@ class TestSetting:
             tp_size=1,
             attn_backend="FLASH_ATTN",
             method="encode",
-            fullgraph=True,
         ),
         TestSetting(
             model="BAAI/bge-base-en-v1.5",
@@ -82,7 +77,6 @@ class TestSetting:
             tp_size=1,
             attn_backend="FLASH_ATTN",
             method="encode",
-            fullgraph=True,
         ),
         # vision language model
         TestSetting(
@@ -92,7 +86,6 @@ class TestSetting:
             tp_size=1,
             attn_backend="FLASH_ATTN",
             method="generate_with_image",
-            fullgraph=False,
         ),
     ],
 )
@@ -109,9 +102,8 @@ def test_compile_correctness(
     tp_size = test_setting.tp_size
     attn_backend = test_setting.attn_backend
     method = test_setting.method
-    fullgraph = test_setting.fullgraph
-    if cuda_device_count_stateless() != pp_size * tp_size:
-        pytest.skip(f"Need exactly {pp_size}*{tp_size} CUDA gpus but got "
+    if cuda_device_count_stateless() < pp_size * tp_size:
+        pytest.skip(f"Need at least {pp_size}*{tp_size} CUDA gpus but got "
                     f"{cuda_device_count_stateless()}")
 
     with monkeypatch.context() as m:
@@ -149,9 +141,5 @@ def test_compile_correctness(
     ]:
         all_args.append(final_args + [f"-O{level}"])
         all_envs.append({})
-        if level != CompilationLevel.DYNAMO_ONCE and not fullgraph:
-            # "DYNAMO_ONCE" will always use fullgraph
-            all_envs[-1][
-                "VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE"] = "0"  # type: ignore
 
     compare_all_settings(model, all_args * 3, all_envs, method=method)
diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py
index 84178344a5f3..3439a1b29038 100644
--- a/tests/compile/test_full_graph.py
+++ b/tests/compile/test_full_graph.py
@@ -79,9 +79,7 @@ def test_full_graph(
 ):
     model, model_kwargs = model_info
 
-    with monkeypatch.context() as m:
-        # make sure these models can be captured in full graph mode
-        m.setenv("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1")
+    with monkeypatch.context():
         print(f"MODEL={model}")
         run_model(optimization_level, model, model_kwargs)
 
diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py
index 96d4eae2ee9a..930e4d27b410 100644
--- a/vllm/compilation/wrapper.py
+++ b/vllm/compilation/wrapper.py
@@ -10,7 +10,6 @@ from typing import Callable, Optional
 
 import torch
 
-import vllm.envs as envs
 from vllm.config import (CompilationLevel, CUDAGraphMode,
                          get_current_vllm_config)
 from vllm.logger import init_logger
@@ -47,11 +46,10 @@ class TorchCompileWrapperWithCustomDispatcher:
                 options = get_current_vllm_config(
                 ).compilation_config.inductor_compile_config
 
-            compiled_callable = torch.compile(
-                self.forward,
-                fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
-                backend=backend,
-                options=options)
+            compiled_callable = torch.compile(self.forward,
+                                              fullgraph=True,
+                                              backend=backend,
+                                              options=options)
 
         self.compiled_callable = compiled_callable
         self.original_code_object = self.__class__.forward.__code__
diff --git a/vllm/envs.py b/vllm/envs.py
index 294a0b920fb7..3991a789d80f 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -434,11 +434,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_FLASH_ATTN_VERSION":
     lambda: maybe_convert_int(os.environ.get("VLLM_FLASH_ATTN_VERSION", None)),
 
-    # Internal flag to enable Dynamo fullgraph capture
-    "VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE":
-    lambda: bool(
-        os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
-
     # Feature flag to enable/disable Inductor standalone compile.
     # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
     # enabled by default.
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 3539f7561205..dffadd1d769b 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2602,9 +2602,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             backend = self.vllm_config.compilation_config.init_backend(
                 self.vllm_config)
             compilation_counter.dynamo_as_is_count += 1
-            self.model.compile(
-                fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
-                backend=backend)
+            self.model.compile(fullgraph=True, backend=backend)
             return
         # for other compilation levels, cudagraph behavior is controlled by
         # CudagraphWraper and CudagraphDispatcher of vllm.
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index c91c871766cf..f662f5a85eff 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -18,7 +18,6 @@ import torch.distributed
 import torch.nn as nn
 from tqdm.auto import tqdm
 
-import vllm.envs as envs
 from vllm.attention import AttentionMetadata, get_attn_backend
 from vllm.attention.backends.abstract import AttentionState
 from vllm.attention.backends.utils import CommonAttentionState
@@ -1099,10 +1098,9 @@ class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]):
             backend = self.vllm_config.compilation_config.init_backend(
                 self.vllm_config)
             compilation_counter.dynamo_as_is_count += 1
-            self.model = torch.compile(
-                self.model,
-                fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
-                backend=backend)
+            self.model = torch.compile(self.model,
+                                       fullgraph=True,
+                                       backend=backend)
 
     def get_model(self) -> nn.Module:
         return self.model
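Illustrative sketch (not part of the diff): with the VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE escape hatch removed, every torch.compile call above passes fullgraph=True unconditionally, so a Dynamo graph break now fails fast instead of silently splitting the graph. The toy functions below are hypothetical and only demonstrate that torch.compile behavior; they are not taken from vLLM.

import torch
import torch._dynamo


def clean_forward(x: torch.Tensor) -> torch.Tensor:
    # traces as a single FX graph, so fullgraph=True is satisfied
    return torch.relu(x) * 2


def breaking_forward(x: torch.Tensor) -> torch.Tensor:
    y = x + 1
    torch._dynamo.graph_break()  # deliberate graph break for the demo
    return y * 2


compiled_ok = torch.compile(clean_forward, fullgraph=True)
compiled_ok(torch.randn(4))  # compiles and runs as one graph

compiled_strict = torch.compile(breaking_forward, fullgraph=True)
try:
    compiled_strict(torch.randn(4))
except Exception as err:
    # with fullgraph=True the graph break is a hard error rather than a
    # silent fall back to eager execution of the unsupported piece
    print(f"rejected under fullgraph=True: {type(err).__name__}")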