Enable pinned memory by default on Nvidia. (#10656)

Removed the `pinned_memory` option from --fast; pinned memory is now enabled by default on Nvidia. Pass --disable-pinned-memory to turn it off. Please report any issues this change causes.
2025-12-08 21:44:33 +08:00 · 2025-11-05 15:08:13 -08:00 · 2025-11-05 15:08:13 -08:00 · 1d69245981
commit 1d69245981
parent 97f198e421
2 changed files with 11 additions and 14 deletions
--- a/comfy/cli_args.py
+++ b/comfy/cli_args.py
@@ -145,10 +145,11 @@ class PerformanceFeature(enum.Enum):
    Fp8MatrixMultiplication = "fp8_matrix_mult"
    CublasOps = "cublas_ops"
    AutoTune = "autotune"
-    PinnedMem = "pinned_memory"

 parser.add_argument("--fast", nargs="*", type=PerformanceFeature, help="Enable some untested and potentially quality deteriorating optimizations. This is used to test new features so using it might crash your comfyui. --fast with no arguments enables everything. You can pass a list specific optimizations if you only want to enable specific ones. Current valid optimizations: {}".format(" ".join(map(lambda c: c.value, PerformanceFeature))))

+parser.add_argument("--disable-pinned-memory", action="store_true", help="Disable pinned memory use.")
+
 parser.add_argument("--mmap-torch-files", action="store_true", help="Use mmap when loading ckpt/pt files.")
 parser.add_argument("--disable-mmap", action="store_true", help="Don't use mmap when loading safetensors.")

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -1085,22 +1085,21 @@ def cast_to_device(tensor, device, dtype, copy=False):

 PINNED_MEMORY = {}
 TOTAL_PINNED_MEMORY = 0
-if PerformanceFeature.PinnedMem in args.fast:
-    if WINDOWS:
-        MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.45  # Windows limit is apparently 50%
-    else:
-        MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
-else:
-    MAX_PINNED_MEMORY = -1
+MAX_PINNED_MEMORY = -1
+if not args.disable_pinned_memory:
+    if is_nvidia():
+        if WINDOWS:
+            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.45  # Windows limit is apparently 50%
+        else:
+            MAX_PINNED_MEMORY = get_total_memory(torch.device("cpu")) * 0.95
+        logging.info("Enabled pinned memory {}".format(MAX_PINNED_MEMORY // (1024 * 1024)))
+

 def pin_memory(tensor):
    global TOTAL_PINNED_MEMORY
    if MAX_PINNED_MEMORY <= 0:
        return False

-    if not is_nvidia():
-        return False
-
    if not is_device_cpu(tensor.device):
        return False

@@ -1121,9 +1120,6 @@ def unpin_memory(tensor):
    if MAX_PINNED_MEMORY <= 0:
        return False

-    if not is_nvidia():
-        return False
-
    if not is_device_cpu(tensor.device):
        return False