From 86dc22a028f933df5ed47a7da8b94c22e526454c Mon Sep 17 00:00:00 2001
From: kijai <40791699+kijai@users.noreply.github.com>
Date: Tue, 23 Jul 2024 19:00:04 +0300
Subject: [PATCH] Add faster screencap node

---
 __init__.py          |  1 +
 nodes/image_nodes.py | 53 ++++++++++++++++++++++++++++++++++++++++++--
 requirements.txt     |  3 ++-
 3 files changed, 54 insertions(+), 3 deletions(-)

diff --git a/__init__.py b/__init__.py
index 8f422f8..022bf55 100644
--- a/__init__.py
+++ b/__init__.py
@@ -128,6 +128,7 @@ NODE_CONFIG = {
     "GLIGENTextBoxApplyBatchCoords": {"class": GLIGENTextBoxApplyBatchCoords},
     "Intrinsic_lora_sampling": {"class": Intrinsic_lora_sampling, "name": "Intrinsic Lora Sampling"},
     "CheckpointPerturbWeights": {"class": CheckpointPerturbWeights, "name": "CheckpointPerturbWeights"},
+    "Screencap_mss": {"class": Screencap_mss, "name": "Screencap mss"},

     #instance diffusion
     "CreateInstanceDiffusionTracking": {"class": CreateInstanceDiffusionTracking},
diff --git a/nodes/image_nodes.py b/nodes/image_nodes.py
index a16a741..7a51159 100644
--- a/nodes/image_nodes.py
+++ b/nodes/image_nodes.py
@@ -395,22 +395,71 @@ Can be used for realtime diffusion with autoqueue.
         }

     def screencap(self, x, y, width, height, num_frames, delay):
+        start_time = time.time()
         captures = []
         bbox = (x, y, x + width, y + height)

         for _ in range(num_frames):
             # Capture screen
             screen_capture = ImageGrab.grab(bbox=bbox)
-            screen_capture_torch = torch.tensor(np.array(screen_capture), dtype=torch.float32) / 255.0
-            screen_capture_torch = screen_capture_torch.unsqueeze(0)
+            screen_capture_torch = torch.from_numpy(np.array(screen_capture, dtype=np.float32) / 255.0).unsqueeze(0)
             captures.append(screen_capture_torch)

             # Wait for a short delay if more than one frame is to be captured
             if num_frames > 1:
                 time.sleep(delay)
+
+        elapsed_time = time.time() - start_time
+        print(f"screengrab took {elapsed_time} seconds.")

         return (torch.cat(captures, dim=0),)

+class Screencap_mss:
+
+    @classmethod
+    def IS_CHANGED(cls):
+
+        return
+
+    RETURN_TYPES = ("IMAGE",)
+    RETURN_NAMES = ("image",)
+    FUNCTION = "screencap"
+    CATEGORY = "KJNodes/experimental"
+    DESCRIPTION = """
+Captures an area specified by screen coordinates.
+Can be used for realtime diffusion with autoqueue.
+"""
+
+    @classmethod
+    def INPUT_TYPES(s):
+        return {
+            "required": {
+                "x": ("INT", {"default": 0,"min": 0, "max": 4096, "step": 1}),
+                "y": ("INT", {"default": 0,"min": 0, "max": 4096, "step": 1}),
+                "width": ("INT", {"default": 512,"min": 0, "max": 4096, "step": 1}),
+                "height": ("INT", {"default": 512,"min": 0, "max": 4096, "step": 1}),
+                "num_frames": ("INT", {"default": 1,"min": 1, "max": 255, "step": 1}),
+                "delay": ("FLOAT", {"default": 0.1,"min": 0.0, "max": 10.0, "step": 0.01}),
+            },
+        }
+
+    def screencap(self, x, y, width, height, num_frames, delay):
+        from mss import mss
+        captures = []
+        with mss() as sct:
+            bbox = {'top': y, 'left': x, 'width': width, 'height': height}
+
+            for _ in range(num_frames):
+                sct_img = sct.grab(bbox)
+                img_np = np.array(sct_img)
+                img_torch = torch.from_numpy(img_np[..., [2, 1, 0]]).float() / 255.0
+                captures.append(img_torch)
+
+                if num_frames > 1:
+                    time.sleep(delay)
+
+        return (torch.stack(captures, 0),)
+
 class AddLabel:
     @classmethod
     def INPUT_TYPES(s):
diff --git a/requirements.txt b/requirements.txt
index 8577961..7e26079 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@ pillow>=10.3.0
 scipy
 color-matcher
 matplotlib
-huggingface_hub
\ No newline at end of file
+huggingface_hub
+mss
\ No newline at end of file
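
For reference, here is a minimal standalone sketch of the capture path the new Screencap_mss node relies on, assuming only mss, numpy, and torch are installed; the grab_frames helper and its default region below are illustrative and not part of the patch. mss hands back raw BGRA pixels, which are reordered to RGB (dropping alpha) and scaled into the float32 NHWC layout that ComfyUI IMAGE outputs use:

    import time

    import numpy as np
    import torch
    from mss import mss

    def grab_frames(x=0, y=0, width=512, height=512, num_frames=4, delay=0.1):
        # Illustrative helper, not part of the patch.
        captures = []
        with mss() as sct:
            bbox = {"top": y, "left": x, "width": width, "height": height}
            for _ in range(num_frames):
                sct_img = sct.grab(bbox)            # raw BGRA screenshot
                img_np = np.array(sct_img)          # (H, W, 4) uint8
                # BGRA -> RGB (alpha dropped), scaled to [0, 1] float32
                img_torch = torch.from_numpy(img_np[..., [2, 1, 0]]).float() / 255.0
                captures.append(img_torch)
                if num_frames > 1:
                    time.sleep(delay)
        return torch.stack(captures, 0)             # (N, H, W, 3)

    frames = grab_frames()
    print(frames.shape, frames.dtype)  # torch.Size([4, 512, 512, 3]) torch.float32

Note that a single mss() context stays open across all frames, so the display connection is not re-initialized per capture; that reuse, plus skipping PIL image construction entirely, is presumably where the speedup over ImageGrab.grab in the commit title comes from.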