From 0231ce836a27ad0ed722e4b7162821baeeb19fdf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eldar=20Kurti=C4=87?=
 <8884008+eldarkurtic@users.noreply.github.com>
Date: Tue, 25 Nov 2025 15:23:38 +0100
Subject: [PATCH] Revert back to torch.equal over torch.allclose from #28819 
 (#29086)

Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com>
---
 vllm/v1/spec_decode/eagle.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 784ccbc04932f..7b9037c03d4f0 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -1055,11 +1055,11 @@ class EagleProposer:
                 elif (
                     isinstance(target_embed_tokens.weight, torch.Tensor)
                     and isinstance(self.model.model.embed_tokens.weight, torch.Tensor)
-                    and torch.allclose(
+                    # TODO: Offload to CPU for comparison to avoid extra GPU memory
+                    # usage in CI testing environments with limited GPU memory
+                    and torch.equal(
                         target_embed_tokens.weight.cpu(),
                         self.model.model.embed_tokens.weight.cpu(),
-                        rtol=1e-5,
-                        atol=1e-7,
                     )
                 ):
                     share_embeddings = True
@@ -1105,8 +1105,11 @@ class EagleProposer:
                 hasattr(target_language_model, "lm_head")
                 and isinstance(target_language_model.lm_head.weight, torch.Tensor)
                 and isinstance(self.model.lm_head.weight, torch.Tensor)
+                # TODO: Offload to CPU for comparison to avoid extra GPU memory
+                # usage in CI testing environments with limited GPU memory
                 and torch.equal(
-                    target_language_model.lm_head.weight, self.model.lm_head.weight
+                    target_language_model.lm_head.weight.cpu(),
+                    self.model.lm_head.weight.cpu(),
                 )
             ):
                 share_lm_head = True