mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-04-01 07:07:02 +08:00
Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com>
This commit is contained in:
parent
516c3f7847
commit
0231ce836a
@ -1055,11 +1055,11 @@ class EagleProposer:
|
||||
elif (
|
||||
isinstance(target_embed_tokens.weight, torch.Tensor)
|
||||
and isinstance(self.model.model.embed_tokens.weight, torch.Tensor)
|
||||
and torch.allclose(
|
||||
# TODO: Offload to CPU for comparison to avoid extra GPU memory
|
||||
# usage in CI testing environments with limited GPU memory
|
||||
and torch.equal(
|
||||
target_embed_tokens.weight.cpu(),
|
||||
self.model.model.embed_tokens.weight.cpu(),
|
||||
rtol=1e-5,
|
||||
atol=1e-7,
|
||||
)
|
||||
):
|
||||
share_embeddings = True
|
||||
@ -1105,8 +1105,11 @@ class EagleProposer:
|
||||
hasattr(target_language_model, "lm_head")
|
||||
and isinstance(target_language_model.lm_head.weight, torch.Tensor)
|
||||
and isinstance(self.model.lm_head.weight, torch.Tensor)
|
||||
# TODO: Offload to CPU for comparison to avoid extra GPU memory
|
||||
# usage in CI testing environments with limited GPU memory
|
||||
and torch.equal(
|
||||
target_language_model.lm_head.weight, self.model.lm_head.weight
|
||||
target_language_model.lm_head.weight.cpu(),
|
||||
self.model.lm_head.weight.cpu(),
|
||||
)
|
||||
):
|
||||
share_lm_head = True
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user