mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-04 17:40:55 +08:00
[Bugfix] Fix 2 Node and Spec Decode tests (#13341)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
a0231b7c25
commit
5d2965b7d7
@ -275,11 +275,11 @@ def _compare_tp(
|
||||
if load_format == "dummy":
|
||||
# Avoid OOM
|
||||
text_overrides = {
|
||||
"num_layers": 1,
|
||||
"num_hidden_layers": 1,
|
||||
"num_experts": 2,
|
||||
"num_experts_per_tok": 2,
|
||||
"num_local_experts": 2,
|
||||
"num_hidden_layers": 4,
|
||||
"hidden_size": 512,
|
||||
"intermediate_size": 800,
|
||||
"num_attention_heads": 4,
|
||||
"num_key_value_heads": 1,
|
||||
}
|
||||
|
||||
if is_multimodal:
|
||||
|
||||
@ -6,6 +6,7 @@ from typing import List, Optional, Set, Tuple
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.model_executor.layers.sampler import SamplerOutput
|
||||
from vllm.sequence import ExecuteModelRequest
|
||||
from vllm.spec_decode.interfaces import SpeculativeProposals
|
||||
@ -25,11 +26,18 @@ class NGramWorker(NonLLMProposerWorkerBase):
|
||||
which don't rely on LLM model to give proposals.
|
||||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
def __init__(
|
||||
self,
|
||||
vllm_config: VllmConfig,
|
||||
local_rank: int,
|
||||
device_type: str = "cuda",
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(vllm_config)
|
||||
|
||||
# Get local_rank/vocab_size from kwargs attribute
|
||||
self.local_rank = kwargs["local_rank"]
|
||||
self.vocab_size = kwargs["vllm_config"].model_config.get_vocab_size()
|
||||
self.device_type = kwargs.get("device_type", "cuda")
|
||||
self.local_rank = local_rank
|
||||
self.device_type = device_type
|
||||
|
||||
# Lazy initialization list.
|
||||
self._proposer: Top1Proposer
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user