Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
Woosuk Kwon 2025-09-19 23:47:20 +00:00
parent 6f038fc4fb
commit a66aa37f40

View File

@ -7,6 +7,7 @@ from typing import Any, Optional
import numpy as np import numpy as np
import torch import torch
import torch.nn as nn
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.distributed import get_tp_group from vllm.distributed import get_tp_group
@ -107,6 +108,9 @@ class GPUModelRunner:
m.consumed_memory / GiB_bytes, m.consumed_memory / GiB_bytes,
time_after_load - time_before_load) time_after_load - time_before_load)
def get_model(self) -> nn.Module:
    """Return the model object held by this runner.

    Returns:
        The value currently stored in ``self.model``.
    """
    return self.model
def get_kv_cache_spec(self):
    """Return the KV-cache spec for this runner's configuration.

    Passes ``self.vllm_config`` and ``self.kv_cache_dtype`` to the
    ``get_kv_cache_spec`` helper and returns its result unchanged.
    """
    # NOTE(review): the call below is expected to resolve to the
    # module-level helper of the same name (its import is not visible in
    # this view), not to this method -- confirm against the full file.
    return get_kv_cache_spec(self.vllm_config, self.kv_cache_dtype)