Signed-off-by: Woosuk Kwon <woosuk@thinkingmachines.ai>
This commit is contained in:
Woosuk Kwon 2025-09-19 23:47:20 +00:00
parent 6f038fc4fb
commit a66aa37f40

View File

@@ -7,6 +7,7 @@ from typing import Any, Optional
import numpy as np
import torch
import torch.nn as nn
from vllm.config import VllmConfig
from vllm.distributed import get_tp_group
@@ -107,6 +108,9 @@ class GPUModelRunner:
m.consumed_memory / GiB_bytes,
time_after_load - time_before_load)
def get_model(self) -> nn.Module:
    """Return the module currently stored on ``self.model``.

    Returns:
        The object bound to ``self.model``; no copy or wrapping is done.

    Raises:
        AttributeError: If ``self.model`` has not been assigned yet.
    """
    current_model = self.model
    return current_model
def get_kv_cache_spec(self):
    """Build the KV cache spec from this runner's config and cache dtype.

    Forwards ``self.vllm_config`` and ``self.kv_cache_dtype`` to the
    module-level ``get_kv_cache_spec`` helper and returns its result
    unchanged.
    """
    config = self.vllm_config
    cache_dtype = self.kv_cache_dtype
    # Inside a method this name resolves to the module-level helper,
    # not to this method, so the call is not recursive.
    return get_kv_cache_spec(config, cache_dtype)