add(v1): RequestStateStats to RequestOutput (#24947)
Signed-off-by: huijjj <huijong.jeong@squeezebits.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 5b80f22087
commit edaae1825f
@@ -86,3 +86,16 @@ def test_max_model_len():
         # It can be less if generation finishes due to other reasons (e.g., EOS)
         # before reaching the absolute model length limit.
         assert num_total_tokens <= max_model_len
+
+
+def test_log_stats():
+    llm = LLM(
+        model=MODEL_NAME,
+        disable_log_stats=False,
+        gpu_memory_utilization=0.10,
+        enforce_eager=True,  # reduce test time
+    )
+    outputs = llm.generate(PROMPTS, sampling_params=None)
+
+    # disable_log_stats is False, every output should have metrics
+    assert all(output.metrics is not None for output in outputs)
@@ -14,6 +14,7 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import MultiModalPlaceholderDict
 from vllm.sequence import RequestMetrics
+from vllm.v1.metrics.stats import RequestStateStats
 
 logger = init_logger(__name__)
 
@@ -108,7 +109,7 @@ class RequestOutput:
         prompt_logprobs: Optional[PromptLogprobs],
         outputs: list[CompletionOutput],
         finished: bool,
-        metrics: Optional[RequestMetrics] = None,
+        metrics: Optional[Union[RequestMetrics, RequestStateStats]] = None,
         lora_request: Optional[LoRARequest] = None,
         encoder_prompt: Optional[str] = None,
         encoder_prompt_token_ids: Optional[list[int]] = None,
@@ -248,8 +248,7 @@ class RequestState:
         if prompt_token_ids is None and self.prompt_embeds is not None:
             prompt_token_ids = [0] * len(self.prompt_embeds)
 
-        return RequestOutput(
-            request_id=request_id,
+        return RequestOutput(request_id=request_id,
             prompt=self.prompt,
             prompt_token_ids=prompt_token_ids,
             prompt_logprobs=prompt_logprobs,
@@ -257,7 +256,7 @@ class RequestState:
             finished=finished,
             kv_transfer_params=kv_transfer_params,
             num_cached_tokens=self.num_cached_tokens,
-        )
+            metrics=self.stats)
 
     def _new_completion_output(
         self,
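
For context, a minimal sketch of how the new field surfaces to callers after this change, assuming a V1 engine build that includes this commit: with disable_log_stats left at False, each RequestOutput.metrics should carry the engine's per-request RequestStateStats. The model name and prompt below are placeholders chosen only for illustration; the flow mirrors the test_log_stats test added above.

from vllm import LLM

# Placeholder model and prompt, for illustration only.
llm = LLM(
    model="facebook/opt-125m",
    disable_log_stats=False,  # keep per-request stats so metrics is populated
    enforce_eager=True,
)

outputs = llm.generate(["Hello, my name is"], sampling_params=None)

for output in outputs:
    # On the V1 engine this is expected to be a RequestStateStats instance;
    # on the legacy engine it would be a RequestMetrics instance.
    assert output.metrics is not None
    print(type(output.metrics).__name__)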