# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import json
from collections.abc import AsyncGenerator
from typing import Any

from vllm.entrypoints.openai.protocol import (
    ChatCompletionResponse,
    ChatCompletionResponseChoice,
    ChatCompletionStreamResponse,
    ChatMessage,
    UsageInfo,
)


async def accumulate_streaming_response(
    stream_generator: AsyncGenerator[str, None],
) -> ChatCompletionResponse:
    """
    Accumulate streaming SSE chunks into a complete ChatCompletionResponse.

    This helper parses the SSE format and builds up the complete response
    by combining all the delta chunks.
    """
    accumulated_content = ""
    accumulated_reasoning = None
    accumulated_tool_calls: list[dict[str, Any]] = []
    role = None
    finish_reason = None
    response_id = None
    created = None
    model = None
    index = 0

    async for chunk_str in stream_generator:
        # Skip empty lines and the [DONE] marker
        if not chunk_str.strip() or chunk_str.strip() == "data: [DONE]":
            continue

        # Parse SSE format: "data: {json}\n\n"
        if chunk_str.startswith("data: "):
            json_str = chunk_str[6:].strip()
            try:
                chunk_data = json.loads(json_str)
                chunk = ChatCompletionStreamResponse(**chunk_data)

                # Store metadata from the first chunk
                if response_id is None:
                    response_id = chunk.id
                    created = chunk.created
                    model = chunk.model

                # Process each choice in the chunk
                for choice in chunk.choices:
                    if choice.delta.role:
                        role = choice.delta.role
                    if choice.delta.content:
                        accumulated_content += choice.delta.content
                    if choice.delta.reasoning:
                        if accumulated_reasoning is None:
                            accumulated_reasoning = ""
                        accumulated_reasoning += choice.delta.reasoning
                    if choice.delta.tool_calls:
                        # Accumulate tool calls
                        for tool_call_delta in choice.delta.tool_calls:
                            # Find or create the tool call at this index
                            while len(accumulated_tool_calls) <= tool_call_delta.index:
                                accumulated_tool_calls.append(
                                    {
                                        "id": None,
                                        "type": "function",
                                        "function": {"name": "", "arguments": ""},
                                    }
                                )

                            if tool_call_delta.id:
                                accumulated_tool_calls[tool_call_delta.index]["id"] = (
                                    tool_call_delta.id
                                )
                            if tool_call_delta.function:
                                if tool_call_delta.function.name:
                                    accumulated_tool_calls[tool_call_delta.index][
                                        "function"
                                    ]["name"] += tool_call_delta.function.name
                                if tool_call_delta.function.arguments:
                                    accumulated_tool_calls[tool_call_delta.index][
                                        "function"
                                    ]["arguments"] += tool_call_delta.function.arguments

                    if choice.finish_reason:
                        finish_reason = choice.finish_reason
                    if choice.index is not None:
                        index = choice.index

            except json.JSONDecodeError:
                continue

    # Build the final message
    message_kwargs = {
        "role": role or "assistant",
        "content": accumulated_content if accumulated_content else None,
        "reasoning": accumulated_reasoning,
    }

    # Only include tool_calls if there are any
    if accumulated_tool_calls:
        message_kwargs["tool_calls"] = [
            {"id": tc["id"], "type": tc["type"], "function": tc["function"]}
            for tc in accumulated_tool_calls
        ]

    message = ChatMessage(**message_kwargs)

    # Build the final response
    choice = ChatCompletionResponseChoice(
        index=index,
        message=message,
        finish_reason=finish_reason or "stop",
    )

    # Create usage info (with dummy values for tests)
    usage = UsageInfo(
        prompt_tokens=0,
        completion_tokens=0,
        total_tokens=0,
    )

    response = ChatCompletionResponse(
        id=response_id or "chatcmpl-test",
        object="chat.completion",
        created=created or 0,
        model=model or "test-model",
        choices=[choice],
        usage=usage,
    )

    return response


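# Minimal usage sketch (illustrative only, not called by any test):
# `_example_accumulate_usage` and its inner `fake_sse_stream` are
# hypothetical names introduced here, and the chunk payload assumes the
# protocol models accept this minimal dict.
async def _example_accumulate_usage() -> ChatCompletionResponse:
    async def fake_sse_stream() -> AsyncGenerator[str, None]:
        # One content delta followed by the [DONE] marker, using the same
        # "data: {json}\n\n" SSE framing the accumulator expects.
        chunk = {
            "id": "chatcmpl-example",
            "object": "chat.completion.chunk",
            "created": 0,
            "model": "example-model",
            "choices": [
                {
                    "index": 0,
                    "delta": {"role": "assistant", "content": "Hello"},
                    "finish_reason": "stop",
                }
            ],
        }
        yield f"data: {json.dumps(chunk)}\n\n"
        yield "data: [DONE]\n\n"

    # Produces a ChatCompletionResponse with content "Hello" and
    # finish_reason "stop".
    return await accumulate_streaming_response(fake_sse_stream())

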
def verify_harmony_messages(
    messages: list[Any], expected_messages: list[dict[str, Any]]
):
    assert len(messages) == len(expected_messages)
    for msg, expected in zip(messages, expected_messages):
        if "role" in expected:
            assert msg.author.role == expected["role"]
        if "author_name" in expected:
            assert msg.author.name == expected["author_name"]
        if "channel" in expected:
            assert msg.channel == expected["channel"]
        if "recipient" in expected:
            assert msg.recipient == expected["recipient"]
        if "content" in expected:
            assert msg.content[0].text == expected["content"]
        if "content_type" in expected:
            assert msg.content_type == expected["content_type"]
        if "tool_definitions" in expected:
            # Check that the tool definitions match the expected list of tool names
            actual_tools = [t.name for t in msg.content[0].tools["functions"].tools]
            assert actual_tools == expected["tool_definitions"]


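# Usage sketch for verify_harmony_messages above (hypothetical values):
# each expected dict may list only the fields to check, so partial
# expectations are fine, e.g.
#
#     verify_harmony_messages(
#         parsed_messages,
#         [
#             {"role": "system"},
#             {"role": "user", "content": "What is the weather in Boston?"},
#             {"role": "assistant", "channel": "analysis"},
#         ],
#     )

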
def verify_chat_response(
    response: ChatCompletionResponse,
    content: str | None = None,
    reasoning: str | None = None,
    tool_calls: list[tuple[str, str]] | None = None,
):
    assert len(response.choices) == 1
    message = response.choices[0].message

    if content is not None:
        assert message.content == content
    else:
        assert not message.content

    if reasoning is not None:
        assert message.reasoning == reasoning
    else:
        assert not message.reasoning

    if tool_calls:
        assert message.tool_calls is not None
        assert len(message.tool_calls) == len(tool_calls)
        for tc, (expected_name, expected_args) in zip(message.tool_calls, tool_calls):
            assert tc.function.name == expected_name
            assert tc.function.arguments == expected_args
    else:
        assert not message.tool_calls
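

# Companion sketch (illustrative only): accumulates the hypothetical stream
# from _example_accumulate_usage above, then asserts on the result.
async def _example_verify_usage() -> None:
    response = await _example_accumulate_usage()
    # Only content is asserted here; reasoning and tool_calls are expected
    # to be absent for that stream.
    verify_chat_response(response, content="Hello")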