diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index 37134cfb3da3..f1d907f519c5 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -1,18 +1,24 @@ # SPDX-License-Identifier: Apache-2.0 import json +import sys import time from collections.abc import (AsyncGenerator, Iterable, Iterator, Mapping, Sequence) from concurrent.futures.thread import ThreadPoolExecutor from http import HTTPStatus from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional, - TypedDict, TypeVar, Union) + TypeVar, Union) from fastapi import Request from pydantic import BaseModel, ConfigDict, Field from starlette.datastructures import Headers +if sys.version_info >= (3, 12): + from typing import TypedDict +else: + from typing_extensions import TypedDict + import vllm.envs as envs from vllm.config import ModelConfig from vllm.engine.protocol import EngineClient