mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-29 03:27:04 +08:00
migrate pydantic from v1 to v2 (#2531)
This commit is contained in:
parent
d75c40734a
commit
71d63ed72e
@@ -5,5 +5,5 @@ torch-neuronx >= 2.1.0
|
|||||||
neuronx-cc
|
neuronx-cc
|
||||||
fastapi
|
fastapi
|
||||||
uvicorn[standard]
|
uvicorn[standard]
|
||||||
pydantic == 1.10.13 # Required for OpenAI server.
|
pydantic >= 2.0 # Required for OpenAI server.
|
||||||
aioprometheus[starlette]
|
aioprometheus[starlette]
|
||||||
|
|||||||
@@ -9,5 +9,5 @@ tokenizers>=0.15.0
|
|||||||
transformers >= 4.36.0 # Required for Mixtral.
|
transformers >= 4.36.0 # Required for Mixtral.
|
||||||
fastapi
|
fastapi
|
||||||
uvicorn[standard]
|
uvicorn[standard]
|
||||||
pydantic == 1.10.13 # Required for OpenAI server.
|
pydantic >= 2.0 # Required for OpenAI server.
|
||||||
aioprometheus[starlette]
|
aioprometheus[starlette]
|
||||||
|
|||||||
@@ -8,5 +8,5 @@ transformers >= 4.36.0 # Required for Mixtral.
|
|||||||
xformers == 0.0.23.post1 # Required for CUDA 12.1.
|
xformers == 0.0.23.post1 # Required for CUDA 12.1.
|
||||||
fastapi
|
fastapi
|
||||||
uvicorn[standard]
|
uvicorn[standard]
|
||||||
pydantic == 1.10.13 # Required for OpenAI server.
|
pydantic >= 2.0 # Required for OpenAI server.
|
||||||
aioprometheus[starlette]
|
aioprometheus[starlette]
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ app.add_route("/metrics", metrics) # Exposes HTTP metrics
|
|||||||
@app.exception_handler(RequestValidationError)
|
@app.exception_handler(RequestValidationError)
|
||||||
async def validation_exception_handler(_, exc):
|
async def validation_exception_handler(_, exc):
|
||||||
err = openai_serving_chat.create_error_response(message=str(exc))
|
err = openai_serving_chat.create_error_response(message=str(exc))
|
||||||
return JSONResponse(err.dict(), status_code=HTTPStatus.BAD_REQUEST)
|
return JSONResponse(err.model_dump(), status_code=HTTPStatus.BAD_REQUEST)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/health")
|
@app.get("/health")
|
||||||
@@ -118,7 +118,7 @@ async def health() -> Response:
|
|||||||
@app.get("/v1/models")
|
@app.get("/v1/models")
|
||||||
async def show_available_models():
|
async def show_available_models():
|
||||||
models = await openai_serving_chat.show_available_models()
|
models = await openai_serving_chat.show_available_models()
|
||||||
return JSONResponse(content=models.dict())
|
return JSONResponse(content=models.model_dump())
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/chat/completions")
|
@app.post("/v1/chat/completions")
|
||||||
@@ -126,22 +126,28 @@ async def create_chat_completion(request: ChatCompletionRequest,
|
|||||||
raw_request: Request):
|
raw_request: Request):
|
||||||
generator = await openai_serving_chat.create_chat_completion(
|
generator = await openai_serving_chat.create_chat_completion(
|
||||||
request, raw_request)
|
request, raw_request)
|
||||||
if request.stream and not isinstance(generator, ErrorResponse):
|
if isinstance(generator, ErrorResponse):
|
||||||
|
return JSONResponse(content=generator.model_dump(),
|
||||||
|
status_code=generator.code)
|
||||||
|
if request.stream:
|
||||||
return StreamingResponse(content=generator,
|
return StreamingResponse(content=generator,
|
||||||
media_type="text/event-stream")
|
media_type="text/event-stream")
|
||||||
else:
|
else:
|
||||||
return JSONResponse(content=generator.dict())
|
return JSONResponse(content=generator.model_dump())
|
||||||
|
|
||||||
|
|
||||||
@app.post("/v1/completions")
|
@app.post("/v1/completions")
|
||||||
async def create_completion(request: CompletionRequest, raw_request: Request):
|
async def create_completion(request: CompletionRequest, raw_request: Request):
|
||||||
generator = await openai_serving_completion.create_completion(
|
generator = await openai_serving_completion.create_completion(
|
||||||
request, raw_request)
|
request, raw_request)
|
||||||
if request.stream and not isinstance(generator, ErrorResponse):
|
if isinstance(generator, ErrorResponse):
|
||||||
|
return JSONResponse(content=generator.model_dump(),
|
||||||
|
status_code=generator.code)
|
||||||
|
if request.stream:
|
||||||
return StreamingResponse(content=generator,
|
return StreamingResponse(content=generator,
|
||||||
media_type="text/event-stream")
|
media_type="text/event-stream")
|
||||||
else:
|
else:
|
||||||
return JSONResponse(content=generator.dict())
|
return JSONResponse(content=generator.model_dump())
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ class ErrorResponse(BaseModel):
|
|||||||
message: str
|
message: str
|
||||||
type: str
|
type: str
|
||||||
param: Optional[str] = None
|
param: Optional[str] = None
|
||||||
code: Optional[str] = None
|
code: int
|
||||||
|
|
||||||
|
|
||||||
class ModelPermission(BaseModel):
|
class ModelPermission(BaseModel):
|
||||||
@@ -189,7 +189,7 @@ class CompletionStreamResponse(BaseModel):
|
|||||||
created: int = Field(default_factory=lambda: int(time.time()))
|
created: int = Field(default_factory=lambda: int(time.time()))
|
||||||
model: str
|
model: str
|
||||||
choices: List[CompletionResponseStreamChoice]
|
choices: List[CompletionResponseStreamChoice]
|
||||||
usage: Optional[UsageInfo]
|
usage: Optional[UsageInfo] = Field(default=None)
|
||||||
|
|
||||||
|
|
||||||
class ChatMessage(BaseModel):
|
class ChatMessage(BaseModel):
|
||||||
@@ -229,5 +229,4 @@ class ChatCompletionStreamResponse(BaseModel):
|
|||||||
created: int = Field(default_factory=lambda: int(time.time()))
|
created: int = Field(default_factory=lambda: int(time.time()))
|
||||||
model: str
|
model: str
|
||||||
choices: List[ChatCompletionResponseStreamChoice]
|
choices: List[ChatCompletionResponseStreamChoice]
|
||||||
usage: Optional[UsageInfo] = Field(
|
usage: Optional[UsageInfo] = Field(default=None)
|
||||||
default=None, description="data about request and response")
|
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
created=created_time,
|
created=created_time,
|
||||||
choices=[choice_data],
|
choices=[choice_data],
|
||||||
model=model_name)
|
model=model_name)
|
||||||
data = chunk.json(exclude_unset=True, ensure_ascii=False)
|
data = chunk.model_dump_json(exclude_unset=True)
|
||||||
yield f"data: {data}\n\n"
|
yield f"data: {data}\n\n"
|
||||||
|
|
||||||
# Send response to echo the input portion of the last message
|
# Send response to echo the input portion of the last message
|
||||||
@@ -125,7 +125,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
created=created_time,
|
created=created_time,
|
||||||
choices=[choice_data],
|
choices=[choice_data],
|
||||||
model=model_name)
|
model=model_name)
|
||||||
data = chunk.json(exclude_unset=True, ensure_ascii=False)
|
data = chunk.model_dump_json(exclude_unset=True)
|
||||||
yield f"data: {data}\n\n"
|
yield f"data: {data}\n\n"
|
||||||
|
|
||||||
# Send response for each token for each request.n (index)
|
# Send response for each token for each request.n (index)
|
||||||
@@ -156,7 +156,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
created=created_time,
|
created=created_time,
|
||||||
choices=[choice_data],
|
choices=[choice_data],
|
||||||
model=model_name)
|
model=model_name)
|
||||||
data = chunk.json(exclude_unset=True, ensure_ascii=False)
|
data = chunk.model_dump_json(exclude_unset=True)
|
||||||
yield f"data: {data}\n\n"
|
yield f"data: {data}\n\n"
|
||||||
else:
|
else:
|
||||||
# Send the finish response for each request.n only once
|
# Send the finish response for each request.n only once
|
||||||
@@ -178,9 +178,8 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
model=model_name)
|
model=model_name)
|
||||||
if final_usage is not None:
|
if final_usage is not None:
|
||||||
chunk.usage = final_usage
|
chunk.usage = final_usage
|
||||||
data = chunk.json(exclude_unset=True,
|
data = chunk.model_dump_json(exclude_unset=True,
|
||||||
exclude_none=True,
|
exclude_none=True)
|
||||||
ensure_ascii=False)
|
|
||||||
yield f"data: {data}\n\n"
|
yield f"data: {data}\n\n"
|
||||||
finish_reason_sent[i] = True
|
finish_reason_sent[i] = True
|
||||||
# Send the final done message after all response.n are finished
|
# Send the final done message after all response.n are finished
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ async def completion_stream_generator(
|
|||||||
logprobs=logprobs,
|
logprobs=logprobs,
|
||||||
finish_reason=finish_reason,
|
finish_reason=finish_reason,
|
||||||
)
|
)
|
||||||
]).json(exclude_unset=True, ensure_ascii=False)
|
]).model_dump_json(exclude_unset=True)
|
||||||
yield f"data: {response_json}\n\n"
|
yield f"data: {response_json}\n\n"
|
||||||
|
|
||||||
if output.finish_reason is not None:
|
if output.finish_reason is not None:
|
||||||
@@ -99,7 +99,7 @@ async def completion_stream_generator(
|
|||||||
)
|
)
|
||||||
],
|
],
|
||||||
usage=final_usage,
|
usage=final_usage,
|
||||||
).json(exclude_unset=True, ensure_ascii=False)
|
).model_dump_json(exclude_unset=True)
|
||||||
yield f"data: {response_json}\n\n"
|
yield f"data: {response_json}\n\n"
|
||||||
|
|
||||||
yield "data: [DONE]\n\n"
|
yield "data: [DONE]\n\n"
|
||||||
@@ -279,7 +279,7 @@ class OpenAIServingCompletion(OpenAIServing):
|
|||||||
# When user requests streaming but we don't stream, we still need to
|
# When user requests streaming but we don't stream, we still need to
|
||||||
# return a streaming response with a single event.
|
# return a streaming response with a single event.
|
||||||
if request.stream:
|
if request.stream:
|
||||||
response_json = response.json(ensure_ascii=False)
|
response_json = response.model_dump_json()
|
||||||
|
|
||||||
async def fake_stream_generator() -> AsyncGenerator[str, None]:
|
async def fake_stream_generator() -> AsyncGenerator[str, None]:
|
||||||
yield f"data: {response_json}\n\n"
|
yield f"data: {response_json}\n\n"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user