mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-17 04:05:01 +08:00
[PERF] Use pybase64 to more quickly decode prompt embeddings (#22469)
Signed-off-by: Andrew Sansom <andrew@protopia.ai>
This commit is contained in:
parent
1ee5ead5f8
commit
e2c8f1edec
@ -1,7 +1,6 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import asyncio
|
import asyncio
|
||||||
import base64
|
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
@ -12,6 +11,7 @@ from http import HTTPStatus
|
|||||||
from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional,
|
from typing import (Annotated, Any, Callable, ClassVar, Generic, Optional,
|
||||||
TypeVar, Union, cast, overload)
|
TypeVar, Union, cast, overload)
|
||||||
|
|
||||||
|
import pybase64
|
||||||
import torch
|
import torch
|
||||||
from fastapi import Request
|
from fastapi import Request
|
||||||
from pydantic import BaseModel, ConfigDict, Field
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
@ -1008,7 +1008,8 @@ class OpenAIServing:
|
|||||||
) -> list[EmbedsPrompt]:
|
) -> list[EmbedsPrompt]:
|
||||||
|
|
||||||
def _load_and_validate_embed(embed: bytes) -> EmbedsPrompt:
|
def _load_and_validate_embed(embed: bytes) -> EmbedsPrompt:
|
||||||
tensor = torch.load(io.BytesIO(base64.b64decode(embed)),
|
tensor = torch.load(io.BytesIO(
|
||||||
|
pybase64.b64decode(embed, validate=True)),
|
||||||
weights_only=True)
|
weights_only=True)
|
||||||
assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
|
assert isinstance(tensor, torch.Tensor) and tensor.dtype in (
|
||||||
torch.float32,
|
torch.float32,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user