[Benchmark] Support Infinity API (#26641)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent f0a30a067b
commit 5be7ca1b99
@@ -1584,7 +1584,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]:
     if dataset_class.IS_MULTIMODAL and not (
         args.backend in ("openai-chat", "openai-audio")
-        or "openai-embeddings-" in args.backend
+        or "embeddings-" in args.backend
     ):
         # multi-modal benchmark is only available on OpenAI Chat
         # endpoint-type.
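The broadened substring check lets the new Infinity CLIP backend pass the multi-modal gate alongside the existing OpenAI-style embedding backends. A minimal, self-contained sketch of the gating logic as changed above (the backend names below are just examples taken from the registry later in this diff):

# Sketch of the multi-modal backend gate in get_samples; not the benchmark's actual helper.
MULTIMODAL_CHAT_BACKENDS = ("openai-chat", "openai-audio")

def supports_multimodal(backend: str) -> bool:
    # Chat/audio backends, plus any backend whose name contains "embeddings-"
    # (e.g. "openai-embeddings-clip", "infinity-embeddings-clip").
    return backend in MULTIMODAL_CHAT_BACKENDS or "embeddings-" in backend

assert supports_multimodal("openai-embeddings-clip")
assert supports_multimodal("infinity-embeddings-clip")
assert not supports_multimodal("vllm")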
@@ -581,29 +581,6 @@ async def async_request_openai_embeddings_chat(
     )
 
 
-async def async_request_openai_embeddings_clip(
-    request_func_input: RequestFuncInput,
-    session: aiohttp.ClientSession,
-    pbar: Optional[tqdm] = None,
-) -> RequestFuncOutput:
-    if request_func_input.multi_modal_content:
-        # Image input
-        request_func_input.prompt = ""
-
-    # max_model_len=77 is too short for most datasets,
-    # so by default we truncate the prompt to max_model_len
-    if request_func_input.extra_body is None:
-        request_func_input.extra_body = {}
-    if "truncate_prompt_tokens" not in request_func_input.extra_body:
-        request_func_input.extra_body["truncate_prompt_tokens"] = -1
-
-    return await async_request_openai_embeddings_chat(
-        request_func_input,
-        session,
-        pbar=pbar,
-    )
-
-
 def _try_extract_request_idx(request_func_input: RequestFuncInput):
     if request_func_input.request_id:
         match = re.search(r"(\d+)$", request_func_input.request_id)
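For reference, the trailing-digit extraction used in the _try_extract_request_idx context above can be exercised on its own. A small sketch; the request id string is hypothetical, and the int conversion is assumed since the rest of the function body lies outside this hunk:

import re

# The helper looks for digits at the end of the request id,
# e.g. "benchmark-request-42" -> 42; no trailing digits means no match.
match = re.search(r"(\d+)$", "benchmark-request-42")
print(int(match.group(1)) if match else None)  # 42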
@@ -616,11 +593,20 @@ def _try_extract_request_idx(request_func_input: RequestFuncInput):
     return None
 
 
-async def async_request_openai_embeddings_vlm2vec(
-    request_func_input: RequestFuncInput,
-    session: aiohttp.ClientSession,
-    pbar: Optional[tqdm] = None,
-) -> RequestFuncOutput:
+def _preprocess_clip(request_func_input: RequestFuncInput):
+    if request_func_input.multi_modal_content:
+        # Image input
+        request_func_input.prompt = ""
+
+    # max_model_len=77 is too short for most datasets,
+    # so by default we truncate the prompt to max_model_len
+    if request_func_input.extra_body is None:
+        request_func_input.extra_body = {}
+    if "truncate_prompt_tokens" not in request_func_input.extra_body:
+        request_func_input.extra_body["truncate_prompt_tokens"] = -1
+
+
+def _preprocess_vlm2vec(request_func_input: RequestFuncInput):
     if request_func_input.multi_modal_content:
         request_idx = _try_extract_request_idx(request_func_input)
 
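The CLIP- and VLM2Vec-specific preprocessing is now factored into plain helpers so it can be shared between the OpenAI-compatible and Infinity request functions. A hedged sketch of what _preprocess_clip does to a request; a SimpleNamespace stands in for RequestFuncInput and carries only the fields the helper touches:

from types import SimpleNamespace

# Stand-in for RequestFuncInput; the real dataclass has more fields.
req = SimpleNamespace(
    prompt="a photo of a cat",
    multi_modal_content={"type": "image_url", "image_url": {"url": "http://example.com/cat.png"}},
    extra_body=None,
)

_preprocess_clip(req)
assert req.prompt == ""  # image input: the text prompt is cleared
assert req.extra_body == {"truncate_prompt_tokens": -1}  # work around CLIP's short max_model_len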
@@ -637,6 +623,28 @@ async def async_request_openai_embeddings_vlm2vec(
         f"{request_func_input.prompt}"
     )
 
+
+async def async_request_openai_embeddings_clip(
+    request_func_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
+    pbar: Optional[tqdm] = None,
+) -> RequestFuncOutput:
+    _preprocess_clip(request_func_input)
+
+    return await async_request_openai_embeddings_chat(
+        request_func_input,
+        session,
+        pbar=pbar,
+    )
+
+
+async def async_request_openai_embeddings_vlm2vec(
+    request_func_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
+    pbar: Optional[tqdm] = None,
+) -> RequestFuncOutput:
+    _preprocess_vlm2vec(request_func_input)
+
     return await async_request_openai_embeddings_chat(
         request_func_input,
         session,
@@ -645,6 +653,61 @@ async def async_request_openai_embeddings_vlm2vec(
     )
 
 
+async def async_request_infinity_embeddings(
+    request_func_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
+    pbar: Optional[tqdm] = None,
+) -> RequestFuncOutput:
+    api_url = request_func_input.api_url
+    _validate_api_url(api_url, "Infinity Embeddings API", "embeddings")
+
+    payload = {
+        "model": request_func_input.model_name
+        if request_func_input.model_name
+        else request_func_input.model,
+    }
+
+    if request_func_input.prompt:
+        payload["input"] = request_func_input.prompt
+    else:
+        mm_content = request_func_input.multi_modal_content
+        assert isinstance(mm_content, dict)
+
+        mm_type = mm_content["type"]
+        payload["input"] = mm_content[mm_type]["url"]
+        payload["modality"] = mm_type.split("_", 1)[0]
+
+    _update_payload_common(payload, request_func_input)
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {os.environ.get('OPENAI_API_KEY')}",
+    }
+    _update_headers_common(headers, request_func_input)
+
+    return await _run_openai_embeddings(
+        session,
+        api_url,
+        payload=payload,
+        headers=headers,
+        pbar=pbar,
+    )
+
+
+async def async_request_infinity_embeddings_clip(
+    request_func_input: RequestFuncInput,
+    session: aiohttp.ClientSession,
+    pbar: Optional[tqdm] = None,
+) -> RequestFuncOutput:
+    _preprocess_clip(request_func_input)
+
+    return await async_request_infinity_embeddings(
+        request_func_input,
+        session,
+        pbar=pbar,
+    )
+
+
 # TODO: Add more request functions for different API protocols.
 ASYNC_REQUEST_FUNCS: dict[str, RequestFunc] = {
     "vllm": async_request_openai_completions,
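For a concrete picture of the request body built above: with a text prompt the Infinity /embeddings payload stays OpenAI-compatible, while for multi-modal content the media URL becomes the input and a modality field is added. A hedged sketch of the two shapes; model names and the image URL are placeholders, and any extras merged by _update_payload_common are omitted:

# Text request (prompt present):
text_payload = {
    "model": "BAAI/bge-small-en-v1.5",  # placeholder model name
    "input": "The quick brown fox",
}

# Image request (multi_modal_content is a dict like the one below):
mm_content = {"type": "image_url", "image_url": {"url": "http://example.com/cat.png"}}
mm_type = mm_content["type"]
image_payload = {
    "model": "clip-model-name",  # placeholder model name
    "input": mm_content[mm_type]["url"],
    "modality": mm_type.split("_", 1)[0],  # "image_url" -> "image"
}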
@@ -655,6 +718,10 @@ ASYNC_REQUEST_FUNCS: dict[str, RequestFunc] = {
     "openai-embeddings-chat": async_request_openai_embeddings_chat,
     "openai-embeddings-clip": async_request_openai_embeddings_clip,
     "openai-embeddings-vlm2vec": async_request_openai_embeddings_vlm2vec,
+    # Infinity embedding server: https://github.com/michaelfeil/infinity
+    "infinity-embeddings": async_request_infinity_embeddings,
+    "infinity-embeddings-clip": async_request_infinity_embeddings_clip,
+    # (Infinity embedding server does not support vlm2vec)
 }
 
 OPENAI_COMPATIBLE_BACKENDS = [
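With these registry entries in place, the benchmark resolves the new backends by name exactly like the existing OpenAI-compatible ones. A hedged sketch of the lookup; the actual CLI flag wiring that produces args.backend lives elsewhere in the benchmark code:

# Assumes the names registered above; a real run resolves args.backend the same way.
backend = "infinity-embeddings-clip"
request_func = ASYNC_REQUEST_FUNCS[backend]
# request_func is async_request_infinity_embeddings_clip: it preprocesses the request
# for CLIP and then POSTs to the Infinity /embeddings endpoint. The name also contains
# "embeddings-", so it passes the multi-modal gate in get_samples shown earlier.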