mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-24 08:31:18 +08:00
[Benchmark] Use truncation by default for pooling benchmarks (#26992)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
44c8555621
commit
17838e50ef
@@ -527,6 +527,9 @@ async def async_request_openai_embeddings(
|
|||||||
if request_func_input.model_name
|
if request_func_input.model_name
|
||||||
else request_func_input.model,
|
else request_func_input.model,
|
||||||
"input": request_func_input.prompt,
|
"input": request_func_input.prompt,
|
||||||
|
# Many embedding models have short context length,
|
||||||
|
# this is to avoid dropping some of the requests.
|
||||||
|
"truncate_prompt_tokens": -1,
|
||||||
}
|
}
|
||||||
_update_payload_common(payload, request_func_input)
|
_update_payload_common(payload, request_func_input)
|
||||||
|
|
||||||
@@ -564,6 +567,9 @@ async def async_request_vllm_rerank(
|
|||||||
else request_func_input.model,
|
else request_func_input.model,
|
||||||
"query": request_func_input.prompt[0],
|
"query": request_func_input.prompt[0],
|
||||||
"documents": request_func_input.prompt[1:],
|
"documents": request_func_input.prompt[1:],
|
||||||
|
# Many reranker models have short context length,
|
||||||
|
# this is to avoid dropping some of the requests.
|
||||||
|
"truncate_prompt_tokens": -1,
|
||||||
}
|
}
|
||||||
|
|
||||||
headers = {
|
headers = {
|
||||||
@@ -599,6 +605,9 @@ async def async_request_openai_embeddings_chat(
|
|||||||
"messages": [
|
"messages": [
|
||||||
{"role": "user", "content": content},
|
{"role": "user", "content": content},
|
||||||
],
|
],
|
||||||
|
# Many embedding models have short context length,
|
||||||
|
# this is to avoid dropping some of the requests.
|
||||||
|
"truncate_prompt_tokens": -1,
|
||||||
}
|
}
|
||||||
_update_payload_common(payload, request_func_input)
|
_update_payload_common(payload, request_func_input)
|
||||||
|
|
||||||
@@ -634,13 +643,6 @@ def _preprocess_clip(request_func_input: RequestFuncInput):
|
|||||||
# Image input
|
# Image input
|
||||||
request_func_input.prompt = ""
|
request_func_input.prompt = ""
|
||||||
|
|
||||||
# max_model_len=77 is too short for most datasets,
|
|
||||||
# so by default we truncate the prompt to max_model_len
|
|
||||||
if request_func_input.extra_body is None:
|
|
||||||
request_func_input.extra_body = {}
|
|
||||||
if "truncate_prompt_tokens" not in request_func_input.extra_body:
|
|
||||||
request_func_input.extra_body["truncate_prompt_tokens"] = -1
|
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_vlm2vec(request_func_input: RequestFuncInput):
|
def _preprocess_vlm2vec(request_func_input: RequestFuncInput):
|
||||||
if request_func_input.multi_modal_content:
|
if request_func_input.multi_modal_content:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user